Posts: 25
Threads: 5
Joined: Mar 2022
Apr-24-2022, 02:25 AM
(This post was last modified: Apr-24-2022, 02:25 AM by longmen.)
(Apr-23-2022, 10:32 PM)deanhystad Wrote: I still think you're building the board incorrectly. You should not be using insert() to set moves or values.
You should describe how your Q_learning function is supposed to work.
Can you elaborate on using insert() to set moves or values? Also, why do you think the Q_learning function is not working? Thanks
Here is an update on the code:
import random
import numpy as np
import enum

EACH_STEP_REWARD = -0.1
GOAL_SQUARE_REWARD = 100
FORBIDDEN_SQUARE_REWARD = -100
DISCOUNT_RATE_GAMMA = 0.1  # Discount Rate
LEARNING_RATE_ALPHA = 0.3  # Learning Rate
GREEDY_PROBABILITY_EPSILON = 0.5  # Greedy Probability
ITERATION_MAX_NUM = 10000  # Will be 10,000
START_STATE = 2
LEVEL = 4
HEIGHT = 4
WEIGHT = 4

class Direction(enum.Enum):
    up = 0
    right = 1
    down = 2
    left = 3

class Node:
    def __init__(self, title, next, Goal=False, Forbidden=False, Wall=False, qValues=None, actions=None):
        self.title = title
        self.next = next
        self.qValues = [qValues] * 4
        self.move = [actions] * 4
        self.goal = Goal
        self.forbidden = Forbidden
        self.wall = Wall

    def max_Q_value(self):
        if self.wall:
            return False
        max_q = []
        for q in self.qValues:
            if q is not None:
                max_q.append(q)
        return max(max_q)

    def find_best_move(self):
        max_q = self.max_Q_value()
        q_index = self.qValues.index(max_q)
        return Direction(q_index)

class create_env:
    def __init__(self, input_list, wall=None):
        self.wall = wall
        self.episode = [[13, 14, 15, 16], [9, 10, 11, 12], [5, 6, 7, 8], [1, 2, 3, 4]]
        S = 2
        Node_1 = Node(1, [5, S, self.wall, self.wall])
        Node_Start = Node(S, [6, 3, self.wall, 1])
        Node_3 = Node(3, [S, 7, 4, self.wall])
        Node_4 = Node(4, [8, self.wall, self.wall, 3])
        Node_5 = Node(5, [9, 6, 1, self.wall])
        Node_6 = Node(6, [10, 7, S, 5])
        Node_7 = Node(7, [11, 8, 3, 6])
        Node_8 = Node(8, [12, self.wall, 4, 7])
        Node_9 = Node(9, [13, 10, 5, self.wall])
        Node_10 = Node(10, [14, 11, 6, 9])
        Node_11 = Node(11, [15, 12, 7, 10])
        Node_12 = Node(12, [16, self.wall, 8, 11])
        Node_13 = Node(13, [self.wall, 14, 9, self.wall])
        Node_14 = Node(14, [self.wall, 15, 10, 13])
        Node_15 = Node(15, [self.wall, 16, 11, 14])
        Node_16 = Node(16, [self.wall, self.wall, 12, 15])
        self.episode[0][0] = Node_1
        self.episode[0][1] = Node_Start
        self.episode[0][S] = Node_3
        self.episode[0][3] = Node_4
        self.episode[1][0] = Node_5
        self.episode[1][1] = Node_6
        self.episode[1][S] = Node_7
        self.episode[1][3] = Node_8
        self.episode[S][0] = Node_9
        self.episode[S][1] = Node_10
        self.episode[S][S] = Node_11
        self.episode[S][3] = Node_12
        self.episode[3][0] = Node_13
        self.episode[3][1] = Node_14
        self.episode[3][S] = Node_15
        self.episode[3][3] = Node_16
        self.goal_labels = [int(input_list[0]), int(input_list[1])]
        self.forbidden_label = int(input_list[2])
        self.wall_label = int(input_list[3])
        x = 0
        while x < LEVEL:
            y = 0
            while y < LEVEL:
                current_episode = self.episode[x][y]
                if current_episode.title in self.goal_labels:
                    current_episode.goal = 1
                    current_episode.move.append(4)
                    current_episode.qValues.append(4)
                elif current_episode.title == self.forbidden_label:
                    current_episode.forbidden = 1
                    current_episode.move.append(4)
                    current_episode.qValues.append(4)
                elif current_episode.title == self.wall_label:
                    current_episode.wall = 1
                else:
                    position = 0
                    while position < LEVEL:
                        if current_episode.next[position] is not None:
                            current_episode.move.append(Direction(position)), current_episode.qValues.insert(position, False)
                        position += 1
                y += 1
            x += 1

    def get_episode(self, name):
        for x in self.episode:
            for episode in x:
                if episode.title == name:
                    # print(episode)
                    return episode

    def print_best_actions(self):
        for row in self.episode:
            for episode in row:
                if episode.goal:
                    best_action_str = 'Direction.goal'
                elif episode.forbidden:
                    best_action_str = "Direction.forbid"
                elif episode.wall:
                    best_action_str = 'Direction.wall-square'
                else:
                    best_action_str = str(episode.find_best_move())
                print(str(episode.title) + " " + best_action_str[10:])

    def print_four_Q_value(self, index):
        episode = self.get_episode(index)
        print("up" + ' ' + str(round(episode.qValues[1], 2)))
        print("right" + ' ' + str(round(episode.qValues[2], 2)))
        print("down" + ' ' + str(round(episode.qValues[3], 2)))
        print("left" + ' ' + str(round(episode.qValues[0], 2)))

def Q_learning(environment, print_best_actions, index):
    for iteration in range(ITERATION_MAX_NUM):
        current_episode = environment.get_episode(START_STATE)
        total_episode_reward = 0
        for episode in range(ITERATION_MAX_NUM):
            if np.random.uniform(0, 1) < GREEDY_PROBABILITY_EPSILON:
                next_move = []
                for score in current_episode.move:
                    if score is not None:
                        next_move.append(score)
                        # print(score)
                next_move = random.choice(next_move)
            else:
                next_move = current_episode.find_best_move()
            next_episode = environment.get_episode(current_episode.next[next_move.value])
            if next_episode.goal:
                reward = GOAL_SQUARE_REWARD
            elif next_episode.forbidden:
                reward = FORBIDDEN_SQUARE_REWARD
            else:
                reward = EACH_STEP_REWARD
            total_episode_reward += reward
            old_q = current_episode.qValues[next_move.value]
            new_q = old_q + LEARNING_RATE_ALPHA * (reward + DISCOUNT_RATE_GAMMA * next_episode.max_Q_value() - old_q)
            # print(new_q)
            current_episode.qValues[next_move.value] = new_q
            # print(current_episode.qValues[next_move.value])
            if next_episode.goal:
                break
            elif next_episode.forbidden:
                break
            else:
                if next_episode.wall:
                    break
                else:
                    current_episode = next_episode

def user_input():
    try:
        input_list = []
        input_str = input()
        input_list = input_str.split()
    except:
        print("The input should be like: 15 12 8 6 p")
    environment = create_env(input_list)
    if (len(input_list) == 5) and (input_list[-1] == 'p'):
        Q_learning(environment, 1, 0)
        environment.print_best_actions()
    elif (len(input_list) == 6) and (input_list[-2] == 'q'):
        Q_learning(environment, 0, int(input_list[5]))
        environment.print_four_Q_value(int(input_list[5]))

user_input()
Posts: 6,827
Threads: 20
Joined: Feb 2020
As far as this:
Quote: Can you elaborate on using insert() to set moves or values?
What do you think list.insert(index, value) does? It is not the correct way to SET existing values in a list.
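For example (plain Python list semantics, independent of your program):

values = [10, 20, 30]
values.insert(1, 99)  # insert() shifts elements right: [10, 99, 20, 30] -- the list grows
values[1] = 0         # index assignment replaces in place: [10, 0, 20, 30] -- same length
print(values)         # [10, 0, 20, 30]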
I would like you to describe your Q-learning algorithm. I cannot tell whether it contains errors in logic or execution, or whether I am misunderstanding your requirements, without knowing what you think it is supposed to do. For example, you always start at the same square, but it appears that you want a solution for the entire map; I can design a map where that is not possible. It also seems a very inefficient way to generate a map, and even with 10000 iterations there remains a possibility that you do not try all routes. You calculate a total episode reward that is never used, and you pass arguments to Q_learning() that are not used.
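For reference, the standard tabular Q-learning update (which your new_q line appears to implement) is

Q(s, a) <- Q(s, a) + alpha * (r + gamma * max over a' of Q(s', a') - Q(s, a))

where alpha is the learning rate, gamma the discount rate, r the step reward, and s' the next state.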
I think I implemented your algorithm and I get this result for inputs: 15 12 8 6 p
Output: 13 → 14 → 15 G 16 ←
9 ↑ 10 ↑ 11 ↑ 12 G
5 ↑ 6 W 7 ↑ 8 F
1 ↑ 2 → 3 ↑ 4 ←
I think this is correct. 16 cannot be up as you indicate in your expected results. 16 does not have an "Up" neighbor.
Posts: 25
Threads: 5
Joined: Mar 2022
(Apr-24-2022, 02:59 AM)deanhystad Wrote: I think this is correct. 16 cannot be up as you indicate in your expected results. 16 does not have an "Up" neighbor.
But my assignment requires it to go up at index 16 :/
Posts: 6,827
Threads: 20
Joined: Feb 2020
If 16 is supposed to be up, that means your board is wrong and 16's up neighbor cannot be a wall.
Though your solution is inefficient, I think it works. Any problems you have are probably errors setting up the board.
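One hypothetical way to catch such hand-entry mistakes (check_board and OPPOSITE are names I made up; this assumes the Node/create_env layout from your code): if moving in some direction from square A lands on B, then moving the opposite way from B should land back on A.

# Hypothetical consistency check for a hand-built board (not part of the original code)
# OPPOSITE maps each direction index to its reverse: up<->down, right<->left
OPPOSITE = {0: 2, 1: 3, 2: 0, 3: 1}

def check_board(env):
    for row in env.episode:
        for node in row:
            for direction, label in enumerate(node.next):
                if label is None:
                    continue
                back = env.get_episode(label).next[OPPOSITE[direction]]
                if back != node.title:
                    print(f"{node.title} -> {label} (direction {direction}) is not mirrored back")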
Posts: 25
Threads: 5
Joined: Mar 2022
(Apr-24-2022, 03:31 PM)deanhystad Wrote: If 16 is supposed to be up, that means your board is wrong and 16's up neighbor cannot be a wall.
Though your solution is inefficient, I think it works. Any problems you have are probably errors setting up the board.
Hi, I figured out the issue. There is one more thing I hope you can advise me on. With the input 15 12 8 6 q 11 it prints out:
Action.UP 99.99999999999999
Action.UP 99.99999999999999
Action.DOWN 0.8899999999999998
Action.DOWN 0.8899999999999998
However, I want the second line to print RIGHT instead of UP, and the fourth line to print LEFT instead of DOWN. The output should always print in the order UP RIGHT DOWN LEFT. Could you please advise? Thanks for all your responses!
Posts: 6,827
Threads: 20
Joined: Feb 2020
Apr-24-2022, 05:21 PM
(This post was last modified: Apr-24-2022, 05:21 PM by deanhystad.)
You have not posted any code that can produce that output. I cannot explain why code I have not seen does not work as expected.
Did you change your search order from LURD to URDL? Is that what "fixed" your output? If so, that just means that 16 was never evaluated. This makes sense because it is not a neighbor of any open (not goal, forbidden or wall) tile.
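Two ordering details worth seeing in miniature (a sketch assuming the Direction enum from your code; q_values is a made-up example list): list.index returns the FIRST maximum, so tied Q-values in find_best_move always resolve to the earliest direction in the list, and iterating the enum itself gives a fixed up, right, down, left print order.

import enum

class Direction(enum.Enum):  # same member order as in the posted code
    up = 0
    right = 1
    down = 2
    left = 3

q_values = [0.0, 0.0, 0.0, 0.0]        # all four directions tied (made-up example)
best = q_values.index(max(q_values))   # index() returns the FIRST maximum
print(Direction(best))                 # Direction.up -- ties always break toward index 0

for direction in Direction:            # enum iteration order is definition order
    print(direction.name, round(q_values[direction.value], 2))  # always up, right, down, left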
Posts: 25
Threads: 5
Joined: Mar 2022
(Apr-24-2022, 03:58 PM)deanhystad Wrote: You have not posted any code that can produce that output. I cannot explain why code I have not seen does not work as expected.
I figured that out as well. However, there is one more issue. As shown below, the UP value is sometimes wrong and the DOWN value is always wrong. I wonder if you could help? I have also provided the code below. Thanks
10 8 9 6 q 2
My output
up -0.1
right 0.89
down -0.1
left -0.1
Expected Output
up -0.01
right 0.89
down -0.01
left -0.1
12 7 5 6 q 3
My output
up 100.0
right 0.89
down -0.01
left 0.89
Expected Output
up 100.0
right 0.89
down 9.9
left 0.89
import random
import numpy as np
import enum

EACH_STEP_REWARD = -0.1
GOAL_SQUARE_REWARD = 100
FORBIDDEN_SQUARE_REWARD = -100
DISCOUNT_RATE_GAMMA = 0.1  # Discount Rate
LEARNING_RATE_ALPHA = 0.3  # Learning Rate
GREEDY_PROBABILITY_EPSILON = 0.5  # Greedy Probability
ITERATION_MAX_NUM = 1000000  # Will be 10,000
START_LABEL = 2
LEVEL = 4
HEIGHT = 4
WEIGHT = 4

class Direction(enum.Enum):
    up = 0
    right = 1
    down = 2
    left = 3

class Node:
    def __init__(self, title, next, Goal=False, Forbidden=False, Wall=False, qValues=None, actions=None):
        self.title = title
        self.next = next
        self.qValues = [qValues] * 5
        self.move = [actions] * 5
        self.goal = Goal
        self.forbidden = Forbidden
        self.wall = Wall

    def max_Q_value(self):
        if self.wall:
            return False
        max_q = []
        for q in self.qValues:
            if q is not None:
                max_q.append(q)
        return max(max_q)

    def find_best_move(self):
        max_q = self.max_Q_value()
        q_index = self.qValues.index(max_q)
        return Direction(q_index)

class create_env:
    def __init__(self, input_list, wall=None):
        self.wall = wall
        self.episode = [[13, 14, 15, 16], [9, 10, 11, 12], [5, 6, 7, 8], [1, 2, 3, 4]]
        S = 2
        Node_1 = Node(1, [5, S, 1, 1])
        Node_Start = Node(S, [6, 3, 1, 1])
        Node_3 = Node(3, [7, 4, 1, S])
        Node_4 = Node(4, [8, 1, 1, 3])
        Node_5 = Node(5, [9, self.wall, 1, 1])
        Node_6 = Node(6, [10, 7, S, 5])
        Node_7 = Node(7, [11, 8, 3, 6])
        Node_8 = Node(8, [12, self.wall, 4, 7])
        Node_9 = Node(9, [13, 10, 5, 1])
        Node_10 = Node(10, [14, 11, 6, 9])
        Node_11 = Node(11, [15, 12, 7, 10])
        Node_12 = Node(12, [16, self.wall, 8, 11])
        Node_13 = Node(13, [1, 14, 9, self.wall])
        Node_14 = Node(14, [1, 15, 10, 13])
        Node_15 = Node(15, [1, 16, 11, 14])
        Node_16 = Node(16, [1, self.wall, 12, 1])
        self.episode[0][0] = Node_1
        self.episode[0][1] = Node_Start
        self.episode[0][S] = Node_3
        self.episode[0][3] = Node_4
        self.episode[1][0] = Node_5
        self.episode[1][1] = Node_6
        self.episode[1][S] = Node_7
        self.episode[1][3] = Node_8
        self.episode[S][0] = Node_9
        self.episode[S][1] = Node_10
        self.episode[S][S] = Node_11
        self.episode[S][3] = Node_12
        self.episode[3][0] = Node_13
        self.episode[3][1] = Node_14
        self.episode[3][S] = Node_15
        self.episode[3][3] = Node_16
        self.goal_labels = [int(input_list[0]), int(input_list[1])]
        self.forbidden_label = int(input_list[2])
        self.wall_label = int(input_list[3])
        x = 0
        while x < LEVEL:
            y = 0
            while y < LEVEL:
                current_episode = self.episode[x][y]
                if current_episode.title in self.goal_labels:
                    current_episode.goal = 1
                    current_episode.move.insert(4, 0)
                    current_episode.qValues.insert(4, 0)
                elif current_episode.title == self.forbidden_label:
                    current_episode.forbidden = 1
                    current_episode.move.insert(4, 0)
                    current_episode.qValues.insert(4, 0)
                elif current_episode.title == self.wall_label:
                    current_episode.wall = 1
                else:
                    position = 0
                    while position < LEVEL:
                        if current_episode.next[position] is not None:
                            current_episode.move.insert(position, Direction(position)), current_episode.qValues.insert(position, False)
                        position += 1
                y += 1
            x += 1

    def get_episode(self, name):
        for x in self.episode:
            for episode in x:
                if episode.title == name:
                    # print(episode)
                    return episode

    def print_best_actions(self):
        for row in self.episode:
            for episode in row:
                if episode.goal:
                    best_action_str = 'Direction.goal'
                elif episode.forbidden:
                    best_action_str = "Direction.forbid"
                elif episode.wall:
                    best_action_str = 'Direction.wall-square'
                else:
                    best_action_str = str(episode.find_best_move())
                print(str(episode.title) + " " + best_action_str[10:])

    def print_four_Q_value(self, index):
        episode = self.get_episode(index)
        print("up" + ' ' + str(round(episode.qValues[0], 2)))
        print("right" + ' ' + str(round(episode.qValues[1], 2)))
        print("down" + ' ' + str(round(episode.qValues[2], 2)))
        print("left" + ' ' + str(round(episode.qValues[3], 2)))

def Q_learning(environment, print_best_actions, index):
    for iteration in range(ITERATION_MAX_NUM):
        current_episode = environment.get_episode(START_LABEL)
        total_episode_reward = 0
        for episode in range(10000):
            if np.random.uniform(0, 1) < GREEDY_PROBABILITY_EPSILON:
                next_move = []
                for score in current_episode.move:
                    if score is not None:
                        next_move.append(score)
                next_move = random.choice(next_move)
            else:
                next_move = current_episode.find_best_move()
            next_episode = environment.get_episode(current_episode.next[next_move.value])
            if next_episode.goal:
                reward = GOAL_SQUARE_REWARD
            elif next_episode.forbidden:
                reward = FORBIDDEN_SQUARE_REWARD
            else:
                reward = EACH_STEP_REWARD
            total_episode_reward += reward
            old_q = current_episode.qValues[next_move.value]
            new_q = old_q + LEARNING_RATE_ALPHA * (reward + DISCOUNT_RATE_GAMMA * next_episode.max_Q_value() - old_q)
            current_episode.qValues[next_move.value] = new_q
            if next_episode.goal:
                break
            elif next_episode.forbidden:
                break
            else:
                if next_episode.wall:
                    break
                else:
                    current_episode = next_episode

def user_input():
    try:
        input_list = []
        input_str = input()
        input_list = input_str.split()
    except:
        print("The input should be like: 15 12 8 6 p")
    environment = create_env(input_list)
    if (len(input_list) == 5) and (input_list[-1] == 'p'):
        Q_learning(environment, 1, 0)
        environment.print_best_actions()
    elif (len(input_list) == 6) and (input_list[-2] == 'q'):
        Q_learning(environment, 0, int(input_list[5]))
        environment.print_four_Q_value(int(input_list[5]))

user_input()
Posts: 6,827
Threads: 20
Joined: Feb 2020
Apr-24-2022, 07:14 PM
(This post was last modified: Apr-24-2022, 07:14 PM by deanhystad.)
Is this correct?
Node_Start = Node(S, [6, 3, 1, 1])
Node_3 = Node(3, [7, 4, 1, S])
I think your board is still riddled with errors. You would be better off building the board programmatically; you make too many mistakes when doing it by hand.
This is how I make my board. I use dictionaries instead of arrays:

from enum import Enum

class Action(Enum):
    """Enumeration for movements (actions). Value is (row, column) steps to make move"""
    UP = (1, 0)
    RIGHT = (0, 1)
    DOWN = (-1, 0)
    LEFT = (0, -1)

    def __str__(self):
        """Return arrow associated with action"""
        if self is self.LEFT:
            return '\u2190'
        elif self is self.UP:
            return '\u2191'
        elif self is self.RIGHT:
            return '\u2192'
        elif self is self.DOWN:
            return '\u2193'
        return " "

class Board:
    def __init__(self, goals, traps, walls, rows=4, columns=4):
        """Initialize the board"""
        # Make all the tiles
        self.rows = rows
        self.columns = columns
        self.tiles = {x: Tile(x, x in goals, x in traps, x in walls) for x in range(1, rows * columns + 1)}
        # Tell open tiles about their neighbors
        for label in set(self.tiles) - set(goals + traps + walls):
            tile = self.tiles[label]
            for action in Action:
                r, c = action.value
                neighbor = self.tile(r + (label - 1) // columns, c + (label - 1) % columns)
                # Record the neighboring tile (None if off the board or a wall)
                tile.neighbor[action] = neighbor if neighbor and not neighbor.wall else None
                tile.values[action] = 1 if neighbor and neighbor.wall else 0

    def tile(self, row, column):
        """Return tile at intersection of row and column. Return None if row or column out of range"""
        if 0 <= row < self.rows and 0 <= column < self.columns:
            return self.tiles[row * self.columns + column + 1]
        return None
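Tile itself is not shown here; roughly, it only needs to carry a few attributes that Board fills in:

class Tile:
    """Rough sketch of the Tile attributes used by Board above (the full class is not shown)."""
    def __init__(self, label, goal=False, trap=False, wall=False):
        self.label = label      # square number, 1..rows*columns
        self.goal = goal
        self.trap = trap
        self.wall = wall
        self.neighbor = {}      # Action -> adjacent Tile (or None), filled in by Board
        self.values = {}        # Action -> value, filled in by Board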
Posts: 25
Threads: 5
Joined: Mar 2022
(Apr-24-2022, 07:14 PM)deanhystad Wrote: Is this correct?
Node_Start = Node(S, [6, 3, 1, 1])
Node_3 = Node(3, [7, 4, 1, S])
I think your board is still riddled with errors. [...]
Node_Start = Node(S, [6, 3, 1, 1])
Node_3 = Node(3, [7, 4, 1, S])
These are correct. However, does the new Board print output matching the expected results? Thank you!
Posts: 6,827
Threads: 20
Joined: Feb 2020
I'm pretty sure that is wrong. Going down from node 2 or node 3 should not take you to node 1.
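A sketch of computing neighbors programmatically (a hypothetical helper, using the thread's bottom-to-top labeling where squares 1-4 are the bottom row) makes that kind of mistake impossible:

def neighbor(label, direction, rows=4, columns=4):
    """Return the label of the adjacent square, or None if the move leaves the board."""
    steps = {"up": (1, 0), "right": (0, 1), "down": (-1, 0), "left": (0, -1)}
    row, col = divmod(label - 1, columns)  # label 1 is row 0 (bottom), column 0
    dr, dc = steps[direction]
    row, col = row + dr, col + dc
    if 0 <= row < rows and 0 <= col < columns:
        return row * columns + col + 1
    return None

print(neighbor(2, "down"))  # None -- there is no square below 2
print(neighbor(2, "up"))    # 6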