Posts: 25
Threads: 5
Joined: Mar 2022
Apr-24-2022, 02:25 AM
(This post was last modified: Apr-24-2022, 02:25 AM by longmen.)
(Apr-23-2022, 10:32 PM)deanhystad Wrote: I still think you're building the board incorrectly. You should not be using insert() to set moves or values.
You should describe how your Q_learning function is supposed to work.
Can you elaborate on using insert() to set moves or values? Also, why do you think the Q_learning function is not working? Thanks
Here is an update on the code:
import random
import numpy as np
import enum

EACH_STEP_REWARD = -0.1
GOAL_SQUARE_REWARD = 100
FORBIDDEN_SQUARE_REWARD = -100
DISCOUNT_RATE_GAMMA = 0.1  # Discount Rate
LEARNING_RATE_ALPHA = 0.3  # Learning Rate
GREEDY_PROBABILITY_EPSILON = 0.5  # Greedy Probability
ITERATION_MAX_NUM = 10000  # Will be 10,000
START_STATE = 2
LEVEL = 4
HEIGHT = 4
WEIGHT = 4

class Direction(enum.Enum):
    up = 0
    right = 1
    down = 2
    left = 3

class Node:
    def __init__(self, title, next, Goal=False, Forbidden=False, Wall=False, qValues=None, actions=None):
        self.title = title
        self.next = next
        self.qValues = [qValues] * 4
        self.move = [actions] * 4
        self.goal = Goal
        self.forbidden = Forbidden
        self.wall = Wall

    def max_Q_value(self):
        if self.wall:
            return False
        max_q = []
        for q in self.qValues:
            if q is not None:
                max_q.append(q)
        return max(max_q)

    def find_best_move(self):
        max_q = self.max_Q_value()
        q_index = self.qValues.index(max_q)
        return Direction(q_index)

class create_env:
    def __init__(self, input_list, wall=None):
        self.wall = wall
        self.episode = [[13, 14, 15, 16], [9, 10, 11, 12], [5, 6, 7, 8], [1, 2, 3, 4]]
        S = 2
        Node_1 = Node(1, [5, S, self.wall, self.wall])
        Node_Start = Node(S, [6, 3, self.wall, 1])
        Node_3 = Node(3, [S, 7, 4, self.wall])
        Node_4 = Node(4, [8, self.wall, self.wall, 3])
        Node_5 = Node(5, [9, 6, 1, self.wall])
        Node_6 = Node(6, [10, 7, S, 5])
        Node_7 = Node(7, [11, 8, 3, 6])
        Node_8 = Node(8, [12, self.wall, 4, 7])
        Node_9 = Node(9, [13, 10, 5, self.wall])
        Node_10 = Node(10, [14, 11, 6, 9])
        Node_11 = Node(11, [15, 12, 7, 10])
        Node_12 = Node(12, [16, self.wall, 8, 11])
        Node_13 = Node(13, [self.wall, 14, 9, self.wall])
        Node_14 = Node(14, [self.wall, 15, 10, 13])
        Node_15 = Node(15, [self.wall, 16, 11, 14])
        Node_16 = Node(16, [self.wall, self.wall, 12, 15])
        self.episode[0][0] = Node_1
        self.episode[0][1] = Node_Start
        self.episode[0][S] = Node_3
        self.episode[0][3] = Node_4
        self.episode[1][0] = Node_5
        self.episode[1][1] = Node_6
        self.episode[1][S] = Node_7
        self.episode[1][3] = Node_8
        self.episode[S][0] = Node_9
        self.episode[S][1] = Node_10
        self.episode[S][S] = Node_11
        self.episode[S][3] = Node_12
        self.episode[3][0] = Node_13
        self.episode[3][1] = Node_14
        self.episode[3][S] = Node_15
        self.episode[3][3] = Node_16
        self.goal_labels = [int(input_list[0]), int(input_list[1])]
        self.forbidden_label = int(input_list[2])
        self.wall_label = int(input_list[3])
        x = 0
        while x < LEVEL:
            y = 0
            while y < LEVEL:
                current_episode = self.episode[x][y]
                if current_episode.title in self.goal_labels:
                    current_episode.goal = 1
                    current_episode.move.append(4)
                    current_episode.qValues.append(4)
                elif current_episode.title == self.forbidden_label:
                    current_episode.forbidden = 1
                    current_episode.move.append(4)
                    current_episode.qValues.append(4)
                elif current_episode.title == self.wall_label:
                    current_episode.wall = 1
                else:
                    position = 0
                    while position < LEVEL:
                        if current_episode.next[position] is not None:
                            current_episode.move.append(Direction(position)), current_episode.qValues.insert(position, False)
                        position += 1
                y += 1
            x += 1

    def get_episode(self, name):
        for x in self.episode:
            for episode in x:
                if episode.title == name:
                    # print(episode)
                    return episode

    def print_best_actions(self):
        for row in self.episode:
            for episode in row:
                if episode.goal:
                    best_action_str = 'Direction.goal'
                elif episode.forbidden:
                    best_action_str = "Direction.forbid"
                elif episode.wall:
                    best_action_str = 'Direction.wall-square'
                else:
                    best_action_str = str(episode.find_best_move())
                print(str(episode.title) + " " + best_action_str[10:])

    def print_four_Q_value(self, index):
        episode = self.get_episode(index)
        print("up" + ' ' + str(round(episode.qValues[1], 2)))
        print("right" + ' ' + str(round(episode.qValues[2], 2)))
        print("down" + ' ' + str(round(episode.qValues[3], 2)))
        print("left" + ' ' + str(round(episode.qValues[0], 2)))

def Q_learning(environment, print_best_actions, index):
    for iteration in range(ITERATION_MAX_NUM):
        current_episode = environment.get_episode(START_STATE)
        total_episode_reward = 0
        for episode in range(ITERATION_MAX_NUM):
            if np.random.uniform(0, 1) < GREEDY_PROBABILITY_EPSILON:
                next_move = []
                for score in current_episode.move:
                    if score is not None:
                        next_move.append(score)
                        # print(score)
                next_move = random.choice(next_move)
            else:
                next_move = current_episode.find_best_move()
            next_episode = environment.get_episode(current_episode.next[next_move.value])
            if next_episode.goal:
                reward = GOAL_SQUARE_REWARD
            elif next_episode.forbidden:
                reward = FORBIDDEN_SQUARE_REWARD
            else:
                reward = EACH_STEP_REWARD
            total_episode_reward += reward
            old_q = current_episode.qValues[next_move.value]
            new_q = old_q + LEARNING_RATE_ALPHA * (reward + DISCOUNT_RATE_GAMMA * next_episode.max_Q_value() - old_q)
            # print(new_q)
            current_episode.qValues[next_move.value] = new_q
            # print(current_episode.qValues[next_move.value])
            if next_episode.goal:
                break
            elif next_episode.forbidden:
                break
            else:
                if next_episode.wall:
                    break
                else:
                    current_episode = next_episode

def user_input():
    try:
        input_list = []
        input_str = input()
        input_list = input_str.split()
    except:
        print("The input should be like: 15 12 8 6 p")
    environment = create_env(input_list)
    if (len(input_list) == 5) and (input_list[-1] == 'p'):
        Q_learning(environment, 1, 0)
        environment.print_best_actions()
    elif (len(input_list) == 6) and (input_list[-2] == 'q'):
        Q_learning(environment, 0, int(input_list[5]))
        environment.print_four_Q_value(int(input_list[5]))

user_input()
Posts: 6,827
Threads: 20
Joined: Feb 2020
As far as this:
Quote: Can you elaborate on using insert() to set moves or values?
What do you think list.insert(index, value) does? It is not the correct way to SET existing values in a list.
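For example (plain Python list semantics, independent of your program):

values = [10, 20, 30]
values.insert(1, 99)  # insert() shifts elements right: [10, 99, 20, 30] -- the list grows
values[1] = 0         # index assignment replaces in place: [10, 0, 20, 30] -- same length
print(values)         # [10, 0, 20, 30]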
I would like you to describe your Q-learning algorithm. I cannot tell whether it contains errors in logic or execution, or whether I am misunderstanding your requirements, without knowing what you think it is supposed to do. For example, you always start at the same square, but it appears that you want a solution for the entire map; I can design a map where that is not possible. It also seems a very inefficient way to generate a map, and even with 10000 iterations there remains a possibility that you do not try all routes. You calculate a total episode reward that is never used, and you pass arguments to Q_learning() that are not used.
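For reference, the standard tabular Q-learning update (which your new_q line appears to implement) is

Q(s, a) <- Q(s, a) + alpha * (r + gamma * max over a' of Q(s', a') - Q(s, a))

where alpha is the learning rate, gamma the discount rate, r the step reward, and s' the next state.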
I think I implemented your algorithm and I get this result for inputs: 15 12 8 6 p
Output: 13 → 14 → 15 G 16 ←
9 ↑ 10 ↑ 11 ↑ 12 G
5 ↑ 6 W 7 ↑ 8 F
1 ↑ 2 → 3 ↑ 4 ←
I think this is correct. 16 cannot be up as you indicate in your expected results. 16 does not have an "Up" neighbor.
Posts: 25
Threads: 5
Joined: Mar 2022
(Apr-24-2022, 02:59 AM)deanhystad Wrote: I think this is correct. 16 cannot be up as you indicate in your expected results. 16 does not have an "Up" neighbor.
But my assignment requires it to go up at index 16 :/
Posts: 6,827
Threads: 20
Joined: Feb 2020
If 16 is supposed to be up, that means your board is wrong and 16's up neighbor cannot be a wall.
Though your solution is inefficient, I think it works. Any problems you have are probably errors setting up the board.
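One hypothetical way to catch such hand-entry mistakes (check_board and OPPOSITE are names I made up; this assumes the Node/create_env layout from your code): if moving in some direction from square A lands on B, then moving the opposite way from B should land back on A.

# Hypothetical consistency check for a hand-built board (not part of the original code)
# OPPOSITE maps each direction index to its reverse: up<->down, right<->left
OPPOSITE = {0: 2, 1: 3, 2: 0, 3: 1}

def check_board(env):
    for row in env.episode:
        for node in row:
            for direction, label in enumerate(node.next):
                if label is None:
                    continue
                back = env.get_episode(label).next[OPPOSITE[direction]]
                if back != node.title:
                    print(f"{node.title} -> {label} (direction {direction}) is not mirrored back")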
Posts: 25
Threads: 5
Joined: Mar 2022
(Apr-24-2022, 03:31 PM)deanhystad Wrote: If 16 is supposed to be up, that means your board is wrong and 16's up neighbor cannot be a wall.
Though your solution is inefficient, I think it works. Any problems you have are probably errors setting up the board.
Hi, I figured out the issue. There is one more thing I hope you can advise me on. With the input 15 12 8 6 q 11 it prints out:
Action.UP 99.99999999999999
Action.UP 99.99999999999999
Action.DOWN 0.8899999999999998
Action.DOWN 0.8899999999999998
However, I want the second line to print RIGHT instead of UP, and the fourth line to print LEFT instead of DOWN. The output should always print in the order UP RIGHT DOWN LEFT. Could you please advise? Thanks for all your responses!
Posts: 6,827
Threads: 20
Joined: Feb 2020
Apr-24-2022, 05:21 PM
(This post was last modified: Apr-24-2022, 05:21 PM by deanhystad.)
You have not posted any code that can produce that output. I cannot explain why code I have not seen does not work as expected.
Did you change your search order from LURD to URDL? Is that what "fixed" your output? If so, that just means that 16 was never evaluated. This makes sense because it is not a neighbor of any open (not goal, forbidden or wall) tile.
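Two ordering details worth seeing in miniature (a sketch assuming the Direction enum from your code; q_values is a made-up example list): list.index returns the FIRST maximum, so tied Q-values in find_best_move always resolve to the earliest direction in the list, and iterating the enum itself gives a fixed up, right, down, left print order.

import enum

class Direction(enum.Enum):  # same member order as in the posted code
    up = 0
    right = 1
    down = 2
    left = 3

q_values = [0.0, 0.0, 0.0, 0.0]        # all four directions tied (made-up example)
best = q_values.index(max(q_values))   # index() returns the FIRST maximum
print(Direction(best))                 # Direction.up -- ties always break toward index 0

for direction in Direction:            # enum iteration order is definition order
    print(direction.name, round(q_values[direction.value], 2))  # always up, right, down, left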
Posts: 25
Threads: 5
Joined: Mar 2022
(Apr-24-2022, 03:58 PM)deanhystad Wrote: You have not posted any code that can produce that output. I cannot explain why code I have not seen does not work as expected.
I figured that out as well. However, there is one more issue. As shown below, the UP value is sometimes wrong and the DOWN value is always wrong. I wonder if you could help? I have also provided the code below. Thanks
10 8 9 6 q 2
My output
up -0.1
right 0.89
down -0.1
left -0.1
Expected Output
up -0.01
right 0.89
down -0.01
left -0.1
12 7 5 6 q 3
My output
up 100.0
right 0.89
down -0.01
left 0.89
Expected Output
up 100.0
right 0.89
down 9.9
left 0.89
import random
import numpy as np
import enum

EACH_STEP_REWARD = -0.1
GOAL_SQUARE_REWARD = 100
FORBIDDEN_SQUARE_REWARD = -100
DISCOUNT_RATE_GAMMA = 0.1  # Discount Rate
LEARNING_RATE_ALPHA = 0.3  # Learning Rate
GREEDY_PROBABILITY_EPSILON = 0.5  # Greedy Probability
ITERATION_MAX_NUM = 1000000  # Will be 10,000
START_LABEL = 2
LEVEL = 4
HEIGHT = 4
WEIGHT = 4

class Direction(enum.Enum):
    up = 0
    right = 1
    down = 2
    left = 3

class Node:
    def __init__(self, title, next, Goal=False, Forbidden=False, Wall=False, qValues=None, actions=None):
        self.title = title
        self.next = next
        self.qValues = [qValues] * 5
        self.move = [actions] * 5
        self.goal = Goal
        self.forbidden = Forbidden
        self.wall = Wall

    def max_Q_value(self):
        if self.wall:
            return False
        max_q = []
        for q in self.qValues:
            if q is not None:
                max_q.append(q)
        return max(max_q)

    def find_best_move(self):
        max_q = self.max_Q_value()
        q_index = self.qValues.index(max_q)
        return Direction(q_index)

class create_env:
    def __init__(self, input_list, wall=None):
        self.wall = wall
        self.episode = [[13, 14, 15, 16], [9, 10, 11, 12], [5, 6, 7, 8], [1, 2, 3, 4]]
        S = 2
        Node_1 = Node(1, [5, S, 1, 1])
        Node_Start = Node(S, [6, 3, 1, 1])
        Node_3 = Node(3, [7, 4, 1, S])
        Node_4 = Node(4, [8, 1, 1, 3])
        Node_5 = Node(5, [9, self.wall, 1, 1])
        Node_6 = Node(6, [10, 7, S, 5])
        Node_7 = Node(7, [11, 8, 3, 6])
        Node_8 = Node(8, [12, self.wall, 4, 7])
        Node_9 = Node(9, [13, 10, 5, 1])
        Node_10 = Node(10, [14, 11, 6, 9])
        Node_11 = Node(11, [15, 12, 7, 10])
        Node_12 = Node(12, [16, self.wall, 8, 11])
        Node_13 = Node(13, [1, 14, 9, self.wall])
        Node_14 = Node(14, [1, 15, 10, 13])
        Node_15 = Node(15, [1, 16, 11, 14])
        Node_16 = Node(16, [1, self.wall, 12, 1])
        self.episode[0][0] = Node_1
        self.episode[0][1] = Node_Start
        self.episode[0][S] = Node_3
        self.episode[0][3] = Node_4
        self.episode[1][0] = Node_5
        self.episode[1][1] = Node_6
        self.episode[1][S] = Node_7
        self.episode[1][3] = Node_8
        self.episode[S][0] = Node_9
        self.episode[S][1] = Node_10
        self.episode[S][S] = Node_11
        self.episode[S][3] = Node_12
        self.episode[3][0] = Node_13
        self.episode[3][1] = Node_14
        self.episode[3][S] = Node_15
        self.episode[3][3] = Node_16
        self.goal_labels = [int(input_list[0]), int(input_list[1])]
        self.forbidden_label = int(input_list[2])
        self.wall_label = int(input_list[3])
        x = 0
        while x < LEVEL:
            y = 0
            while y < LEVEL:
                current_episode = self.episode[x][y]
                if current_episode.title in self.goal_labels:
                    current_episode.goal = 1
                    current_episode.move.insert(4, 0)
                    current_episode.qValues.insert(4, 0)
                elif current_episode.title == self.forbidden_label:
                    current_episode.forbidden = 1
                    current_episode.move.insert(4, 0)
                    current_episode.qValues.insert(4, 0)
                elif current_episode.title == self.wall_label:
                    current_episode.wall = 1
                else:
                    position = 0
                    while position < LEVEL:
                        if current_episode.next[position] is not None:
                            current_episode.move.insert(position, Direction(position)), current_episode.qValues.insert(position, False)
                        position += 1
                y += 1
            x += 1

    def get_episode(self, name):
        for x in self.episode:
            for episode in x:
                if episode.title == name:
                    # print(episode)
                    return episode

    def print_best_actions(self):
        for row in self.episode:
            for episode in row:
                if episode.goal:
                    best_action_str = 'Direction.goal'
                elif episode.forbidden:
                    best_action_str = "Direction.forbid"
                elif episode.wall:
                    best_action_str = 'Direction.wall-square'
                else:
                    best_action_str = str(episode.find_best_move())
                print(str(episode.title) + " " + best_action_str[10:])

    def print_four_Q_value(self, index):
        episode = self.get_episode(index)
        print("up" + ' ' + str(round(episode.qValues[0], 2)))
        print("right" + ' ' + str(round(episode.qValues[1], 2)))
        print("down" + ' ' + str(round(episode.qValues[2], 2)))
        print("left" + ' ' + str(round(episode.qValues[3], 2)))

def Q_learning(environment, print_best_actions, index):
    for iteration in range(ITERATION_MAX_NUM):
        current_episode = environment.get_episode(START_LABEL)
        total_episode_reward = 0
        for episode in range(10000):
            if np.random.uniform(0, 1) < GREEDY_PROBABILITY_EPSILON:
                next_move = []
                for score in current_episode.move:
                    if score is not None:
                        next_move.append(score)
                next_move = random.choice(next_move)
            else:
                next_move = current_episode.find_best_move()
            next_episode = environment.get_episode(current_episode.next[next_move.value])
            if next_episode.goal:
                reward = GOAL_SQUARE_REWARD
            elif next_episode.forbidden:
                reward = FORBIDDEN_SQUARE_REWARD
            else:
                reward = EACH_STEP_REWARD
            total_episode_reward += reward
            old_q = current_episode.qValues[next_move.value]
            new_q = old_q + LEARNING_RATE_ALPHA * (reward + DISCOUNT_RATE_GAMMA * next_episode.max_Q_value() - old_q)
            current_episode.qValues[next_move.value] = new_q
            if next_episode.goal:
                break
            elif next_episode.forbidden:
                break
            else:
                if next_episode.wall:
                    break
                else:
                    current_episode = next_episode

def user_input():
    try:
        input_list = []
        input_str = input()
        input_list = input_str.split()
    except:
        print("The input should be like: 15 12 8 6 p")
    environment = create_env(input_list)
    if (len(input_list) == 5) and (input_list[-1] == 'p'):
        Q_learning(environment, 1, 0)
        environment.print_best_actions()
    elif (len(input_list) == 6) and (input_list[-2] == 'q'):
        Q_learning(environment, 0, int(input_list[5]))
        environment.print_four_Q_value(int(input_list[5]))

user_input()
Posts: 6,827
Threads: 20
Joined: Feb 2020
Apr-24-2022, 07:14 PM
(This post was last modified: Apr-24-2022, 07:14 PM by deanhystad.)
Is this correct?
Node_Start = Node(S, [6, 3, 1, 1])
Node_3 = Node(3, [7, 4, 1, S])
I think your board is still riddled with errors. You would be better off building the board programmatically; you make too many mistakes when doing it by hand.
This is how I make my board. I use dictionaries instead of arrays:

from enum import Enum

class Action(Enum):
    """Enumeration for movements (actions). Value is (row, column) steps to make move"""
    UP = (1, 0)
    RIGHT = (0, 1)
    DOWN = (-1, 0)
    LEFT = (0, -1)

    def __str__(self):
        """Return arrow associated with action"""
        if self is self.LEFT:
            return '\u2190'
        elif self is self.UP:
            return '\u2191'
        elif self is self.RIGHT:
            return '\u2192'
        elif self is self.DOWN:
            return '\u2193'
        return " "

class Board:
    def __init__(self, goals, traps, walls, rows=4, columns=4):
        """Initialize the board"""
        # Make all the tiles
        self.rows = rows
        self.columns = columns
        self.tiles = {x: Tile(x, x in goals, x in traps, x in walls) for x in range(1, rows * columns + 1)}
        # Tell open tiles about their neighbors
        for label in set(self.tiles) - set(goals + traps + walls):
            tile = self.tiles[label]
            for action in Action:
                r, c = action.value
                neighbor = self.tile(r + (label - 1) // columns, c + (label - 1) % columns)
                # Record the neighboring tile (None if off the board or a wall)
                tile.neighbor[action] = neighbor if neighbor and not neighbor.wall else None
                tile.values[action] = 1 if neighbor and neighbor.wall else 0

    def tile(self, row, column):
        """Return tile at intersection of row and column. Return None if row or column out of range"""
        if 0 <= row < self.rows and 0 <= column < self.columns:
            return self.tiles[row * self.columns + column + 1]
        return None
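Tile itself is not shown here; roughly, it only needs to carry a few attributes that Board fills in:

class Tile:
    """Rough sketch of the Tile attributes used by Board above (the full class is not shown)."""
    def __init__(self, label, goal=False, trap=False, wall=False):
        self.label = label      # square number, 1..rows*columns
        self.goal = goal
        self.trap = trap
        self.wall = wall
        self.neighbor = {}      # Action -> adjacent Tile (or None), filled in by Board
        self.values = {}        # Action -> value, filled in by Board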
Posts: 25
Threads: 5
Joined: Mar 2022
(Apr-24-2022, 07:14 PM)deanhystad Wrote: Is this correct?
Node_Start = Node(S, [6, 3, 1, 1])
Node_3 = Node(3, [7, 4, 1, S])
I think your board is still riddled with errors. [...]
Node_Start = Node(S, [6, 3, 1, 1])
Node_3 = Node(3, [7, 4, 1, S])
These are correct. However, does the new Board print output matching the expected results? Thank you!
Posts: 6,827
Threads: 20
Joined: Feb 2020
I'm pretty sure that is wrong. Going down from node 2 or node 3 should not take you to node 1.
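A sketch of computing neighbors programmatically (a hypothetical helper, using the thread's bottom-to-top labeling where squares 1-4 are the bottom row) makes that kind of mistake impossible:

def neighbor(label, direction, rows=4, columns=4):
    """Return the label of the adjacent square, or None if the move leaves the board."""
    steps = {"up": (1, 0), "right": (0, 1), "down": (-1, 0), "left": (0, -1)}
    row, col = divmod(label - 1, columns)  # label 1 is row 0 (bottom), column 0
    dr, dc = steps[direction]
    row, col = row + dr, col + dc
    if 0 <= row < rows and 0 <= col < columns:
        return row * columns + col + 1
    return None

print(neighbor(2, "down"))  # None -- there is no square below 2
print(neighbor(2, "up"))    # 6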