It has been three days and I have not been able to make much progress. This is how close I am to the expected result; however, it still does not print the output that I want.
For example, with this input: 15 12 8 6 p
I expect this result:
1 up
2 right
3 up
4 left
5 up
6 wall-square
7 up
8 forbid
9 up
10 up
11 up
12 goal
13 right
14 right
15 goal
16 up
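(For reference, the square numbering comes from self.episode in the code at the bottom, with square 2 as the start; in this example 15 and 12 are the goals, 8 is forbidden, and 6 is the wall:)

13 14 15 16
 9 10 11 12
 5  6  7  8
 1  2  3  4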
With this input: 15 12 8 6 q 11
I am expecting this:
up 100.0
right 100.0
down 0.89
left 0.89
Updated: I am able to print the output correctly with the input 15 12 8 6 q 11. However, it throws an error when I try other inputs, such as:
12 7 5 6 q 3
Traceback (most recent call last):
File "C:", line 200, in <module>
user_input()
File "C:", line 197, in user_input
environment.print_four_Q_value(int(input_list[5]))
File "C:", line 142, in print_four_Q_value
print("down" + ' ' + str(round(episode.qValues[3], 2)))
TypeError: type NoneType doesn't define __round__ method
It still prints the first two lines before failing:
up 100.0
right 0.89
The expected output is:
up 100.0
right 0.89
down 9.9
left 0.89
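As far as I can tell, the error message itself just means round() was handed a None instead of a number. A minimal reproduction, separate from my program:

q = None       # what episode.qValues[3] apparently holds for state 3
round(q, 2)    # TypeError: type NoneType doesn't define __round__ method

# a possible guard while debugging (my own workaround, not the intended fix):
print("down" + ' ' + (str(round(q, 2)) if q is not None else "n/a"))

Guarding like this stops the crash, but it obviously does not produce the expected down 9.9, so the real question is why that q-value is still None after 10,000 iterations.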
I wonder if you have any more advice?
import random
import numpy as np
import enum

EACH_STEP_REWARD = -0.1
GOAL_SQUARE_REWARD = 100
FORBIDDEN_SQUARE_REWARD = -100
DISCOUNT_RATE_GAMMA = 0.1  # Discount Rate
LEARNING_RATE_ALPHA = 0.3  # Learning Rate
GREEDY_PROBABILITY_EPSILON = 0.5  # Greedy Probability
ITERATION_MAX_NUM = 10000  # Will be 10,000
START_LABEL = 2
LEVEL = 4
HEIGHT = 4
WEIGHT = 4


class Direction(enum.Enum):
    up = 1
    right = 2
    down = 3
    left = 0


class Node:
    def __init__(self, title, next, Goal=False, Forbidden=False, Wall=False, qValues=None, actions=None):
        self.title = title
        self.next = next
        self.qValues = [qValues] * 5
        self.move = [actions] * 5
        self.goal = Goal
        self.forbidden = Forbidden
        self.wall = Wall

    def max_Q_value(self):
        if self.wall:
            return False
        max_q = []
        for q in self.qValues:
            if q is not None:
                max_q.append(q)
        return max(max_q)

    def find_best_move(self):
        max_q = self.max_Q_value()
        q_index = self.qValues.index(max_q)
        return Direction(q_index)


class create_env:
    def __init__(self, input_list, wall=None):
        self.wall = wall
        self.episode = [[13, 14, 15, 16], [9, 10, 11, 12], [5, 6, 7, 8], [1, 2, 3, 4]]
        S = 2
        Node_1 = Node(1, [self.wall, 5, S, self.wall])
        Node_Start = Node(S, [1, 6, 3, self.wall])
        Node_3 = Node(3, [S, 7, 4, self.wall])
        Node_4 = Node(4, [3, 8, self.wall, self.wall])
        Node_5 = Node(5, [self.wall, 9, 6, 1])
        Node_6 = Node(6, [5, 10, 7, S])
        Node_7 = Node(7, [6, 11, 8, 3])
        Node_8 = Node(8, [7, 12, self.wall, 4])
        Node_9 = Node(9, [self.wall, 13, 10, 5])
        Node_10 = Node(10, [9, 14, 11, 6])
        Node_11 = Node(11, [10, 15, 12, 7])
        Node_12 = Node(12, [11, 16, self.wall, 8])
        Node_13 = Node(13, [self.wall, self.wall, 14, 9])
        Node_14 = Node(14, [13, self.wall, 15, 10])
        Node_15 = Node(15, [14, self.wall, 16, 11])
        Node_16 = Node(16, [15, self.wall, self.wall, 12])
        self.episode[0][0] = Node_1
        self.episode[0][1] = Node_Start
        self.episode[0][S] = Node_3
        self.episode[0][3] = Node_4
        self.episode[1][0] = Node_5
        self.episode[1][1] = Node_6
        self.episode[1][S] = Node_7
        self.episode[1][3] = Node_8
        self.episode[S][0] = Node_9
        self.episode[S][1] = Node_10
        self.episode[S][S] = Node_11
        self.episode[S][3] = Node_12
        self.episode[3][0] = Node_13
        self.episode[3][1] = Node_14
        self.episode[3][S] = Node_15
        self.episode[3][3] = Node_16
        self.goal_labels = [int(input_list[0]), int(input_list[1])]
        self.forbidden_label = int(input_list[2])
        self.wall_label = int(input_list[3])
        x = 0
        while x < LEVEL:
            y = 0
            while y < LEVEL:
                current_episode = self.episode[x][y]
                if current_episode.title in self.goal_labels:
                    current_episode.goal = 1
                    current_episode.move.insert(4, 0)
                    current_episode.qValues.insert(4, 0)
                elif current_episode.title == self.forbidden_label:
                    current_episode.forbidden = 1
                    current_episode.move.insert(4, 0)
                    current_episode.qValues.insert(4, 0)
                elif current_episode.title == self.wall_label:
                    current_episode.wall = 1
                else:
                    position = 0
                    while position < LEVEL:
                        if current_episode.next[position] is not None:
                            current_episode.move.insert(position, Direction(position))
                            current_episode.qValues.insert(position, False)
                        position += 1
                y += 1
            x += 1

    def get_episode(self, name):
        for x in self.episode:
            for episode in x:
                if episode.title == name:
                    # print(episode)
                    return episode

    def print_best_actions(self):
        for row in self.episode:
            for episode in row:
                if episode.goal:
                    best_action_str = 'Direction.goal'
                elif episode.forbidden:
                    best_action_str = "Direction.forbid"
                elif episode.wall:
                    best_action_str = 'Direction.wall-square'
                else:
                    best_action_str = str(episode.find_best_move())
                print(str(episode.title) + " " + best_action_str[10:])

    def print_four_Q_value(self, index):
        episode = self.get_episode(index)
        print("up" + ' ' + str(round(episode.qValues[1], 2)))
        print("right" + ' ' + str(round(episode.qValues[2], 2)))
        print("down" + ' ' + str(round(episode.qValues[3], 2)))  # line 142 in the traceback
        print("left" + ' ' + str(round(episode.qValues[0], 2)))


def Q_learning(environment, print_best_actions, index):
    for iteration in range(ITERATION_MAX_NUM):
        current_episode = environment.get_episode(START_LABEL)
        total_episode_reward = 0
        for episode in range(100):
            if np.random.uniform(0, 1) < GREEDY_PROBABILITY_EPSILON:
                next_move = []
                for score in current_episode.move:
                    if score is not None:
                        next_move.append(score)
                next_move = random.choice(next_move)
            else:
                next_move = current_episode.find_best_move()
            next_episode = environment.get_episode(current_episode.next[next_move.value])
            if next_episode.goal:
                reward = GOAL_SQUARE_REWARD
            elif next_episode.forbidden:
                reward = FORBIDDEN_SQUARE_REWARD
            else:
                reward = EACH_STEP_REWARD
            total_episode_reward += reward
            old_q = current_episode.qValues[next_move.value]
            new_q = old_q + LEARNING_RATE_ALPHA * (reward + DISCOUNT_RATE_GAMMA * next_episode.max_Q_value() - old_q)
            current_episode.qValues[next_move.value] = new_q
            if next_episode.goal:
                break
            elif next_episode.forbidden:
                break
            else:
                if next_episode.wall:
                    break
                else:
                    current_episode = next_episode


def user_input():
    try:
        input_list = []
        input_str = input()
        input_list = input_str.split()
    except:
        print("The input should be like: 15 12 8 6 p")
    environment = create_env(input_list)
    if (len(input_list) == 5) and (input_list[-1] == 'p'):
        Q_learning(environment, 1, 0)
        environment.print_best_actions()
    elif (len(input_list) == 6) and (input_list[-2] == 'q'):
        Q_learning(environment, 0, int(input_list[5]))
        environment.print_four_Q_value(int(input_list[5]))


user_input()
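One thing I noticed while re-reading create_env (a guess, not verified): the setup loop only inserts a Direction and a starting q-value at positions where current_episode.next[position] is not None, and insert() grows the move and qValues lists instead of overwriting the None placeholders. For a border square like 3, the down neighbor is the outer wall, so Q_learning can never select down there and qValues[3] keeps its initial None, which is exactly the slot that print_four_Q_value tries to round. A defensive rewrite of the printer that I tried (same names, just a None guard):

def print_four_Q_value(self, index):
    episode = self.get_episode(index)
    # print in the same order as before, but tolerate never-updated entries
    for name, i in (("up", 1), ("right", 2), ("down", 3), ("left", 0)):
        q = episode.qValues[i]
        print(name + ' ' + (str(round(q, 2)) if q is not None else "n/a"))

That avoids the traceback, but since the expected output lists a learned value for down as well, I suspect moves into the border are supposed to be allowed (with the agent staying in place) rather than excluded entirely.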