Python Forum

[machine learning] identifying a number 0-9 from a 28x28 picture, not working
I wanted to try my hand at some machine learning, so I started with this project, which uses the data provided by MNIST. I studied the concepts and algorithms behind machine learning online and then decided to put my knowledge to the test without using any libraries such as PyTorch. I successfully initialized the weights and biases, implemented forward propagation, and attempted to implement backpropagation.
To explain a bit of the code so it's easier to understand: the weights are kept in numpy arrays indexed as weights[layer][neuron_k][neuron_j] (where neuron_j is in the next layer), and the biases as biases[layer][neuron_k]. I use sigmoid as the activation function. During backpropagation I compute a value I want to add to a weight or bias and save it in an array covering one layer; the per-layer arrays are collected into a list, and one such list is stored per training example, like so: self.weight_changes = [[numpy array for layer 1-2 weights, layer 2-3, ...], [layer 1-2, layer 2-3, ...], ...]. The function commit_changes applies all the changes stored in self.weight_changes and self.bias_changes. The code looks at a neuron and its error (how far it is from the expected value), and changes the weights based on that error to pull the neuron closer to the wanted value. Each neuron in the layer behind is then given an expected value in the same way.
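To make that layout concrete, here is a minimal sketch of how those structures are indexed; the 3-2-1 layer sizes are made up for illustration, not the real 784-16-16-10 network:

import numpy as np

nodes = [3, 2, 1]  # toy layer sizes, illustration only
# weights[layer][neuron_k][neuron_j]: neuron_k sits in `layer`, neuron_j in the next layer
weights = [np.zeros((nodes[i], nodes[i + 1])) for i in range(len(nodes) - 1)]
# biases[layer][neuron_k]: one bias vector per non-input layer
biases = [np.zeros(nodes[i]) for i in range(1, len(nodes))]

# weight_changes holds one list of per-layer change matrices per training example;
# commit_changes later adds every buffered matrix onto the live weights and clears the buffer
weight_changes = []
weight_changes.append([np.zeros_like(w) for w in weights])
weight_changes[0][0][2][1] += 0.1  # a pending change for the weight from input neuron 2 to hidden neuron 1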
Now the problem is that the predictions are all wrong when I run the test function, and I'm fairly sure it's because I haven't properly implemented backpropagation. I would appreciate it if anyone could point out the mistakes I made or point me in the right direction. Thank you for taking the time to read this.
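For reference, a textbook gradient step for a single sigmoid layer trained on squared error looks roughly like the sketch below. This is a generic illustration with made-up names, not the code from this post: the point of comparison is that the change to each weight is scaled by the sigmoid derivative and by the value of the incoming activation, not only by its sign.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_layer_step(prev_activations, weights, biases, target, rate=0.5):
    # Forward pass through one sigmoid layer
    z = prev_activations @ weights + biases
    out = sigmoid(z)
    # Delta combines the output error with the sigmoid derivative out * (1 - out)
    delta = (target - out) * out * (1.0 - out)
    # Weight change is delta scaled by the incoming activation value;
    # weights and biases are updated in place
    weights += rate * np.outer(prev_activations, delta)
    biases += rate * delta
    return out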

main.py
from data_handler import DataHandler
import random
import time
import math
import numpy as np


def check(arr):  # replace any NaN values with 0.0
    arr[np.isnan(arr)] = 0.0
    return arr


def sigmoid(arr):
    arr = 1 / (1 + np.exp(-arr))
    return check(arr)


def get_desired_output(number):
    output = [0] * 10  # one-hot target vector
    output[number] = 1
    return output


class AI:
    def __init__(self, nodes):
        self.nodes = nodes

        # One weight matrix per pair of adjacent layers, indexed as
        # weights[layer][neuron_k][neuron_j]. A plain list of arrays is used
        # because the per-layer shapes differ.
        self.weights = [np.array([[
            random.uniform(-1/math.sqrt(nodes[node_i]), 1/math.sqrt(nodes[node_i]))
            for _ in range(nodes[node_i+1])]
            for _ in range(nodes[node_i])])
            for node_i in range(len(nodes)-1)]
        self.weight_changes = []

        # One float bias vector per non-input layer, so in-place updates
        # are not truncated to integers.
        self.biases = [np.zeros(nodes[node_i]) for node_i in range(1, len(nodes))]
        self.bias_changes = []

        self.output = []
        self.rate = 0.5
        self.activation = sigmoid

    def get_empty_weights(self, layer):
        # Zero-filled so any slot starts from a neutral change
        return np.zeros((self.nodes[layer], self.nodes[layer+1]))

    def get_empty_biases(self, layer):
        return np.zeros(self.nodes[layer])

    def train_neurons(self, input_data, number):
        self.output = [input_data]
        self.forwardprop(input_data, 0)
        self.backprop(number)
        self.output = []

    def forwardprop(self, values, layer, output=True):
        # np.zeros rather than np.empty: the accumulator must start at zero
        next_layer = np.zeros((self.nodes[layer+1],))
        for node_i in range(self.nodes[layer+1]):
            next_layer[node_i] = sum(
                values[value_i]*self.weights[layer][value_i][node_i]
                for value_i in range(len(values))) + self.biases[layer][node_i]
        next_layer = self.activation(next_layer)

        if output:
            self.output.append(next_layer)
        if layer == len(self.nodes)-2:
            return next_layer

        # Pass the output flag along so test-time calls don't append to self.output
        return self.forwardprop(next_layer, layer+1, output)

    def backprop(self, number):
        expected_outputs = list(self.output)
        expected_outputs[-1] = np.array(get_desired_output(number))

        self.weight_changes.append([])
        self.bias_changes.append([])

        for index_l in range(len(self.output)-1, 0, -1):  # walk from the output layer back to layer 1
            layer = self.output[index_l]
            empty_weights = self.get_empty_weights(index_l-1)
            empty_biases = self.get_empty_biases(index_l)
            for index_n in range(len(layer)):
                expected_output = expected_outputs[index_l][index_n]
                error = expected_output - layer[index_n]
                empty_biases[index_n] = error*self.rate
                weights = self.get_neuron_weights(index_l, index_n)
                for index_w in range(len(weights)):
                    if self.output[index_l-1][index_w] == 0:
                        empty_weights[index_w][index_n] = 0
                        continue
                    negative_or_positive = self.output[index_l-1][index_w]/abs(self.output[index_l-1][index_w])
                    empty_weights[index_w][index_n] = error*self.rate*negative_or_positive

                if index_l != 1:  # If index_l == 1, then we are on the second layer; not changing the first layer (input)
                    for index_o in range(len(expected_outputs[index_l-1])):
                        negative_or_positive = self.weights[index_l-1][index_o][index_n]/abs(self.weights[index_l-1][index_o][index_n])
                        expected_outputs[index_l-1][index_o] += error*self.rate/10*negative_or_positive

            self.weight_changes[-1].insert(0, empty_weights)
            self.bias_changes[-1].insert(0, empty_biases)

    def get_neuron_weights(self, layer, target_neuron):
        # All the weights which affect one specific neuron's value
        return np.array([neuron[target_neuron] for neuron in self.weights[layer-1]])

    def commit_changes(self, data):
        # Apply every buffered change matrix/vector to the live weights and
        # biases, then clear the buffers.
        for weight_change in self.weight_changes:
            for index_l in range(len(weight_change)):
                self.weights[index_l] += weight_change[index_l]
        self.weight_changes = []

        for bias_change in self.bias_changes:
            for index_l in range(len(bias_change)):
                self.biases[index_l] += bias_change[index_l]
        self.bias_changes = []

        batch = list(data.get_testing_batch())
        error = 0
        for info in batch:
            results = self.forwardprop(np.array(info[0]), 0, False)
            desired_results = get_desired_output(info[1])
            output_sum = np.sum([(results[output] - desired_results[output]) ** 2 for output in range(len(results))])
            error += output_sum / len(results)
        error /= len(batch)
        self.rate = error

    def test(self, data):
        batch = list(data.get_testing_batch())
        for info in batch:
            results = self.forwardprop(np.array(info[0]), 0, False)
            desired_results = get_desired_output(info[1])
            output_sum = np.sum([(results[output] - desired_results[output])**2 for output in range(len(results))])
            print(f'Error margin: {output_sum/len(results)}')
            print(f"Prediction: {np.argmax(results)} | Number: {info[1]}")
            print(results)
            print('____________________________')


def main():
    start = time.perf_counter()  # reference point so the printed run time is meaningful
    data = DataHandler()
    ai = AI([784, 16, 16, 10])
    for _ in range(100):
        batch = list(data.get_batch())
        for number in batch:
            ai.train_neurons(np.array(number[0]), number[1])
        ai.commit_changes(data)
    ai.test(data)
    print(f"Total run time: {time.perf_counter() - start:.2f} seconds")


if __name__ == '__main__':
    main()
data_handler.py
from mnist import MNIST


class DataHandler:
    def __init__(self):
        data = MNIST('MNIST_DATA')
        self.images, self.labels = data.load_training()
        self.tImages, self.tLabels = data.load_testing()
        self.tIndex = -1
        self.index = -1

    def get_batch(self):
        if self.index + 10 >= len(self.images):
            self.index = -1
        for _ in range(10):
            self.index += 1
            # yield the image together with its actual label, not the loop counter
            yield self.images[self.index], self.labels[self.index]

    def get_testing_batch(self):
        if self.tIndex + 10 >= len(self.tImages):
            self.tIndex = -1
        for _ in range(10):
            self.tIndex += 1
            yield self.tImages[self.tIndex], self.tLabels[self.tIndex]