Python Forum

Full Version: Transform 2d plot into 1d plot for som
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
Hi everyone!
I'm trying to understand the self-organising map (SOM) neural network with Python. Here's the code:

import numpy as np
from numpy.ma.core import ceil
from scipy.spatial import distance
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from matplotlib import animation, colors

# Comma-separated input file; column 0 is read as the class label below.
data_file = "wine.data"

# Features: columns 1..11 of every row, parsed as float64.
# NOTE(review): only 11 feature columns are read; if the file is the UCI Wine
# data (13 feature columns) the last two are dropped — confirm this is intended.
data_x = np.loadtxt(data_file, delimiter=",", usecols=range(1, 12), dtype=np.float64)

# Labels: column 0 of every row, parsed as int64.
data_y = np.loadtxt(data_file, delimiter=",", usecols=(0,), dtype=np.int64)

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible between runs.
train_x, test_x, train_y, test_y = train_test_split(
    data_x, data_y, test_size=0.2, random_state=42
)
print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)

# data normalisation
def minmax_scaler(data):
    """Rescale ``data`` with a freshly fitted ``MinMaxScaler``.

    The scaler is created and fitted inside this call, so the minimum and
    maximum used for scaling are taken from ``data`` itself.

    Returns the array produced by ``MinMaxScaler.fit_transform``.
    """
    return MinMaxScaler().fit_transform(data)

# euclidean distance
def e_distance(x, y):
    """Return the Euclidean (straight-line) distance between ``x`` and ``y``."""
    straight_line = distance.euclidean(x, y)
    return straight_line

# manhattan distance
def m_distance(x, y):
    """Return the Manhattan (city-block) distance between ``x`` and ``y``."""
    grid_steps = distance.cityblock(x, y)
    return grid_steps

# best matching unit search
def winning_neuron(data, t, som, num_rows, num_cols):
    """Find the best matching unit (BMU) for sample ``t`` of ``data``.

    The BMU is the neuron whose weight vector has the smallest Euclidean
    distance to ``data[t]``.

    Args:
        data: 2-D array of samples, one sample per row.
        t: Row index of the sample to match.
        som: 3-D weight array indexed as ``som[row][col][feature]``.
        num_rows: Number of grid rows to search.
        num_cols: Number of grid columns to search.

    Returns:
        ``[row, col]`` of the closest neuron as a list of Python ints. On a
        tie the first neuron in row-major order wins. An empty search grid
        yields ``[0, 0]``.
    """
    sample = np.asarray(data[t], dtype=np.float64)
    grid = np.asarray(som, dtype=np.float64)[:num_rows, :num_cols]
    if grid.size == 0:
        return [0, 0]

    # No artificial upper bound on the distance: starting the search from
    # sqrt(n_features) only works when every value lies in [0, 1] and would
    # otherwise silently report neuron [0, 0] as the winner.
    distances = np.linalg.norm(grid - sample, axis=-1)

    # argmin returns the first minimum in flattened (row-major) order.
    row, col = np.unravel_index(np.argmin(distances), distances.shape)
    return [int(row), int(col)]

# learning rate & neighbourhood range calculation
def decay(step, max_steps, max_learning_rate, max_m_distance):
    """Linearly decay the learning rate and the neighbourhood range.

    Both values are scaled by ``1 - step / max_steps``, so they start at their
    maximum for ``step == 0`` and shrink as ``step`` approaches ``max_steps``.

    Returns:
        A ``(learning_rate, neighbourhood_range)`` tuple; the neighbourhood
        range is rounded up with ``ceil``.
    """
    remaining = 1.0 - np.float64(step) / max_steps
    return remaining * max_learning_rate, ceil(remaining * max_m_distance)

# SOM grid size: rows x columns of neurons.
num_rows = 10
num_cols = 10
# Neighbourhood radius (Manhattan distance on the grid) at the first step.
max_m_distance = 4
# Learning rate at the first step.
max_learning_rate = 0.5
# Number of training iterations: 7.5 * 10e3, where 10e3 is 10000 (not 1000).
# NOTE(review): if 7500 iterations were intended, this should be 7500.
max_steps = 75000

# main
# Scale the training features before they are presented to the map.
train_x_norm = minmax_scaler(train_x)

# init SOM: one random weight vector per grid cell; the seed is fixed so the
# initial weights (and the sample order below) are reproducible.
num_dims = train_x_norm.shape[1]
np.random.seed(40)
som = np.random.random_sample(size=(num_rows, num_cols, num_dims))

# start train
for step in range(max_steps):
    iteration = step + 1
    if iteration % 1000 == 0:
        print("Iteration: ", iteration)
    learning_rate, neighbourhood_range = decay(
        step, max_steps, max_learning_rate, max_m_distance
    )

    # Draw one random training sample and find the neuron closest to it.
    t = np.random.randint(0, high=train_x_norm.shape[0])
    sample = train_x_norm[t]
    winner = winning_neuron(train_x_norm, t, som, num_rows, num_cols)

    # Move every neuron within the current neighbourhood range of the winner
    # (Manhattan distance on the grid) towards the sample.
    for row in range(num_rows):
        for col in range(num_cols):
            if m_distance([row, col], winner) <= neighbourhood_range:
                som[row, col] += learning_rate * (sample - som[row, col])
print("SOM training completed")

# collecting labels
# For every neuron, gather the labels of the training samples it wins.
label_data = train_y
# Named ``cell_labels`` rather than ``map`` so the builtin is not shadowed.
cell_labels = np.empty(shape=(num_rows, num_cols), dtype=object)

# Give each cell its own empty list. Rows iterate over num_rows (not
# num_cols) so non-square grids are initialised completely.
for row in range(num_rows):
    for col in range(num_cols):
        cell_labels[row][col] = []

for t in range(train_x_norm.shape[0]):
    if (t+1) % 1000 == 0:
        print("sample data: ", t+1)
    winner = winning_neuron(train_x_norm, t, som, num_rows, num_cols)
    cell_labels[winner[0]][winner[1]].append(label_data[t])

# construct label map
# Each neuron gets the most frequent label among the samples it won.
label_map = np.zeros(shape=(num_rows, num_cols), dtype=np.int64)
for row in range(num_rows):
    for col in range(num_cols):
        label_list = cell_labels[row][col]
        if not label_list:
            # Neurons that won no training sample fall back to label 2.
            label = 2
        else:
            # On a tie, the label that appears first in the list wins.
            label = max(label_list, key=label_list.count)
        label_map[row][col] = label

# Show the label map as an image drawn with a three-colour listed colormap.
palette = ["tab:green", "tab:red", "tab:orange"]
cmap = colors.ListedColormap(palette)
title = f"Iteration {max_steps}"

plt.imshow(label_map, cmap=cmap)
plt.colorbar()
plt.title(title)
plt.show()

# test data
# Scale the test features with the minimum/maximum of the TRAINING set.
# Fitting a separate scaler on the test set would place the test samples in a
# different feature space from the one the SOM was trained in.
scaler = MinMaxScaler().fit(train_x)
data = scaler.transform(test_x)
winner_labels = []

# Predict each test sample's label as the label of its best matching neuron.
for t in range(data.shape[0]):
    winner = winning_neuron(data, t, som, num_rows, num_cols)
    row, col = winner
    predicted = label_map[row][col]
    winner_labels.append(predicted)

print("accuracy: ", accuracy_score(test_y, np.array(winner_labels)))
I'm using the Wine dataset. You can download it from here.

What I need now is to show the results in a 1D plot. At the moment I can only view a 2D plot; this is how it looks now:
[Image: ORfBp.png]

And this is what I want to see (this is just an example of how it should look):
[Image: KxCB2.png]

Please help me transform the two-dimensional results into one-dimensional ones. Thank you!