PCA analysis on blobs - Printable Version +- Python Forum (https://python-forum.io) +-- Forum: Python Coding (https://python-forum.io/forum-7.html) +--- Forum: Data Science (https://python-forum.io/forum-44.html) +--- Thread: PCA analysis on blobs (/thread-34739.html) |
PCA analysis on blobs - erdemath - Aug-26-2021 I am trying to do PCA analysis on the classical makeblobs routine. There is a problem with the drawing the arrows. The code and the error message are below. import random import numpy as np from sklearn.decomposition import PCA import matplotlib.pyplot as plt import matplotlib.colors from sklearn.model_selection import train_test_split from sklearn.datasets import make_blobs import pandas as pd class PCA_trials2(): def __init__(self): self.LayerNo = 3 self.LayerIdx = np.asarray([1, 2, 3]) self.MaxPercNo = 735 # Maximum perceptron, neuron, numbers per layer self.InitPerNo = 4 # Number of perceptrons in the input layer self.LengthSample = 100 self.delta = 0.01 self.Event = self.InitPerNo # Number of detected events initially self.PercNo_Layer = np.asarray([60, 20, 60]) def draw_vector(self, v0, v1, ax=None): ax = ax or plt.gca() arrowprops=dict(arrowstyle='->', linewidth=2, shrinkA=0, shrinkB=0) ax.annotate('', v1, v0, arrowprops=arrowprops) def GenerateDataandPCA(self): # creating my own color map for better visualization my_cmap = matplotlib.colors.LinearSegmentedColormap.from_list("", ["red", "yellow", "green"]) # Generating data with multi class data, labels = make_blobs(n_samples=self.LengthSample, centers=1, n_features=self.PercNo_Layer[0], random_state=0) # Add some noise noise = np.asarray([np.random.normal(0, 1, size=self.LengthSample) for _ in range(self.PercNo_Layer[0])]).reshape(self.LengthSample, self.PercNo_Layer[0]) data_noisy = data + noise # converting the multi-class to binary labels_orig = labels labels_binary = np.mod(labels_orig, 2) X_train, X_val, Y_train, Y_val = train_test_split(data, labels_binary, stratify=labels, random_state=0) (n_samples, n_features) = X_val.shape pca = PCA(n_components=min(n_samples, n_features), whiten=True).fit(X_val) data_pca = pca.transform(data) return data, data_noisy, data_pca, labels_orig, labels_binary, my_cmap, X_train, X_val, Y_train, Y_val, pca def main_Visualize(self): data_pca = self.GenerateDataandPCA()[2] pca = self.GenerateDataandPCA()[10] fig, ax = plt.subplots(1, 2, figsize=(16, 6)) fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1) # plot data ax[0].scatter(data_pca[:, 0], data_pca[:, 1], alpha=0.2) for l, v in zip(pca.explained_variance_, pca.components_): self.draw_vector([0, 0], v * l * 3, ax[0]) # for length, vector in zip(pca.explained_variance_, pca.components_): # v = vector * 3 * np.sqrt(length) # self.draw_vector(pca.mean_, pca.mean_ + v, ax=ax[0]) ax[0].axis('equal'); ax[0].set(xlabel='x', ylabel='y', title='input') # plot principal components ax[1].scatter(data_pca[:, 0], data_pca[:, 1], alpha=0.2) # self.draw_vector([0, 0], [0, 3], ax=ax[1]) # self.draw_vector([0, 0], [3, 0], ax=ax[1]) ax[1].axis('equal') ax[1].set(xlabel='component 1', ylabel='component 2', title='principal components', xlim=(-5, 5), ylim=(-3, 3.1)) plt.show() return data_pca
|