Jun-17-2021, 01:33 PM
@Larz60+ please have a look at the complete code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier

matplotlib.style.use('ggplot')  # Look Pretty


def plotDecisionBoundary(model, X, y, *, padding=0.6, resolution=0.0025):
    """Draw the decision regions of a fitted 2-D classifier plus the samples.

    A new figure is created. The model is evaluated on a regular grid that
    covers the extent of ``X`` (enlarged by ``padding`` on every side), the
    predictions are drawn as a filled contour map, and the points of ``X``
    are scattered on top, one colour per distinct value in ``y``.

    Args:
        model: Fitted estimator exposing ``predict`` and ``get_params``; the
            returned params must contain ``'n_neighbors'`` (used in the title).
        X: Array-like with at least two columns; only columns 0 and 1 are used.
        y: Array-like of class labels, one per row of ``X``.
        padding: Fraction of each axis range added on both sides of the grid.
        resolution: Grid step along both axes. Small values produce very
            large grids (the grid has ~(range / resolution)**2 points).

    Returns:
        None. The plot is left on the current matplotlib figure.
    """
    colors = ['royalblue', 'forestgreen', 'ghostwhite']

    X = np.asarray(X)
    y = np.asarray(y)

    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Calculate the boundaries, then widen them by the padding fraction.
    x_min, x_max = X[:, 0].min(), X[:, 0].max()
    y_min, y_max = X[:, 1].min(), X[:, 1].max()
    x_range = x_max - x_min
    y_range = y_max - y_min
    x_min -= x_range * padding
    y_min -= y_range * padding
    x_max += x_range * padding
    y_max += y_range * padding

    # Predict a class for every grid node.
    xx, yy = np.meshgrid(
        np.arange(x_min, x_max, resolution),
        np.arange(y_min, y_max, resolution),
    )
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # Plot the contour map
    ax.contourf(xx, yy, Z, cmap=plt.cm.terrain)

    # Plot the supplied points as well. Iterate over the labels that are
    # actually present: after a train/test split the labels need not be the
    # contiguous range 0..k-1, and the palette is cycled so that more than
    # len(colors) classes do not raise IndexError.
    for position, label in enumerate(np.unique(y)):
        mask = y == label
        ax.scatter(
            X[mask, 0],
            X[mask, 1],
            c=colors[position % len(colors)],
            label=str(label),
            alpha=0.8,
        )

    params = model.get_params()
    ax.axis('tight')
    ax.set_title('K = ' + str(params['n_neighbors']))


# --- Load the data -----------------------------------------------------------
csv_path = r"D:\\Clustering\\text-cluster-master\\Articles.csv"
try:
    # pandas >= 1.3: malformed rows are reported with a warning and skipped.
    X = pd.read_csv(csv_path, on_bad_lines="warn")
except TypeError:
    # Older pandas does not accept on_bad_lines; use the legacy spelling.
    X = pd.read_csv(csv_path, error_bad_lines=False)

# The 'Articles' column is the target; everything else is a feature.
y = X.Articles.copy()
X = X.drop(columns=['Articles'])
y = y.astype("category").cat.codes

# Fill missing values with the column means (numeric columns only, so that
# non-numeric columns do not make the mean computation raise).
X = X.fillna(X.mean(numeric_only=True))

# --- Split, normalise, reduce to 2-D ----------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=1
)

normaliser = preprocessing.Normalizer().fit(X_train)
X_train_normalised = normaliser.transform(X_train)
X_test_normalised = normaliser.transform(X_test)

# PCA is fitted on the training data only and then applied to both splits.
pca_reducer = PCA(n_components=2).fit(X_train_normalised)
X_train = pca_reducer.transform(X_train_normalised)
X_test = pca_reducer.transform(X_test_normalised)

# --- Fit, plot and score ----------------------------------------------------
knn = KNeighborsClassifier(n_neighbors=9)
knn.fit(X_train, y_train)

plotDecisionBoundary(knn, X_train, y_train)
print(knn.score(X_test, y_test))
plt.show()