May I know how to modify my Python programming so that can obtain the accuracy vs number of neighbours as refer to the attached image file -
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# read in the iris data from sklearn.datasets import load_iris iris = load_iris() # create X (features) and y (response) X = iris.data y = iris.target from sklearn.neighbors import KNeighborsClassifier k1 = ( 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 ) k2 = ( 10 , 15 , 20 , 25 , 30 , 35 , 40 ) knn = KNeighborsClassifier(n_neighbors = 10 ) knn.fit(X, y) y_pred = knn.predict(X) from sklearn import metrics metrics.accuracy_score(y,y_pred) knn = KNeighborsClassifier(n_neighbors = 1 ) from sklearn.cross_validation import train_test_split X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.4 , random_state = 0 ) # import Matplotlib (scientific plotting library) import matplotlib.pyplot as plt import numpy as np # try K=1 through K=9 and record testing accuracy k1_range = range ( 1 , 9 ) k2_range = range ( 10 , 40 ) # create Python dictionary using [] scores = [] for k1 in k1_range: knn = KNeighborsClassifier(n_neighbors = k1, metric = 'minkowski' , p = 2 ) knn.fit(X_train, y_train) y_pred = knn.predict(X_test) scores.append(metrics.accuracy_score(y_test, y_pred)) for k2 in k2_range: knn = KNeighborsClassifier(n_neighbors = k2, metric = 'minkowski' , p = 2 ) knn.fit(X_train, y_train) y_pred = knn.predict(X_test) scores.append(metrics.accuracy_score(y_test, y_pred)) # plot the relationship between K and testing accuracy # plt.plot(x_axis, y_axis) plt.plot(k1_range, scores) plt.yticks(np.arange( 0.93 , 0.98 , 0.03 )) plt.plot(k2_range, scores) plt.yticks(np.arange( 0.91 , 0.98 , 0.03 )) plt.xlabel( 'Number of neighbors' ) plt.ylabel( 'Accuracy' ) |
The error message is -
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
runfile( 'C:/Users/HSIPL/Desktop/Homework 8 Solution draft.py' , wdir = 'C:/Users/HSIPL/Desktop' ) Traceback (most recent call last): File "<ipython-input-31-1ba40d3637a3>" , line 1 , in <module> runfile( 'C:/Users/HSIPL/Desktop/Homework 8 Solution draft.py' , wdir = 'C:/Users/HSIPL/Desktop' ) File "C:\Users\HSIPL\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py" , line 668 , in runfile execfile (filename, namespace) File "C:\Users\HSIPL\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py" , line 108 , in execfile exec ( compile (f.read(), filename, 'exec' ), namespace) File "C:/Users/HSIPL/Desktop/Homework 8 Solution draft.py" , line 45 , in <module> plt.plot(k1_range, scores) File "C:\Users\HSIPL\Anaconda3\lib\site-packages\matplotlib\pyplot.py" , line 3363 , in plot ret = ax.plot( * args, * * kwargs) File "C:\Users\HSIPL\Anaconda3\lib\site-packages\matplotlib\__init__.py" , line 1867 , in inner return func(ax, * args, * * kwargs) File "C:\Users\HSIPL\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py" , line 1528 , in plot for line in self ._get_lines( * args, * * kwargs): File "C:\Users\HSIPL\Anaconda3\lib\site-packages\matplotlib\axes\_base.py" , line 406 , in _grab_next_args for seg in self ._plot_args(this, kwargs): File "C:\Users\HSIPL\Anaconda3\lib\site-packages\matplotlib\axes\_base.py" , line 383 , in _plot_args x, y = self ._xy_from_xy(x, y) File "C:\Users\HSIPL\Anaconda3\lib\site-packages\matplotlib\axes\_base.py" , line 242 , in _xy_from_xy "have shapes {} and {}" . format (x.shape, y.shape)) ValueError: x and y must have same first dimension, but have shapes ( 8 ,) and ( 38 ,) |
Please refer the attached image file -
![[Image: o8oNB.jpg]](https://i.stack.imgur.com/o8oNB.jpg)
Please help me on this case