import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
# ------------- Data loading section ------------
iris = load_iris()
# -----------------------------------------------
# ----------- Data preparation section ----------
# Feature matrix X and class-label vector y for the iris dataset.
X, y = iris.data, iris.target
# Hold out 40% of the samples as a test set; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=0
)
# -----------------------------------------------
# ---- Classifier parameter initialization ------
# Candidate neighbor counts: a fine sweep (1..9) and a coarse sweep (10..40, step 5).
k1_range = range(1, 10)
k2_range = range(10, 41, 5)
# you probably need to specify metric type here, e.g.
# metric_type = 'minkowski' and power, e.g. m_power = 2
# Note: the Minkowski metric with power 2 is the Euclidean metric.
# -----------------------------------------------
# ----- main computational block goes here ------
def knn_accuracy_sweep(k_values, X_tr, X_te, y_tr, y_te):
    """Fit a k-NN classifier for each k in *k_values*; return test accuracies.

    Uses the Minkowski metric with p=2, i.e. the Euclidean distance.

    Parameters: k_values -- iterable of neighbor counts to try;
    X_tr/y_tr -- training features/labels; X_te/y_te -- test features/labels.
    Returns: list of float accuracies, one per k, in iteration order.
    """
    scores = []
    for k in k_values:
        knn = KNeighborsClassifier(n_neighbors=k, metric='minkowski', p=2)
        knn.fit(X_tr, y_tr)
        # knn.score(X, y) is the mean accuracy on (X, y) — identical to
        # metrics.accuracy_score(y, knn.predict(X)); the original referenced
        # `metrics` without importing it, which raised NameError.
        scores.append(knn.score(X_te, y_te))
    return scores


# Accuracy for the fine sweep (k = 1..9) and the coarse sweep (k = 10..40, step 5).
scores1 = knn_accuracy_sweep(k1_range, X_train, X_test, y_train, y_test)
scores2 = knn_accuracy_sweep(k2_range, X_train, X_test, y_train, y_test)
# -----------------------------------------------
# ----------- plotting obtained results ---------
# Figure 1: accuracy vs. number of neighbors, fine sweep (k = 1..9).
plt.figure()
plt.plot(k1_range, scores1)
plt.yticks(np.arange(0.93, 0.98, 0.03))
# The x-axis label was missing on this figure (the second figure had one);
# added for consistency between the two plots.
plt.xlabel('Number of neighbors')
plt.ylabel('Accuracy')
# Figure 2: accuracy vs. number of neighbors, coarse sweep (k = 10..40, step 5).
plt.figure()
plt.plot(k2_range, scores2)
plt.yticks(np.arange(0.91, 0.98, 0.03))
plt.xlabel('Number of neighbors')
plt.ylabel('Accuracy')
# Blocks until the figure windows are closed.
plt.show()
# -----------------------------------------------