Sep-06-2022, 09:30 PM
# Comparing classification algorithms import pandas as pd import numpy as np import matplotlib.pyplot as plt from sklearn.model_selection import KFold from sklearn.model_selection import cross_val_score from sklearn.linear_model import LogisticRegression from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.neighbors import KNeighborsClassifier from sklearn.tree import DecisionTreeClassifier from sklearn.naive_bayes import GaussianNB from sklearn.svm import SVC filename = 'pima-indians-diabetes.data.csv' names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class'] dataframe = pd.read_csv(filename, names=names) array = dataframe.values X = array[:,0:8] y = array[:,8] # Prepare models models = [] models.append(('LR', LogisticRegression(solver='newton-cg'))) models.append(('LDA', LinearDiscriminantAnalysis())) models.append(('KNN', KNeighborsClassifier())) models.append(('CART', DecisionTreeClassifier())) models.append(('NB', GaussianNB())) models.append(('SVM', SVC())) # Evaluate each model in turn results = [] names = [] scoring = 'accuracy' seed = 7 for name, model in models: kfold = KFold(n_splits=10, shuffle=True, random_state=seed) cv_results = cross_val_score(model, X, y, cv=kfold, scoring=scoring) results.append(cv_results) names.append(name) msg = ('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std())) print(msg) # boxplot algorithm comparison fig = plt.figure() fig.suptitle('Algorithm Comparison') ax = fig.add_subplot(111) plt.boxplot(results) ax.set_xticklabels(names) plt.show()