Python Forum

Full Version: Compare Machine Learning Regression Algorithms Consistently
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
# Comparison of Regression Algorithms
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
# Load the housing data set, supplying the column names explicitly.
filename = 'housing1.csv'
names = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV',
]
dataframe = pd.read_csv(filename, names=names)
array = dataframe.to_numpy()
# The first 13 columns are the predictors; the 14th column (MEDV) is the
# regression target.
X, y = array[:, :13], array[:, 13]
# Candidate regressors to compare, as (short label, estimator) pairs.
# Every estimator is constructed with its default arguments.
models = [
    ('LR', LinearRegression()),
    ('RR', Ridge()),
    ('LaR', Lasso()),
    ('ELN', ElasticNet()),
    ('KNN', KNeighborsRegressor()),
    ('CART', DecisionTreeRegressor()),
    ('SVR', SVR()),
]
# Evaluate each model in turn with 10-fold cross-validation.
# `results` collects the per-fold score arrays and `names` the matching
# model labels, in the same order, for the comparison plot.
results = []
names = []
seed = 7
scoring = 'neg_mean_squared_error'
# Build the splitter once instead of on every iteration: it does not depend
# on the model, and a fixed integer random_state makes the shuffled folds
# reproducible, so every model is scored on the same train/test partitions.
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
for name, model in models:
    cv_results = cross_val_score(model, X, y, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    # Report the mean and standard deviation of the fold scores. With this
    # scorer the values are negated mean squared errors, so scores closer
    # to zero indicate a better fit.
    print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))
# Box plot of the cross-validation scores: one box per model, labelled
# with the model's short name.
fig, ax = plt.subplots()
fig.suptitle('Algorithm Comparison')
ax.boxplot(results)
ax.set_xticklabels(names)
plt.show()