Mar-12-2020, 12:46 AM
Spent some time on this tonight. Note that my version reads the CSV differently, from a different source (my Google Drive). It still has errors, but the plot at the end is meant to show the modeled values. The next step is to plot the actual values against the modeled ones and, if you want, compute statistics on them.
# Fit a degree-2 polynomial regression of passenger counts against year and
# plot the fitted curve over the training points.
#
# The script is written for Google Colab: it mounts Google Drive and reads the
# CSV from there.

# Load libraries
from pandas import read_csv
from pandas.plotting import scatter_matrix
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# Load dataset
from google.colab import drive

drive.mount('/content/drive')

filename = (r'/content/drive/My Drive/analyza_casovych_radov.csv')
cols = ['Rok', 'Pocet prepravenych cestujucich', ]
# NOTE(review): `names=cols` makes pandas treat the first line of the file as
# data. If the CSV has a header row, pass `header=0` as well - confirm against
# the actual file.
dataset = pd.read_csv(filename, names=cols)

df = dataset

# 80 % of the rows go to train+validate, the remaining 20 % to test; the
# train+validate part is then split 80/20 again. Fixed seeds keep the split
# reproducible.
trainval_dataset = df.sample(frac=0.8, random_state=42)
test_dataset = df.drop(trainval_dataset.index)
train_dataset = trainval_dataset.sample(frac=0.8, random_state=42)
validate_dataset = trainval_dataset.drop(train_dataset.index)

print()
print(f"Train {train_dataset.shape} Validate {validate_dataset.shape} Test {test_dataset.shape}")
print()
print()
print('train_dataset= ')
print(train_dataset)
print()
print('test_dataset= ')
print(test_dataset)
print()
print('validate_dataset= ')
print(validate_dataset)
print()

X = train_dataset['Rok']
y = train_dataset['Pocet prepravenych cestujucich']

# Expand the single feature into [1, x, x^2]. The transformer is fitted exactly
# once, on the raw one-column input; refitting it on the expanded matrix (as
# the previous version did) makes it expect three input columns.
poly = PolynomialFeatures(2)
X_poly = poly.fit_transform(X.to_frame().values.reshape(-1, 1))

lin2 = LinearRegression()
lin2.fit(X_poly, y)

# The training rows are a shuffled sample, so sort the x values before drawing
# the fitted curve; otherwise the line zigzags between points.
X_curve = X.sort_values()
# Use `transform` (not `fit_transform`) on the raw x values so the model gets
# the same three columns it was trained on. Expanding `X_poly` again yields 10
# columns and raises the matmul "size 3 is different from 10" ValueError.
y_curve = lin2.predict(poly.transform(X_curve.to_frame().values.reshape(-1, 1)))

plt.scatter(X.values, y, color='blue')
plt.plot(X_curve.values, y_curve, color='red')
plt.title('Polynomial Regression')
plt.xlabel('Rok')
plt.ylabel('Other')
plt.show()
Output:---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-20-b5736948e378> in <module>()
64 plt.scatter(X.values, y, color = 'blue')
65
---> 66 plt.plot(X.values, lin2.predict(poly.fit_transform(X_poly)), color = 'red')
67 plt.title('Polynomial Regression')
68 plt.xlabel('Rok')
2 frames
/usr/local/lib/python3.6/dist-packages/sklearn/utils/extmath.py in safe_sparse_dot(a, b, dense_output)
149 ret = np.dot(a, b)
150 else:
--> 151 ret = a @ b
152
153 if (sparse.issparse(a) and sparse.issparse(b)
ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 3 is different from 10)