Hi,
I have the code below, which uses SVR and random forest methods to make predictions. The code works well, but there is something I want to do that I have not been able to do.
I want to do the following:
1. Run the code 10 times, splitting the data randomly each time, and make the predictions on each of the 10 splits.
1.1. The TrainSet and TestSet should be in an 80%:20% proportion (80% TrainSet, 20% TestSet).
2. Arrange the prediction summary of the 10 runs in tabular form, with the following columns:
TrainSet_size TestSet_size accuracy_SVR Accuracy_RandomForest
I have the code below, which uses SVR and random forest methods to make predictions. The code works well, but there is something I want to do that I have not been able to do.
I want to do the following:
1. Run the code 10 times, splitting the data randomly each time, and make the predictions on each of the 10 splits.
1.1. The TrainSet and TestSet should be in an 80%:20% proportion (80% TrainSet, 20% TestSet).
2. Arrange the prediction summary of the 10 runs in tabular form, with the following columns:
TrainSet_size TestSet_size accuracy_SVR Accuracy_RandomForest
# Train a linear SVM and a random forest on columns x1..x3 of the 'Data'
# sheet, then print each model's test accuracy and its features ranked by
# importance (absolute SVM coefficient / forest feature importance).
import matplotlib.pyplot as plt
import matplotlib.pylab as plt  # same alias as above; the later import wins, as in the original script
import numpy as np
import pandas as pd
from numpy import stack
from scipy import stats
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.svm import SVR

try:
    from sklearn.model_selection import cross_val_score, train_test_split
except ImportError:
    # Older scikit-learn releases only ship the legacy module, which newer
    # releases have removed.
    from sklearn.cross_validation import cross_val_score, train_test_split

# ---------------------------------------------------------------- data ----
dataFileName = 'RandomForestInput.xlsx'
sheetName = 'Data'
# The sheet is passed positionally: the keyword was renamed from
# `sheetname` to `sheet_name` between pandas releases, and the positional
# form is accepted by both.
dataRaw = pd.read_excel(dataFileName, sheetName)
noData = len(dataRaw)

labels = ['x1', 'x2', 'x3']
x = dataRaw[labels]
y = dataRaw['y']

# Fixed seed, so this is a single reproducible 90%/10% split.
X_train, X_test, Y_train, Y_test = train_test_split(
    x, y, test_size=0.1, random_state=0)

# The scaler is fitted on the training rows only and then applied to every
# subset, so no statistics from the test rows enter the scaling.
sc = StandardScaler()
sc.fit(X_train)
x_std = sc.transform(x)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# ---------------------------------------------------------- linear SVM ----
linear_svm = SVC(kernel='linear')
linear_svm.fit(X_train_std, Y_train)
y_pred = linear_svm.predict(X_test_std)

# Rank features by the magnitude of their weight in the first row of coef_.
# NOTE(review): with more than two classes coef_ has one row per class
# pair, so this ranks by the first pair only -- confirm 'y' is binary.
coef = np.absolute(linear_svm.coef_[0])
svm_indices = np.argsort(coef)[::-1]

print('Linear SVM')
print("Accuracy: %.2f" % accuracy_score(Y_test, y_pred))
for rank, idx in enumerate(svm_indices, start=1):
    print(("%2d) %-*s %f" % (rank, 30, labels[idx], coef[idx])))

# ------------------------------------------------------- random forest ----
# The forest is trained on the unscaled features, as in the original script.
forest = RandomForestClassifier(
    criterion='entropy', n_estimators=100, random_state=1, n_jobs=2)
forest.fit(X_train, Y_train)
y_pred = forest.predict(X_test)

importances = forest.feature_importances_
indices = np.argsort(importances)[::-1]

print('RandonForest')
print("Accuracy: %.2f" % accuracy_score(Y_test, y_pred))
for rank, idx in enumerate(indices, start=1):
    print(("%2d) %-*s %f" % (rank, 30, labels[idx], importances[idx])))
Attached Files
RandomForestInput.xlsx (Size: 9.07 KB / Downloads: 29)