Jul-15-2019, 08:14 AM
I was trying to load a dataset from my local computer using pandas. When I run the code, I get the errors shown below. Can anyone please help me?
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
#!/usr/bin/env python

'''
An example file to show how to use the feature-selection code in ml_lib
'''
import pandas
from tqdm import tqdm
import pandas as pd
import csv
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn import svm

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import feature_select
import depmeas

if __name__ == '__main__':
    NUM_CV = 3
    RANDOM_SEED = 123
    MAX_ITER = 1000

    leuk = pd.read_csv(r'C:/Users/pc/Desktop/dataset/leukemia.csv')
    # NOTE(review): this lookup is the line that raises KeyError: 'data' in
    # the reported traceback -- the CSV loaded above apparently has no column
    # named 'data'. The 'data'/'target' keys look like they were carried over
    # from a fetch_mldata-style result; confirm against the CSV header.
    X = leuk['data']
    y = leuk['target']

    # split the data for testing
    (X_train, X_test, y_train, y_test) = train_test_split(
        X, y, test_size=0.3, random_state=RANDOM_SEED)

    # perform feature selection
    num_features_to_select = 25
    K_MAX = 1000
    estimator = depmeas.mi_tau
    n_jobs = -1
    feature_ranking = feature_select.feature_select(
        X_train, y_train,
        num_features_to_select=num_features_to_select,
        K_MAX=K_MAX,
        estimator=estimator,
        n_jobs=n_jobs)

    num_selected_features = len(feature_ranking)
    # for each feature, compute the accuracy on the test data as we add features
    mean_acc = np.empty((num_selected_features,))
    var_acc = np.empty((num_selected_features,))
    for ii in tqdm(range(num_selected_features),
                   desc='Computing Classifier Performance...'):
        classifier = svm.SVC(random_state=RANDOM_SEED, max_iter=MAX_ITER)
        # keep only the first ii+1 entries of the ranking as feature columns
        X_test_in = X_test[:, feature_ranking[0:ii + 1]]
        scores = cross_val_score(classifier, X_test_in, y_test,
                                 cv=NUM_CV, n_jobs=-1)

        mu = scores.mean()
        # NOTE(review): scores.std() is a standard deviation, even though it
        # is stored under variance-style names (sigma_sq / var_acc).
        sigma_sq = scores.std()
        mean_acc[ii] = mu
        var_acc[ii] = sigma_sq

    # plot the mean score with a band of half the spread on either side
    x = np.arange(num_selected_features) + 1
    y = mean_acc
    yLo = mean_acc - var_acc / 2.
    yHi = mean_acc + var_acc / 2.
    plt.plot(x, y)
    plt.fill_between(x, yLo, yHi, alpha=0.2)
    plt.grid(True)
    plt.title('Leukemia Dataset Feature Selection\n Total # Features=%d' % (X.shape[1]))
    plt.xlabel('# Selected Features')
    plt.ylabel('SVC Classifier Accuracy')
    plt.show()
Error:
Traceback (most recent call last):
File "C:\Users\pc\PycharmProjects\MymrmrTest\venv\lib\site-packages\pandas\core\indexes\base.py", line 2657, in get_loc
return self._engine.get_loc(key)
File "pandas\_libs\index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1601, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1608, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'data'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/pc/PycharmProjects/MymrmrTest/feature_select_test.py", line 39, in <module>
X = leuk['data']
File "C:\Users\pc\PycharmProjects\MymrmrTest\venv\lib\site-packages\pandas\core\frame.py", line 2927, in __getitem__
indexer = self.columns.get_loc(key)
File "C:\Users\pc\PycharmProjects\MymrmrTest\venv\lib\site-packages\pandas\core\indexes\base.py", line 2659, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1601, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1608, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'data'
Process finished with exit code 1