extratreesclassifier returns all zeroes instead of feature importances - Printable Version +- Python Forum (https://python-forum.io) +-- Forum: Python Coding (https://python-forum.io/forum-7.html) +--- Forum: General Coding Help (https://python-forum.io/forum-8.html) +--- Thread: extratreesclassifier returns all zeroes instead of feature importances (/thread-37776.html) |
extratreesclassifier returns all zeroes instead of feature importances - Led_Zeppelin - Jul-20-2022 In the following code which should print out feature importances, I instead get a print of a matrix of all zeroes. import numpy as np import pandas as pd %matplotlib inline from sklearn.preprocessing import StandardScaler from statsmodels.tsa.stattools import adfuller from statsmodels.tsa.seasonal import seasonal_decompose import time from tqdm import tqdm from scipy import stats from sklearn.ensemble import ExtraTreesClassifier df = pd.read_csv("shortened_sensor.csv") print('here') pd.set_option("display.max_rows", None, "display.max_columns", None) df.head() #df = df.head(5) #df.to_csv("shortened_sensor.csv", index = False) #df = df.head(5) # Find Duplicate Values # Results will be the list of duplicate values # If no duplicate values, nothing will list. df[df['timestamp'].duplicated(keep=False)] df.isnull().sum() df['machine_status'].value_counts() # Convert timestamp column into data type into datetime df['timestamp'] = pd.to_datetime(df['timestamp']) # Create a Series time_period = pd.Series([]) # Assign values to series for i in tqdm(range(df.shape[0])): if (df["timestamp"][i].hour >= 4) and (df["timestamp"][i].hour < 10): time_period[i]="Morning" elif (df["timestamp"][i].hour >= 10) and (df["timestamp"][i].hour < 16): time_period[i]="Noon" elif (df["timestamp"][i].hour >= 16) and (df["timestamp"][i].hour < 22): time_period[i]="Evening" else: time_period[i]="Night" # Insert new column time period df.insert(2, 'time_period', time_period) # The columns sensor_00, sensor_06, sensor-07, sensor_08, sensor_09, sensor-51 # Missing values are filled with median value of respective columns df['sensor_00'].fillna(df['sensor_00'].median(), inplace=True) df['sensor_06'].fillna(df['sensor_06'].median(), inplace=True) df['sensor_07'].fillna(df['sensor_07'].median(), inplace=True) df['sensor_08'].fillna(df['sensor_08'].median(), inplace=True) 
df['sensor_09'].fillna(df['sensor_09'].median(), inplace=True) df['sensor_51'].fillna(df['sensor_51'].median(), inplace=True) df['sensor_01'].fillna(df['sensor_01'].median(), inplace=True) df['sensor_02'].fillna(df['sensor_02'].median(), inplace=True) df['sensor_03'].fillna(df['sensor_03'].median(), inplace=True) df['sensor_04'].fillna(df['sensor_04'].median(), inplace=True) df['sensor_05'].fillna(df['sensor_05'].median(), inplace=True) df['sensor_10'].fillna(df['sensor_10'].median(), inplace=True) df df1 = df.copy() df.drop(["Unnamed: 0","timestamp","time_period","machine_status"], axis = 1, inplace=True) df.head() scaler=StandardScaler() df=scaler.fit_transform(df) df columns = [f'sensor_{idx:02d}' for idx in range(52)] df2 = pd.DataFrame(df, columns=columns) df2["machine_status"] = df1["machine_status"] df2.head() # Separating the dependent and independent variables y = df2 [ 'machine_status' ] X = df2.drop(['machine_status', 'sensor_15'], axis = 1 ) X.head() model = ExtraTreesClassifier() model.fit(X, y) print(model.feature_importances_) I am also including a reduced-size CSV file to use as data. Why is this happening? How can I fix it? Any help is appreciated. Respectfully, LZ[attachment=1857] |