Python Forum
Feature Extraction and Modeling Pipeline
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Feature Extraction and Modeling Pipeline
#1
# Create a pipeline that extracts features from the data then creates (evaluates) a model
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.pipeline import FeatureUnion
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
filename = 'pima-indians-diabetes.data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pd.read_csv(filename, names=names)
array = dataframe.values
X = array[:,0:8]
y = array[:,8]
features = []
features.append(('pca', PCA(n_components=3)))
features.append(('select_best', SelectKBest(k=6)))
feature_union = FeatureUnion(features)
# create pipeline
estimators = []
estimators.append(('feature_union', feature_union))
estimators.append(('logistic', LogisticRegression(solver='newton-cg')))
model = Pipeline(estimators)
# evaluate pipeline
seed = 7
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(model, X, y, cv=kfold)
print(results.mean())
Reply
#2
May I suggest spacing code out a bit to make it easier to read.

# Create a pipeline that extracts features from the data then creates (evaluates) a model
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.pipeline import FeatureUnion
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest

filename = "pima-indians-diabetes.data.csv"
names = ["preg", "plas", "pres", "skin", "test", "mass", "pedi", "age", "class"]

dataframe = pd.read_csv(filename, names=names)
array = dataframe.values
X = array[:, 0:8]
y = array[:, 8]
features = []
features.append(("pca", PCA(n_components=3)))
features.append(("select_best", SelectKBest(k=6)))
feature_union = FeatureUnion(features)

# create pipeline
estimators = []
estimators.append(("feature_union", feature_union))
estimators.append(("logistic", LogisticRegression(solver="newton-cg")))
model = Pipeline(estimators)

# evaluate pipeline
seed = 7
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(model, X, y, cv=kfold)
print(results.mean())
Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
  A Python library for audio and music analysis, feature extraction CMLab 0 1,955 Mar-16-2023, 03:26 AM
Last Post: CMLab
  how to run a command pipeline and get its result output as a piped file Skaperen 0 1,810 Aug-04-2022, 11:59 PM
Last Post: Skaperen
  pipeline.py Skaperen 11 8,947 Feb-25-2017, 05:11 AM
Last Post: Skaperen

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020