Why did you not link to the site where you got the code from?
The code may have worked at one time, but now it doesn't.
This is how it can be with many of these data science blogs: the code is thrown together at one time to test some stuff.
You should look at the method used, then write your own code to test it out.
I could fix this mess so that it works, but there is really not much point in that; write your own code based on the method used.

The code may have worked at one time, but now it doesn't.
This is how it can be with many of these data science blogs: the code is thrown together at one time to test some stuff.
You should look at the method used, then write your own code to test it out.
I could fix this mess so that it works, but there is really not much point in that; write your own code based on the method used.
#!/usr/bin/env python
# coding: utf-8
"""Prepare play-by-play data for a run/pass play-type classifier.

Reads ``plays.csv`` from the working directory, drops plays that are not
modelled, derives numeric features, plots the class balance and splits the
feature table into train / validation / test sets.  The XGBoost training and
evaluation steps were commented out in the notebook export this script comes
from; they are kept as comments at the end of ``main``.
"""

import numpy as np
import pandas as pd

# import xgboost as xgb
# from sklearn import metrics

# seaborn and matplotlib are imported inside plot_play_type_counts() so that
# the feature helpers below can be imported without a plotting stack.

PLAYS_CSV = 'plays.csv'

# Column order produced by transform_off_personnel().
PERSONNEL_POSITIONS = ('RB', 'TE', 'WR', 'OL', 'DL', 'DB')
PERSONNEL_COLUMNS = [
    'rb_count',
    'te_count',
    'wr_count',
    'ol_count',
    'dl_count',
    'db_count',
]

# Integer code per offensive formation; anything else is encoded as -1.
FORMATION_CODES = {
    'SHOTGUN': 0,
    'SINGLEBACK': 1,
    'EMPTY': 2,
    'I_FORM': 3,
    'PISTOL': 4,
    'JUMBO': 5,
    'WILDCAT': 6,
    'ACE': 7,
}

# PassResult codes that label a play as 'Passing'.
# NOTE(review): every other value, including a missing one, is labelled
# 'Rushing' -- confirm no further pass outcome codes exist in the data.
PASS_RESULTS = frozenset({'I', 'C', 'S'})

# Columns of the final modelling table (dl_count is not included, as in the
# original selection).
FEATURE_COLUMNS = [
    'down',
    'yardsToGo',
    'rb_count',
    'te_count',
    'wr_count',
    'ol_count',
    'db_count',
    'secondsLeftInHalf',
    'half',
    'numericFormation',
    'play_type',
]

# XGBoost settings for the (currently disabled) training step.
param = {
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'max_depth': 5,
    'eta': 0.2,
    'rate_drop': 0.2,
    'min_child_weight': 6,
    'gamma': 4,  # was misspelled 'gama'
    'subsample': 0.8,
    'alpha': 0.1,
}
num_round = 250


def translate_game_clock(row):
    """Return the seconds left in the half for one play.

    ``row['GameClock']`` is parsed as ``MM:SS`` (anything after a second
    colon is ignored).  Quarters 1 and 3 add 900 seconds on top of the time
    left in the quarter; quarters 2 and 4 return the quarter time unchanged.
    Any other quarter yields ``None``.
    """
    minutes, _, remainder = row['GameClock'].partition(':')
    seconds = remainder.partition(':')[0]
    seconds_left_in_quarter = int(minutes) * 60 + int(seconds)

    quarter = row['quarter']
    if quarter in (1, 3):
        return seconds_left_in_quarter + 900
    if quarter in (2, 4):
        return seconds_left_in_quarter
    return None


def yards_to_endzone(row):
    """Return ``100 - yardlineNumber`` when the possession team is on its own
    ``yardlineSide``, otherwise ``yardlineNumber`` itself."""
    if row['possessionTeam'] == row['yardlineSide']:
        return 100 - row['yardlineNumber']
    return row['yardlineNumber']


def transform_off_personnel(row):
    """Parse ``row['personnel.offense']`` into per-position player counts.

    The value is a comma-separated list of ``"<count> <position>"`` tokens.
    Returns a ``pd.Series`` ordered as RB, TE, WR, OL, DL, DB; positions that
    do not appear, and a missing value, give 0.
    """
    counts = dict.fromkeys(PERSONNEL_POSITIONS, 0)
    personnel = row['personnel.offense']
    if not pd.isna(personnel):
        for token in personnel.split(','):
            # split() also discards the space that follows each comma; the
            # original fixed-offset slicing missed every token but the first.
            parts = token.split()
            if len(parts) != 2:
                continue
            count, position = parts
            if position in counts and count.isdigit():
                counts[position] = int(count)
    return pd.Series([counts[position] for position in PERSONNEL_POSITIONS])


def formation(row):
    """Return the integer code for ``row['offenseFormation']`` (-1 if the
    formation is unknown or not a string)."""
    form = row['offenseFormation']
    if isinstance(form, str):
        form = form.strip()
    return FORMATION_CODES.get(form, -1)


def play_type(row):
    """Label a play 'Passing' when its PassResult is I, C or S, else
    'Rushing'."""
    return 'Passing' if row['PassResult'] in PASS_RESULTS else 'Rushing'


def load_plays(path=PLAYS_CSV):
    """Read the play-by-play CSV into a DataFrame."""
    return pd.read_csv(path)


def drop_unmodelled_plays(df):
    """Drop plays flagged by isSTPlay, kneel-downs and quarter-5 plays.

    The remaining row count is printed after each filter.
    """
    # drop st plays
    df = df[~df['isSTPlay']]
    print(len(df))

    # drop kneels; na=False keeps the mask boolean when a description is
    # missing (otherwise `~` fails on the NaN entries).
    df = df[~df['playDescription'].str.contains('kneels', na=False)]
    print(len(df))

    # drop overtime
    df = df[df['quarter'] != 5]
    print(len(df))
    return df


def add_features(df):
    """Return a copy of *df* with the derived modelling columns added."""
    # Work on a copy so that assigning columns never writes into a slice of
    # the caller's frame.
    df = df.copy()

    # convert time/quarters
    if 'GameClock' in df.columns and 'quarter' in df.columns:
        df['secondsLeftInHalf'] = df.apply(translate_game_clock, axis=1)
    if 'quarter' in df.columns:
        df['half'] = df['quarter'].map(lambda q: 2 if q > 2 else 1)

    # Fill missing yard lines before deriving yardsToEndzone; the original
    # filled them only after the feature had already been computed.
    print(df.yardlineNumber.unique())
    df['yardlineNumber'] = df['yardlineNumber'].fillna(50)
    print(df.yardlineNumber.unique())
    df['yardsToEndzone'] = df.apply(yards_to_endzone, axis=1)

    df[PERSONNEL_COLUMNS] = df.apply(transform_off_personnel, axis=1)

    # A missing formation is treated as 'EMPTY'.  The original tested
    # pd.isna(False), which is never true, so nothing was ever replaced.
    df['offenseFormation'] = df['offenseFormation'].fillna('EMPTY')
    df['numericFormation'] = df.apply(formation, axis=1)

    df['play_type'] = df.apply(play_type, axis=1)
    df['numeric_PlayType'] = (df['play_type'] == 'Passing').astype(int)
    return df


def plot_play_type_counts(df_final):
    """Show the overall play-type counts and the counts per down."""
    import matplotlib.pyplot as plt
    import seaborn as sns

    sns.catplot(x='play_type', kind='count', data=df_final, orient='h')
    plt.show()

    sns.catplot(x="down", kind="count", hue='play_type', data=df_final)
    plt.show()


def split_dataset(df_final, split_points=(0.7, 0.9)):
    """Shuffle *df_final* and cut it into train, validation and test frames.

    *split_points* are the cumulative fractions at which the shuffled frame
    is cut (70 % / 20 % / 10 % by default).
    """
    shuffled = df_final.sample(frac=1)
    train_end = int(split_points[0] * len(shuffled))
    validation_end = int(split_points[1] * len(shuffled))
    # Positional slicing replaces np.split, which on a DataFrame goes through
    # the deprecated DataFrame.swapaxes.
    return (
        shuffled.iloc[:train_end],
        shuffled.iloc[train_end:validation_end],
        shuffled.iloc[validation_end:],
    )


def main():
    """Run the whole preparation pipeline on ``plays.csv``."""
    df = load_plays()
    print(len(df))
    print(df.head())

    df = drop_unmodelled_plays(df)
    df = add_features(df)
    df_final = df[FEATURE_COLUMNS]

    # print(df_final.describe(include='all'))

    plot_play_type_counts(df_final)

    # sns.lmplot(x="yrdsToGo", y="numericPlayType", data=df_final,
    #            y_jitter=0.03, logistic=True, aspect=2)
    # plt.show()

    train_df, validation_df, test_df = split_dataset(df_final)
    print("Training size is %d, validation size is %d, test_size is %d"
          % (len(train_df), len(validation_df), len(test_df)))

    # --- Model training / evaluation (disabled in the original export) ---
    # NOTE(review): the lines below use a 'numericPlayType' column, but the
    # label created in add_features() is 'numeric_PlayType' and is not part
    # of df_final, and val_clean_df is built from train_df instead of
    # validation_df.  Fix both before re-enabling this section.
    #
    # train_clean_df = train_df.drop(columns=['numericPlayType'])
    # d_train = xgb.DMatrix(train_clean_df,
    #                       label=train_df['numericPlayType'],
    #                       feature_names=list(train_clean_df))
    #
    # val_clean_df = train_df.drop(columns=['numericPlayType'])
    # d_val = xgb.DMatrix(val_clean_df,
    #                     label=validation_df['numericPlayType'],
    #                     feature_names=list(val_clean_df))
    # eval_list = [(d_train, 'train'), (d_val, 'eval')]
    # results = {}
    #
    # xgb_model = xgb.train(param, d_train, num_round, eval_list,
    #                       early_stopping_rounds=8)
    #
    # test_clean_df = test_df.drop(columns=['numericPlayType'])
    # d_test = xgb.DMatrix(test_clean_df,
    #                      label=test_df['numericPlayType'],
    #                      feature_names=list(test_clean_df))
    #
    # actual = test_df['numericPlayType']
    # predictions = xgb_model.predict(d_test)
    # print(predictions[:5])
    #
    # rounded_predictions = np.round(predictions)
    # accuracy = metrics.accuracy_score(actual, rounded_predictions)
    # print("Metrics:\nAccuracy: % 4f" % (accuracy))


if __name__ == "__main__":
    main()