Aug-02-2020, 11:24 PM
(This post was last modified: Aug-03-2020, 03:37 AM by ErnestTBass.)
#!/usr/bin/python # -*- coding: utf-8 -*- import pandas as pd import numpy as np import seaborn as sns import matplotlib.pyplot as plt import xgboost as xgb from sklearn import metrics df = pd.read_csv('plays.csv') print(len(df)) print(df.head()) # drop st plays df = df[~df['isSTPlay']] print (len(df)) # drop kneels df = df[~df['playDescription'].str.contains('kneels')] print(len(df)) #drop overtime df = df[~(df['quarter'] == 5)] print(len(df)) #convert time/quarters def translate_game_clock(row): raw_game_clock = row['GameClock'] quarter = row['quarter'] minutes, seconds_raw = raw_game_clock.partition(':')[::2] seconds = seconds_raw.partition(':')[0] total_seconds_left_in_quarter = int(seconds) + (int(minutes) * 60) if quarter == 3 or quarter == 1: return total_seconds_left_in_quarter + 900 elif quarter == 4 or quarter == 2: return total_seconds_left_in_quarter if 'GameClock' in list (df.columns): df["secondsLeftInHalf"] = df.apply(translate_game_clock, axis=1) if 'quarter' in list (df.columns): df["half"] = df['quarter'].map(lambda q: 2 if q > 2 else 1) def yards_to_endzone(row): if row['possessionTeam'] == row['yardlineSide']: return 100 - row['yardlineNumber'] else : return row['yardlineNumber'] df['yardsToEndzone'] = df.apply(yards_to_endzone, axis = 1) def transform_off_personnel(row): rb_count = 0 te_count = 0 wr_count = 0 ol_count = 0 dl_count = 0 db_count = 0 if not pd.isna(row['personnel.offense']): personnel = row['personnel.offense'].split(',') for p in personnel: if p[2:4] == 'RB': rb_count = int(p[0]) elif p[2:4] == 'TE': te_count = int(p[0]) elif p[2:4] == 'WR': wr_count = int(p[0]) elif p[2:4] == 'OL': ol_count = int(p[0]) elif p[2:4] == 'DL': dl_count = int(p[0]) elif p[2:4] == 'DB': db_count = int(p[0]) return pd.Series([ rb_count, te_count, wr_count, ol_count, dl_count, db_count, ]) df[[ 'rb_count', 'te_count', 'wr_count', 'ol_count', 'dl_count', 'db_count', ]] = df.apply(transform_off_personnel, axis=1) df['offenseFormation'] = df['offenseFormation'].map(lambda f: ('EMPTY' if pd.isna(false) else f)) def formation(row): form = row['offenseFormation'].strip() if form == 'SHOTGUN': return 0 elif form == 'SINGLEBACK': return 1 elif form == 'EMPTY': return 2 elif form == 'I_FORM': return 3 elif form == 'PISTOL': return 4 elif form == 'JUMBO': return 5 elif form == 'WILDCAT': return 6 elif form == 'ACE': return 7 else: return -1 df['numericFormation'] = df.apply(formation, axis=1) print(df.yardlineNumber.unique()) def play_type(row): if row['PassResult'] == 'I' or row['PassResult'] == 'C' or row['PassResult'] == 'S': return 'Passing' else: return 'Rushing' df['play_type'] = df.apply(play_type, axis = 1) df['numeric_PlayType'] = df['play_type'] .map(lambda p : 1 if p == 'Passing' else 0) df_final= df[['down','yardsToGo','yarsdtoEndzone','rb_count','te_count','wr_count','ol_count','db_count','secondsLeftInHalf', 'half','numericPlayType', 'numericFormation','play_type']] print(df.final.describe(include='all')) print(df.yardlineNumber.unique()) df['yardlineNumber'] = df['yardlineNumber'].fillna(50) sns.catplot(x='play_type', kind='count', data=df_final, orient='h') plt.show() sns.catplot(x="down", kind="count", hue='play_type', data=df_final) plt.show() sns.lmplot(x="yrdsToGo", y="numericPlayType", data=df_final, y_jitter=0.03, Logistic=True, aspect=2); plt.show() train_df, validation_df, test_df = np.split(df_final.sample(frac=1),[int(0.7 * len(df)),int(0.9 * len(df))]) print("Training size is %d, validation size is %d, test_size is %d" % (len(train_df), len(validation_df),len(test_df))) train_clean_df = train_df.drop(columns=['numericPlayType']) d_train = xgb.DMatrix(train_clean_df, label=train_df['numericPlayType'],feature_names=list(train_clean_df)) val_clean_df = train_df.drop(columns=['numericPlayType']) d_val = xgb.DMatrix(val_clean_df, label=valiation_df['numericPlayType'],feature_names=list(val_clean_df)) eval_list [(d_train, 'train'), (d_val, 'eval')] results ={} parm = { 'objective': 'binary:logistic', 'eval_metric': 'auc', 'max_depth': 5, 'eta': 0.2, 'rate_drop': 0.2, 'min_child_weight': 6, 'gama' : 4, 'subsample': 0.8, 'alpha': 0.1 } num_round = 250 xgb_model = xgb.train(param, d_train, num_round, eval_list, early_stopping_rounds=8) test_clean_df = test_df.drop(columns=['numericPlayType']) d_test = xgb.DMatrix(test_clean_df, label=test_df['numericPlayType'], feature_names=list(test_clean_df)) actual = test_df['numericPlayType'] predictions = xgb_model.predict(d_test) print(predictions[:5]) accuracy_predictions = np.round(predictions) accuracy = metrics.accuracy_score(actual, rounded_predictions) print("Metrics:\nAccuracy: % 4f" % (accuracy))I am not sure what is goingon.My python is not good enough to detect the error.
Any help appreciated. Thanks in advance.
the link is:
https://opensource.com/article/19/10/pre...%20More%20
Error:UnboundLocalError Traceback (most recent call last)
<ipython-input-1-bceecf1fca01> in <module>
123 'dl_count',
124 'db_count',
--> 125 ]] = df.apply(transform_off_personnel, axis=1)
126
127 df['offenseFormation'] = df['offenseFormation'].map(lambda f: ('EMPTY' if pd.isna(false) else f))
/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
6926 kwds=kwds,
6927 )
-> 6928 return op.get_result()
6929
6930 def applymap(self, func):
/opt/conda/lib/python3.7/site-packages/pandas/core/apply.py in get_result(self)
184 return self.apply_raw()
185
--> 186 return self.apply_standard()
187
188 def apply_empty_result(self):
/opt/conda/lib/python3.7/site-packages/pandas/core/apply.py in apply_standard(self)
290
291 # compute the result using the series generator
--> 292 self.apply_series_generator()
293
294 # wrap results
/opt/conda/lib/python3.7/site-packages/pandas/core/apply.py in apply_series_generator(self)
319 try:
320 for i, v in enumerate(series_gen):
--> 321 results[i] = self.f(v)
322 keys.append(v.name)
323 except Exception as e:
<ipython-input-1-bceecf1fca01> in transform_off_personnel(row)
85 personnel = row['personnel.offense'].split(',')
86
---> 87 for p in personnel:
88
89 if p[2:4] == 'RB':
UnboundLocalError: ("local variable 'personnel' referenced before assignment", 'occurred at index 12807')
Respectfully,ErnestTBass
Attached Files