nesticot committed on
Commit
e4ded93
·
verified ·
1 Parent(s): f39ff4d

Delete batting_update.py

Browse files
Files changed (1) hide show
  1. batting_update.py +0 -608
batting_update.py DELETED
@@ -1,608 +0,0 @@
1
- import pandas as pd
2
- import numpy as np
3
- import joblib
4
- import math
5
- import pickle
6
-
7
# Pre-trained estimators, deserialised once at import time. The paths are
# relative, so they resolve against the current working directory.
# NOTE(review): joblib.load unpickles arbitrary objects; only ship trusted
# model artifacts in joblib_model/.
barrel_model = joblib.load('joblib_model/barrel_model.joblib')
# `loaded_model` used to be a second load of the very same file. It is kept
# as an alias so any importer of the old name still works, without reading
# and unpickling the artifact twice.
loaded_model = barrel_model
in_zone_model = joblib.load('joblib_model/in_zone_model_knn_20240410.joblib')
attack_zone_model = joblib.load('joblib_model/model_attack_zone.joblib')
xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')
px_model = joblib.load('joblib_model/linear_reg_model_x.joblib')
pz_model = joblib.load('joblib_model/linear_reg_model_z.joblib')
14
-
15
-
16
def percentile(n):
    """Return a NaN-ignoring ``n``-th percentile function.

    The returned callable takes one array-like argument and evaluates
    ``np.nanpercentile`` on it. Its ``__name__`` is set to
    ``percentile_<n>`` so each percentile gets a distinct function name.
    """
    def percentile_(values):
        # Missing entries are skipped rather than turning the result into NaN.
        return np.nanpercentile(values, n)

    percentile_.__name__ = 'percentile_%s' % n
    return percentile_
21
-
22
-
23
def df_update(df=None):
    """Add per-pitch batting flags, run values and model outputs to ``df``.

    The frame is modified in place and also returned. Columns read by this
    function: ``sz_top``, ``sz_bot``, ``zone``, ``x``, ``y``, ``px``, ``pz``,
    ``event_type``, ``play_description``, ``launch_speed``, ``launch_angle``,
    ``play_code``, ``is_swing``, ``is_pitch``, ``pitch_type`` and
    ``trajectory``.

    The module-level estimators ``px_model``, ``in_zone_model``,
    ``barrel_model``, ``attack_zone_model`` and ``xwoba_model`` are used to
    fill in or derive values. Each one is only called when at least one row
    has every input it needs, so an empty selection never reaches
    ``predict``.

    Args:
        df: Pitch-level DataFrame. ``None`` stands for an empty frame, which
            (as before) fails on the first missing column.

    Returns:
        The same DataFrame object with the derived columns added.
    """
    if df is None:
        df = pd.DataFrame()

    # A strike-zone edge of exactly 0 is treated as "not recorded".
    for edge in ('sz_top', 'sz_bot'):
        df.loc[df[edge] == 0, edge] = np.nan

    # A positive zone below 10 is in-zone; a missing or non-positive zone is
    # left as NaN so the classifier below can fill it in.
    df['in_zone'] = [x < 10 if x > 0 else np.nan for x in df['zone']]

    # Back-fill plate coordinates from the raw x / y columns. Each
    # coordinate has its own guard so neither prediction runs on zero rows.
    missing_px = df['x'].notna() & df['px'].isna()
    if missing_px.any():
        df.loc[missing_px, 'px'] = px_model.predict(df.loc[missing_px, ['x']])
    missing_pz = df['y'].notna() & df['pz'].isna()
    if missing_pz.any():
        # NOTE(review): pz is derived from px_model plus a 3.2 offset even
        # though pz_model is loaded at module level. Behaviour is kept
        # as-is; confirm whether pz_model was intended here.
        df.loc[missing_pz, 'pz'] = px_model.predict(df.loc[missing_pz, ['y']]) + 3.2

    # Classify in/out of zone where the feed gave no zone but the location
    # and strike-zone bounds are all known (the model cannot take NaN).
    zone_inputs = ['px', 'pz', 'sz_top', 'sz_bot']
    needs_zone = df['in_zone'].isna() & df[zone_inputs].notna().all(axis=1)
    if needs_zone.any():
        print('We found missing data')
        df.loc[needs_zone, 'in_zone'] = in_zone_model.predict(
            df.loc[needs_zone, zone_inputs].values)

    hit_codes = ['single',
                 'double', 'home_run', 'triple']

    ab_codes = ['single', 'strikeout', 'field_out',
                'grounded_into_double_play', 'fielders_choice', 'force_out',
                'double', 'field_error', 'home_run', 'triple',
                'double_play',
                'fielders_choice_out', 'strikeout_double_play',
                'other_out', 'triple_play']

    obp_true_codes = ['single', 'walk',
                      'double', 'home_run', 'triple',
                      'hit_by_pitch', 'intent_walk']

    obp_codes = ['single', 'strikeout', 'walk', 'field_out',
                 'grounded_into_double_play', 'fielders_choice', 'force_out',
                 'double', 'sac_fly', 'field_error', 'home_run', 'triple',
                 'hit_by_pitch', 'double_play', 'intent_walk',
                 'fielders_choice_out', 'strikeout_double_play',
                 'sac_fly_double_play',
                 'other_out', 'triple_play']

    bip_codes = ['In play, no out', 'In play, run(s)', 'In play, out(s)']

    # Boolean event flags (False for rows with no matching event).
    df['hits'] = df['event_type'].isin(hit_codes)
    df['ab'] = df['event_type'].isin(ab_codes)
    df['on_base'] = df['event_type'].isin(obp_true_codes)
    df['obp'] = df['event_type'].isin(obp_codes)
    df['bip'] = df['play_description'].isin(bip_codes)

    # Rows with a measured exit velocity: the denominator for the
    # batted-ball quality rates in the summaries.
    df['bip_div'] = df['launch_speed'].notna()

    # Rows with both launch measurements feed the barrel and xwOBA models.
    batted = df[['launch_speed', 'launch_angle']].notna().all(axis=1)

    df['barrel'] = np.nan
    if batted.any():
        df.loc[batted, 'barrel'] = barrel_model.predict(
            df.loc[batted, ['launch_speed', 'launch_angle']])

    # NOTE(review): for the next two columns a *missing* measurement maps to
    # 0 while a measured value outside the range maps to NaN. Sums are
    # unaffected, so this is preserved; confirm before using row-level means.
    df['sweet_spot'] = np.select(
        [df['launch_angle'].isna(),
         (df['launch_angle'] >= 8) & (df['launch_angle'] <= 32)],
        [False, True],
        default=np.nan)
    df['hard_hit'] = np.select(
        [df['launch_speed'].isna(),
         df['launch_speed'] >= 94.5],
        [False, True],
        default=np.nan)

    # Total bases per hit type; NaN for every other row.
    total_bases = {'single': 1, 'double': 2, 'triple': 3, 'home_run': 4}
    df['tb'] = df['event_type'].map(total_bases).astype('float64')

    # Linear weights per event; events outside the table stay NaN.
    woba_weights = dict.fromkeys(
        ['strikeout', 'field_out', 'sac_fly', 'force_out',
         'grounded_into_double_play', 'fielders_choice', 'field_error',
         'sac_bunt', 'double_play', 'fielders_choice_out',
         'strikeout_double_play', 'sac_fly_double_play', 'other_out'],
        0.0)
    woba_weights.update({'walk': 0.696,
                         'hit_by_pitch': 0.726,
                         'single': 0.883,
                         'double': 1.244,
                         'triple': 1.569,
                         'home_run': 2.004})
    df['woba'] = df['event_type'].map(woba_weights).astype('float64')

    # 1 for every event carrying a wOBA weight (the wOBA denominator), NaN
    # otherwise. The weighted events are exactly the keys of the table.
    df['woba_codes'] = np.where(df['woba'].notna(), 1.0, np.nan)

    # wOBA restricted to balls in play.
    df['woba_contact'] = df['woba'].where(df['bip'])

    # Swing / take indicators derived from the play code.
    df['whiffs'] = df['play_code'].isin(['S', 'W', 'T']).astype(int)
    df['csw'] = df['play_code'].isin(['S', 'W', 'T', 'C']).astype(int)
    df['swings'] = (df['is_swing'] == True).astype(int)  # noqa: E712

    # Explicit == True / == False so NaN in_zone counts as neither.
    zone_true = df['in_zone'] == True    # noqa: E712
    zone_false = df['in_zone'] == False  # noqa: E712
    swung = df['swings'] == 1
    made_contact = df['whiffs'] == 0
    df['out_zone'] = zone_false
    df['zone_swing'] = zone_true & swung
    df['zone_contact'] = zone_true & swung & made_contact
    df['ozone_swing'] = zone_false & swung
    df['ozone_contact'] = zone_false & swung & made_contact

    # Any event whose name contains "strikeout" counts as a strikeout.
    strikeout_events = [event for event in df['event_type'].dropna().unique()
                        if 'strikeout' in event]
    df['k'] = df['event_type'].isin(strikeout_events)
    df['bb'] = df['event_type'].isin(['walk', 'intent_walk'])

    df['k_minus_bb'] = df['k'].astype(np.float32) - df['bb'].astype(np.float32)
    df['bb_minus_k'] = df['bb'].astype(np.float32) - df['k'].astype(np.float32)

    # A row closes a plate appearance when it carries an event string.
    df['pa'] = [1 if isinstance(x, str) else 0 for x in df['event_type']]
    # Plain truthiness on purpose (unchanged): NaN counts as a pitch.
    df['pitches'] = [1 if x else 0 for x in df['is_pitch']]

    # Pitch-type codes grouped into three families. Codes not listed here
    # (including the non-pitch codes UN, IN, PO, AB, AS, NP) become 'Unknown'.
    pitch_groups = {
        'Fastball': ['FA', 'FF', 'FT', 'FC', 'SI'],
        'Off-Speed': ['FS', 'FO', 'SC', 'GY', 'CH', 'KN'],
        'Breaking': ['ST', 'SL', 'CU', 'KC', 'SV', 'CS', 'EP'],
    }
    pitch_cat = {code: family
                 for family, codes in pitch_groups.items()
                 for code in codes}
    df['pitch_category'] = df['pitch_type'].map(pitch_cat).fillna('Unknown')
    # Constant key so the whole frame can be grouped into a single row.
    df['average'] = 'average'

    # Fold bunt trajectories into the regular classes; blank means missing.
    for raw, clean in (('bunt_popup', 'popup'),
                       ('bunt_grounder', 'ground_ball'),
                       ('', np.nan),
                       ('bunt_line_drive', 'line_drive')):
        df.loc[df['trajectory'] == raw, 'trajectory'] = clean

    # Reindex to the four expected dummy columns: a class absent from this
    # frame becomes an all-False column and an unexpected class is dropped.
    # Assigning the raw get_dummies output would fail or mislabel columns.
    trajectory_cols = ['trajectory_fly_ball', 'trajectory_ground_ball',
                       'trajectory_line_drive', 'trajectory_popup']
    dummies = pd.get_dummies(df['trajectory'], prefix='trajectory')
    df[trajectory_cols] = dummies.reindex(columns=trajectory_cols,
                                          fill_value=False)

    # Attack-zone class from the model, only for fully located pitches.
    df['attack_zone'] = np.nan
    located = df[zone_inputs].notna().all(axis=1)
    if located.any():
        df.loc[located, 'attack_zone'] = attack_zone_model.predict(
            df.loc[located, zone_inputs])

    # Model classes 0-3 are labelled heart / shadow / chase / waste.
    zone_labels = ['heart', 'shadow', 'chase', 'waste']
    for code, label in enumerate(zone_labels):
        df[label] = df['attack_zone'] == code
    for code, label in enumerate(zone_labels):
        df[label + '_swing'] = (df['attack_zone'] == code) & swung

    df['xwoba'] = np.nan
    df['xwoba_contact'] = np.nan

    if batted.any():
        # Expected value = class probabilities weighted by linear weights.
        # Presumably the classes are ordered out, single, double, triple,
        # home run; verify against how xwoba_model was trained.
        probabilities = xwoba_model.predict_proba(
            df.loc[batted, ['launch_angle', 'launch_speed']])
        expected = (probabilities
                    * np.array([0, 0.883, 1.244, 1.569, 2.004])).sum(axis=1)
        df.loc[batted, 'xwoba'] = expected
        df.loc[batted, 'xwoba_contact'] = expected

    # Non-contact outcomes get their fixed weight. These assignments are
    # mask-based, so they are safe to run whether or not the frame holds any
    # batted balls, and they override a model value on the same row.
    ## Assign a value of 0.696 to every walk in the dataset
    df.loc[df['event_type'].isin(['walk']), 'xwoba'] = 0.696

    ## Assign a value of 0.726 to every hit by pitch in the dataset
    df.loc[df['event_type'].isin(['hit_by_pitch']), 'xwoba'] = 0.726

    ## Assign a value of 0 to every Strikeout in the dataset
    df.loc[df['event_type'].isin(['strikeout', 'strikeout_double_play']), 'xwoba'] = 0

    return df
290
-
291
def df_update_summ(df=pd.DataFrame()):
    """Collapse pitch-level rows into one row of totals per batter.

    Rows are grouped by ``batter_id`` and ``batter_name``. Most outputs are
    plain sums of the same-named input column; the exceptions are spelled
    out as ``(output, source, aggregation)`` tuples below. The group keys
    come back as ordinary columns.
    """
    # A bare string means "sum this column into a column of the same name".
    layout = [
        'pa', 'ab', ('obp_pa', 'obp', 'sum'), 'hits', 'on_base',
        'k', 'bb', 'bb_minus_k', 'csw', 'bip', 'bip_div', 'tb',
        'woba', 'woba_contact', 'xwoba', 'xwoba_contact', 'woba_codes',
        'hard_hit', 'barrel', 'sweet_spot',
        ('max_launch_speed', 'launch_speed', 'max'),
        ('launch_speed_90', 'launch_speed', percentile(90)),
        ('launch_speed', 'launch_speed', 'mean'),
        ('launch_angle', 'launch_angle', 'mean'),
        ('pitches', 'is_pitch', 'sum'),
        'swings', 'in_zone', 'out_zone', 'whiffs',
        'zone_swing', 'zone_contact', 'ozone_swing', 'ozone_contact',
        ('ground_ball', 'trajectory_ground_ball', 'sum'),
        ('line_drive', 'trajectory_line_drive', 'sum'),
        ('fly_ball', 'trajectory_fly_ball', 'sum'),
        ('pop_up', 'trajectory_popup', 'sum'),
        # count, not sum: number of rows with an attack-zone class at all.
        ('attack_zone', 'attack_zone', 'count'),
        'heart', 'shadow', 'chase', 'waste',
        'heart_swing', 'shadow_swing', 'chase_swing', 'waste_swing',
    ]

    named_aggs = {}
    for entry in layout:
        if isinstance(entry, str):
            named_aggs[entry] = (entry, 'sum')
        else:
            out_col, src_col, how = entry
            named_aggs[out_col] = (src_col, how)

    per_batter = df.groupby(['batter_id', 'batter_name'])
    df_summ = per_batter.agg(**named_aggs).reset_index()
    return df_summ
341
-
342
def df_update_summ_avg(df=pd.DataFrame()):
    """Aggregate pitch-level rows by the ``average`` column.

    Produces the same output columns as the per-batter summary, but grouped
    on ``average`` instead of on the batter. The group key is returned as a
    regular column.
    """
    # Output columns that are a straight sum of an identically named input.
    def total(column):
        return column, (column, 'sum')

    # Kept as an ordered list of (output, (source, aggregation)) pairs so
    # the resulting column order is explicit.
    pairs = [
        total('pa'),
        total('ab'),
        ('obp_pa', ('obp', 'sum')),
        total('hits'),
        total('on_base'),
        total('k'),
        total('bb'),
        total('bb_minus_k'),
        total('csw'),
        total('bip'),
        total('bip_div'),
        total('tb'),
        total('woba'),
        total('woba_contact'),
        total('xwoba'),
        total('xwoba_contact'),
        total('woba_codes'),
        total('hard_hit'),
        total('barrel'),
        total('sweet_spot'),
        ('max_launch_speed', ('launch_speed', 'max')),
        ('launch_speed_90', ('launch_speed', percentile(90))),
        ('launch_speed', ('launch_speed', 'mean')),
        ('launch_angle', ('launch_angle', 'mean')),
        ('pitches', ('is_pitch', 'sum')),
        total('swings'),
        total('in_zone'),
        total('out_zone'),
        total('whiffs'),
        total('zone_swing'),
        total('zone_contact'),
        total('ozone_swing'),
        total('ozone_contact'),
        ('ground_ball', ('trajectory_ground_ball', 'sum')),
        ('line_drive', ('trajectory_line_drive', 'sum')),
        ('fly_ball', ('trajectory_fly_ball', 'sum')),
        ('pop_up', ('trajectory_popup', 'sum')),
        # Number of rows that have an attack-zone value.
        ('attack_zone', ('attack_zone', 'count')),
        total('heart'),
        total('shadow'),
        total('chase'),
        total('waste'),
        total('heart_swing'),
        total('shadow_swing'),
        total('chase_swing'),
        total('waste_swing'),
    ]

    df_summ_avg = df.groupby(['average']).agg(**dict(pairs)).reset_index()
    return df_summ_avg
396
-
397
def df_summ_changes(df_summ=None):
    """Add rate statistics to a summary frame of counting totals.

    Every rate is a guarded division: where the denominator is 0 the result
    is NaN instead of raising or yielding inf. The divisions are vectorised,
    so the frame may carry any index. The previous element-wise version
    looked rows up by the labels ``0..len-1`` and therefore only worked on a
    default RangeIndex.

    The new columns are written onto ``df_summ`` itself. The returned frame
    is that data with rows whose ``bip`` is NaN removed.

    Args:
        df_summ: Frame holding the totals produced by the summary
            aggregations (``hits``, ``ab``, ``pa``, ``pitches``, ``bip`` and
            so on). ``None`` stands for an empty frame, which (as before)
            fails on the first missing column.

    Returns:
        DataFrame with the rate columns added and NaN-``bip`` rows dropped.
    """
    if df_summ is None:
        df_summ = pd.DataFrame()

    def ratio(numerator, denominator, positive_only=False):
        # `denominator` is a column name or an already computed Series.
        # A denominator that fails the guard becomes NaN, which makes the
        # quotient NaN for that row.
        if isinstance(denominator, str):
            denominator = df_summ[denominator]
        den = denominator.astype('float64')
        usable = den > 0 if positive_only else den != 0
        return df_summ[numerator].astype('float64') / den.where(usable)

    df_summ['avg'] = ratio('hits', 'ab')
    df_summ['obp'] = ratio('on_base', 'obp_pa')
    df_summ['slg'] = ratio('tb', 'ab')

    df_summ['ops'] = df_summ['obp'] + df_summ['slg']

    df_summ['k_percent'] = ratio('k', 'pa')
    df_summ['bb_percent'] = ratio('bb', 'pa')
    df_summ['bb_minus_k_percent'] = ratio('bb_minus_k', 'pa')

    df_summ['bb_over_k_percent'] = ratio('bb', 'k')

    df_summ['csw_percent'] = ratio('csw', 'pitches')

    # Batted-ball quality rates are per measured batted ball (bip_div).
    df_summ['sweet_spot_percent'] = ratio('sweet_spot', 'bip_div')

    df_summ['woba_percent'] = ratio('woba', 'woba_codes')
    df_summ['woba_percent_contact'] = ratio('woba_contact', 'bip')
    df_summ['hard_hit_percent'] = ratio('hard_hit', 'bip_div')

    df_summ['barrel_percent'] = ratio('barrel', 'bip_div')

    df_summ['zone_contact_percent'] = ratio('zone_contact', 'zone_swing')
    df_summ['zone_swing_percent'] = ratio('zone_swing', 'in_zone')
    # The three pitch-count rates below keep their original `> 0` guard.
    df_summ['zone_percent'] = ratio('in_zone', 'pitches', positive_only=True)

    # Chase rate: out-of-zone swings over pitches that were not in-zone.
    df_summ['chase_percent'] = ratio(
        'ozone_swing', df_summ['pitches'] - df_summ['in_zone'])
    df_summ['chase_contact'] = ratio('ozone_contact', 'ozone_swing')

    df_summ['swing_percent'] = ratio('swings', 'pitches', positive_only=True)
    df_summ['whiff_rate'] = ratio('whiffs', 'swings')
    df_summ['swstr_rate'] = ratio('whiffs', 'pitches', positive_only=True)

    df_summ['ground_ball_percent'] = ratio('ground_ball', 'bip')
    df_summ['line_drive_percent'] = ratio('line_drive', 'bip')
    df_summ['fly_ball_percent'] = ratio('fly_ball', 'bip')
    df_summ['pop_up_percent'] = ratio('pop_up', 'bip')

    # Share of classified pitches in each attack zone ...
    df_summ['heart_zone_percent'] = ratio('heart', 'attack_zone')
    df_summ['shadow_zone_percent'] = ratio('shadow', 'attack_zone')
    df_summ['chase_zone_percent'] = ratio('chase', 'attack_zone')
    df_summ['waste_zone_percent'] = ratio('waste', 'attack_zone')

    # ... and swing rate within each attack zone.
    df_summ['heart_zone_swing_percent'] = ratio('heart_swing', 'heart')
    df_summ['shadow_zone_swing_percent'] = ratio('shadow_swing', 'shadow')
    df_summ['chase_zone_swing_percent'] = ratio('chase_swing', 'chase')
    df_summ['waste_zone_swing_percent'] = ratio('waste_swing', 'waste')

    df_summ['xwoba_percent'] = ratio('xwoba', 'woba_codes')
    df_summ['xwoba_percent_contact'] = ratio('xwoba_contact', 'bip')

    df_summ = df_summ.dropna(subset=['bip'])
    return df_summ
477
-
478
def df_summ_filter_out(df_summ=None, batter_select=0):
    """Rank one batter against peers with a comparable number of PAs.

    ``df_summ`` must have a MultiIndex whose first level holds the key
    passed as ``batter_select`` (``xs(..., level=0)`` is used to find the
    batter). The peer group is every row whose ``pa`` is at least the
    selected batter's ``pa`` rounded down to a multiple of 10, capped at 500.

    Args:
        df_summ: Summary frame, MultiIndexed as described, with a ``pa``
            column. ``None`` stands for an empty frame.
        batter_select: Level-0 index key of the batter of interest.

    Returns:
        Tuple of ``(peer rows, percentile ranks of the peer rows, the
        batter's rows, the batter's percentile ranks)``.
    """
    if df_summ is None:
        df_summ = pd.DataFrame()

    df_summ_player = df_summ.xs(batter_select, level=0)

    # Take the scalar explicitly: math.floor on a one-row Series relied on
    # an implicit Series-to-float conversion.
    player_pa = df_summ_player['pa'].iloc[0]
    pa_threshold = min(math.floor(player_pa / 10) * 10, 500)

    df_summ_filter = df_summ[df_summ['pa'] >= pa_threshold]
    df_summ_filter_pct = df_summ_filter.rank(pct=True, ascending=True)
    df_summ_player_pct = df_summ_filter_pct.xs(batter_select, level=0)
    return df_summ_filter, df_summ_filter_pct, df_summ_player, df_summ_player_pct
484
-
485
def df_summ_batter_pitch_up(df=None):
    """Summarise pitch-level rows per batter and pitch category, with rates.

    Rows are grouped by ``batter_id``, ``batter_name`` and
    ``pitch_category`` (rows with a missing category are dropped first).
    Counting totals are aggregated, then rate columns are derived from them.
    A rate is NaN wherever its denominator is 0.

    Two aggregation sources were fixed to match the other summaries in this
    module: ``woba_contact`` now sums the ``woba_contact`` column (it summed
    ``xwoba_contact``), and ``xwoba_contact`` now sums ``xwoba_contact`` (it
    summed ``xwoba``).

    Args:
        df: Pitch-level frame carrying the derived flag and value columns
            aggregated below. ``None`` stands for an empty frame.

    Returns:
        One row per (batter, pitch category) with totals and rates; the
        group keys are regular columns.
    """
    if df is None:
        df = pd.DataFrame()

    # A bare string means "sum this column into a same-named output";
    # tuples are (output column, source column, aggregation).
    layout = [
        'pa', 'ab', ('obp_pa', 'obp', 'sum'), 'hits', 'on_base',
        'k', 'bb', 'bb_minus_k', 'csw', 'bip', 'bip_div', 'tb',
        'woba', 'woba_contact', 'xwoba', 'xwoba_contact', 'woba_codes',
        'hard_hit', 'barrel', 'sweet_spot',
        ('max_launch_speed', 'launch_speed', 'max'),
        ('launch_speed_90', 'launch_speed', percentile(90)),
        ('launch_speed', 'launch_speed', 'mean'),
        ('launch_angle', 'launch_angle', 'mean'),
        ('pitches', 'is_pitch', 'sum'),
        'swings', 'in_zone', 'out_zone', 'whiffs',
        'zone_swing', 'zone_contact', 'ozone_swing', 'ozone_contact',
        ('ground_ball', 'trajectory_ground_ball', 'sum'),
        ('line_drive', 'trajectory_line_drive', 'sum'),
        ('fly_ball', 'trajectory_fly_ball', 'sum'),
        ('pop_up', 'trajectory_popup', 'sum'),
        # count, not sum: number of rows with an attack-zone class at all.
        ('attack_zone', 'attack_zone', 'count'),
        'heart', 'shadow', 'chase', 'waste',
        'heart_swing', 'shadow_swing', 'chase_swing', 'waste_swing',
    ]
    named_aggs = {}
    for entry in layout:
        if isinstance(entry, str):
            named_aggs[entry] = (entry, 'sum')
        else:
            out_col, src_col, how = entry
            named_aggs[out_col] = (src_col, how)

    df_summ_batter_pitch = (
        df.dropna(subset=['pitch_category'])
        .groupby(['batter_id', 'batter_name', 'pitch_category'])
        .agg(**named_aggs)
        .reset_index()
    )
    summ = df_summ_batter_pitch

    def ratio(numerator, denominator):
        # `denominator` is a column name or an already computed Series.
        # A zero denominator becomes NaN, making the quotient NaN.
        if isinstance(denominator, str):
            denominator = summ[denominator]
        den = denominator.astype('float64')
        return summ[numerator].astype('float64') / den.where(den != 0)

    summ['avg'] = ratio('hits', 'ab')
    summ['obp'] = ratio('on_base', 'obp_pa')
    summ['slg'] = ratio('tb', 'ab')

    summ['ops'] = summ['obp'] + summ['slg']

    summ['k_percent'] = ratio('k', 'pa')
    summ['bb_percent'] = ratio('bb', 'pa')
    summ['bb_minus_k_percent'] = ratio('bb_minus_k', 'pa')

    summ['bb_over_k_percent'] = ratio('bb', 'k')

    summ['csw_percent'] = ratio('csw', 'pitches')

    # Batted-ball quality rates are per measured batted ball (bip_div).
    summ['sweet_spot_percent'] = ratio('sweet_spot', 'bip_div')

    summ['woba_percent'] = ratio('woba', 'woba_codes')
    summ['woba_percent_contact'] = ratio('woba_contact', 'bip')
    summ['hard_hit_percent'] = ratio('hard_hit', 'bip_div')

    summ['barrel_percent'] = ratio('barrel', 'bip_div')

    summ['zone_contact_percent'] = ratio('zone_contact', 'zone_swing')
    summ['zone_swing_percent'] = ratio('zone_swing', 'in_zone')
    summ['zone_percent'] = ratio('in_zone', 'pitches')

    # Chase rate: out-of-zone swings over pitches that were not in-zone.
    summ['chase_percent'] = ratio('ozone_swing',
                                  summ['pitches'] - summ['in_zone'])
    summ['chase_contact'] = ratio('ozone_contact', 'ozone_swing')

    summ['swing_percent'] = ratio('swings', 'pitches')
    summ['whiff_rate'] = ratio('whiffs', 'swings')
    summ['swstr_rate'] = ratio('whiffs', 'pitches')

    # Share of classified pitches in each attack zone ...
    summ['heart_zone_percent'] = ratio('heart', 'attack_zone')
    summ['shadow_zone_percent'] = ratio('shadow', 'attack_zone')
    summ['chase_zone_percent'] = ratio('chase', 'attack_zone')
    summ['waste_zone_percent'] = ratio('waste', 'attack_zone')

    # ... and swing rate within each attack zone.
    summ['heart_zone_swing_percent'] = ratio('heart_swing', 'heart')
    summ['shadow_zone_swing_percent'] = ratio('shadow_swing', 'shadow')
    summ['chase_zone_swing_percent'] = ratio('chase_swing', 'chase')
    summ['waste_zone_swing_percent'] = ratio('waste_swing', 'waste')

    summ['xwoba_percent'] = ratio('xwoba', 'woba_codes')
    summ['xwoba_percent_contact'] = ratio('xwoba_contact', 'bip')

    # Unlike df_summ_changes, rows without a bip total are kept with 0.
    summ['bip'] = summ['bip'].fillna(0)

    return df_summ_batter_pitch