nesticot committed on
Commit
9e363eb
·
verified ·
1 Parent(s): 0f0d254

Upload 4 files

Browse files
Files changed (2) hide show
  1. app.py +708 -0
  2. batting_update.py +608 -0
app.py ADDED
@@ -0,0 +1,708 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import requests
4
+ import math
5
+ import matplotlib.pyplot as plt
6
+ import seaborn as sns
7
+ import matplotlib.patches as patches
8
+ import matplotlib.colors as mcolors
9
+ import matplotlib
10
+ import inflect
11
+ infl = inflect.engine()
12
+ from matplotlib.offsetbox import (OffsetImage, AnnotationBbox)
13
+ from matplotlib.colors import Normalize
14
+ from matplotlib.ticker import FuncFormatter
15
+ import matplotlib.ticker as mtick
16
+ from matplotlib.colors import Normalize
17
+ import urllib
18
+ import urllib.request
19
+ import urllib.error
20
+ from urllib.error import HTTPError
21
+ import time
22
+ from shinywidgets import output_widget, render_widget
23
+ import shinyswatch
24
+ from shiny import App, Inputs, Outputs, Session, reactive, render, req, ui
25
+
26
# Summary statistics shown on the player card, in display order.
column_list = """
    woba_percent xwoba_percent barrel_percent sweet_spot_percent
    hard_hit_percent launch_speed launch_speed_90 max_launch_speed
    k_percent bb_percent swing_percent whiff_rate
    zone_swing_percent zone_contact_percent chase_percent chase_contact
""".split()

# Columns of the per-pitch-type table, in display order.
column_list_pitch = "pitches bip xwoba_percent whiff_rate chase_percent".split()
43
+
44
+ import joblib
45
+
46
+
47
# Pre-trained models, deserialized once at import time from paths relative
# to the working directory.
# NOTE(review): neither name is referenced again in the portion of app.py
# shown here -- confirm whether these loads are still needed.
loaded_model = joblib.load('joblib_model/barrel_model.joblib')
in_zone_model = joblib.load('joblib_model/in_zone_model_knn_20240410.joblib')
49
+
50
# Display metadata per statistic column:
#   name   -> label printed on the card
#   format -> format spec applied to the value
#   flip   -> True selects the reversed colour map for that statistic
stat_plot_dict = {
    column: {'name': label, 'format': spec, 'flip': flipped}
    for column, label, spec, flipped in (
        ('woba_percent', 'wOBA', '.3f', False),
        ('xwoba_percent', 'xwOBA', '.3f', False),
        ('woba_percent_contact', 'wOBACON', '.3f', False),
        ('barrel_percent', 'Barrel%', '.1%', False),
        ('max_launch_speed', 'Max EV', '.1f', False),
        ('launch_speed_90', '90th% EV', '.1f', False),
        ('launch_speed', 'Avg EV', '.1f', False),
        ('sweet_spot_percent', 'SwSpot%', '.1%', False),
        ('hard_hit_percent', 'HardHit%', '.1%', False),
        ('k_percent', 'K%', '.1%', True),
        ('bb_percent', 'BB%', '.1%', False),
        ('zone_contact_percent', 'Z-Contact%', '.1%', False),
        ('zone_swing_percent', 'Z-Swing%', '.1%', False),
        ('zone_percent', 'Zone%', '.1%', False),
        ('chase_percent', 'O-Swing%', '.1%', True),
        ('chase_contact', 'O-Contact%', '.1%', False),
        ('swing_percent', 'Swing%', '.1%', False),
        ('whiff_rate', 'Whiff%', '.1%', True),
        ('bip', 'Balls in Play', '.0f', False),
        ('pitches', 'Pitches', '.0f', False),
    )
}
70
+
71
# Configuration for the rolling-average charts. For each selectable statistic:
#   name / format / flip -> same meaning as in stat_plot_dict
#   y                    -> column summed inside the rolling window
#   div                  -> column selecting the rows that count as events
#   y_min / y_max        -> fixed y-axis limits
#   x_label              -> x-axis label (also used in the chart title)
#   form                 -> key choosing the y-axis tick formatter
stat_plot_dict_rolling = {
    row[0]: dict(zip(
        ('name', 'format', 'flip', 'y', 'div', 'y_min', 'y_max', 'x_label', 'form'),
        row[1:],
    ))
    for row in (
        ('woba_percent', 'wOBA', '.3f', False, 'woba', 'woba_codes', 0.2, 0.6, 'wOBA PA', '3f'),
        ('xwoba_percent', 'xwOBA', '.3f', False, 'xwoba', 'woba_codes', 0.2, 0.6, 'xwOBA PA', '3f'),
        ('k_percent', 'K%', '.1%', True, 'k', 'pa', 0.0, 0.4, 'PA', '1%'),
        ('bb_percent', 'BB%', '.1%', False, 'bb', 'pa', 0.0, 0.3, 'PA', '1%'),
        ('zone_contact_percent', 'Z-Contact%', '.1%', False, 'zone_contact', 'zone_swing', 0.6, 1.0, 'In-Zone Swings', '1%'),
        ('zone_swing_percent', 'Z-Swing%', '.1%', False, 'zone_swing', 'in_zone', 0.5, 1.0, 'In-Zone Pitches', '1%'),
        ('zone_percent', 'Zone%', '.1%', False, 'in_zone', 'pitches', 0.3, 0.7, 'Pitches', '1%'),
        ('chase_percent', 'O-Swing%', '.1%', True, 'ozone_swing', 'out_zone', 0.1, 0.4, 'Out-of-Zone Pitches', '1%'),
        ('chase_contact', 'O-Contact%', '.1%', False, 'ozone_contact', 'ozone_swing', 0.4, 0.8, 'Out-of-Zone Swings', '1%'),
        ('swing_percent', 'Swing%', '.1%', False, 'swings', 'pitches', 0.3, 0.7, 'Pitches', '1%'),
        ('whiff_rate', 'Whiff%', '.1%', True, 'whiffs', 'swings', 0.0, 0.5, 'Swings', '1%'),
    )
}
82
+
83
# Blue -> white -> orange colour map and its mirror image (selected for
# statistics whose 'flip' flag is set).
cmap_sum = mcolors.LinearSegmentedColormap.from_list("", ["#0C7BDC", "#FFFFFF", "#FFB000"])
cmap_sum_r = mcolors.LinearSegmentedColormap.from_list("", ["#FFB000", "#FFFFFF", "#0C7BDC"])

# Masked / invalid values are drawn in neutral grey on both maps.
for _card_cmap in (cmap_sum, cmap_sum_r):
    _card_cmap.set_bad(color='#C7C7C7', alpha=1.0)
del _card_cmap
87
+
88
+ from batting_update import df_update,df_update_summ_avg,df_update_summ,df_summ_batter_pitch_up,df_summ_changes,df_summ_filter_out
89
+
90
def percentile(n):
    """Build a NaN-ignoring percentile aggregator.

    Parameters
    ----------
    n : float or sequence of float
        Percentile(s) to compute, forwarded unchanged to
        ``numpy.nanpercentile``.

    Returns
    -------
    callable
        A one-argument function ``percentile_(x)`` that returns
        ``np.nanpercentile(x, n)``.  Its ``__name__`` is set to
        ``'percentile_<n>'`` so that each aggregator built here carries a
        distinct, descriptive name.
    """
    def percentile_(x):
        return np.nanpercentile(x, n)

    # An f-string is used instead of ``'percentile_%s' % n``: the %-operator
    # unpacks tuples, so a tuple of percentiles used to raise TypeError.
    percentile_.__name__ = f'percentile_{n}'
    return percentile_
95
+
96
print('Reading A')
### Import Datasets
from datasets import load_dataset

# Pull the 2024 pitch-level CSV and turn its 'train' split into a DataFrame.
# The first feature column is moved into the index and then discarded.
dataset = load_dataset('nesticot/mlb_data', data_files=['mlb_pitch_data_2024.csv' ])
dataset_train = dataset['train']
_first_feature = next(iter(dataset_train.features.keys()))
df_a = dataset_train.to_pandas().set_index(_first_feature).reset_index(drop=True)
del _first_feature

sport_id_input = 1

print('Reading A')
df_a_update = df_update(df_a)

# Normalise the batter names and the string-encoded 'bip' flags.
df_a_update['batter_name'] = df_a_update['batter_name'].str.strip(' ')
df_a_update['bip'] = df_a_update['bip'].replace({'0':False,'False':False,'True':True})

choices_woba = [0.696, 0.726, 0.883, 1.244, 1.569, 2.004]

woba_codes = [
    'strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
    'double', 'sac_fly', 'force_out', 'home_run',
    'grounded_into_double_play', 'fielders_choice', 'field_error',
    'triple', 'sac_bunt', 'double_play',
    'fielders_choice_out', 'strikeout_double_play',
    'sac_fly_double_play', 'other_out',
]

# Rows with a recorded launch speed are flagged in 'bip_div'.
df_a_update['bip_div'] = df_a_update['launch_speed'].notna()
# Constant column used as the grouping key for the overall averages.
df_a_update['average'] = 'average'

# Per-batter and overall summaries. These are computed before 'batter_id'
# is cast to int below, exactly as in the original statement order.
df_summ_a_update = df_summ_changes(df_update_summ(df_a_update)).set_index(['batter_id','batter_name'])
df_summ_avg_a_update = df_summ_changes(df_update_summ_avg(df_a_update)).set_index(['average'])

# Choices for the rolling-stat selectors: column key -> display name.
stat_roll_dict = {key: cfg['name'] for key, cfg in stat_plot_dict_rolling.items()}

df_a_update['batter_id'] = df_a_update['batter_id'].astype(float).astype(int)

# Choices for the batter selector: batter_id -> batter_name, sorted by name.
a_player_dict = (
    df_a_update.drop_duplicates('batter_id')[['batter_id','batter_name']]
    .sort_values(by='batter_name')
    .set_index('batter_id')['batter_name']
    .to_dict()
)


import api_scraper
mlb_stats = api_scraper.MLB_Scrape()
160
+
161
def get_color(value, vmin, vmax, cmap_name=cmap_sum):
    """Map *value* onto a colour map and return the colour as a hex string.

    Parameters
    ----------
    value : float
        Value to colour.  NaN is passed through to the colour map unchanged.
    vmin, vmax : float
        Values mapped to the two ends of the colour map.
    cmap_name : str or Colormap, optional
        Colour map (or registered name) handed to ``plt.get_cmap``.
        Defaults to the module-level ``cmap_sum``.

    Returns
    -------
    str
        The colour produced by ``matplotlib.colors.rgb2hex``.
    """
    span = vmax - vmin
    if span == 0:
        # Degenerate range: there is nothing to scale against, so use the
        # midpoint of the map instead of dividing by zero.  ``value != value``
        # is only true for NaN, which is kept as NaN.
        normalized_value = value if value != value else 0.5
    else:
        # Normalize the value within the range [0, 1]
        normalized_value = (value - vmin) / span

    cmap = plt.get_cmap(cmap_name)

    # The colour map returns a colour tuple; convert it to hexadecimal.
    return mcolors.rgb2hex(cmap(normalized_value))
175
+
176
# Display names for the MLB Stats API sport ids used in the card title.
_LEVEL_NAMES = {1: 'MLB',
                11: 'MiLB AAA',
                12: 'MiLB AA',
                13: 'MiLB High-A',
                14: 'MiLB A'}

# Placeholder headshot used when a player's own image cannot be fetched.
_GENERIC_HEADSHOT_URL = ('https://img.mlbstatic.com/mlb-photos/image/upload/'
                         'd_people:generic:headshot:67:current.png/w_213,q_auto:best/'
                         'v1/people/1/headshot/67/current.png')


def _message_figure(message):
    """Return a blank 10x10 inch figure that shows only *message*."""
    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    ax.text(s=message, x=0.5, y=0.5, ha='center')
    ax.axis('off')
    return fig


def _centered_text(ax, text, y, fontsize, x=0.5):
    """Draw black, centre-aligned *text* on *ax* at (*x*, *y*)."""
    ax.text(s=text, x=x, y=y, color='black', fontsize=fontsize, ha='center', va='center')


def _add_image(ax, image, zoom, xy):
    """Place *image* on *ax* at *xy*, scaled by *zoom*, without a frame."""
    ax.add_artist(AnnotationBbox(OffsetImage(image, zoom=zoom), xy, frameon=False))


def _load_headshot(batter_select, sport_id_input):
    """Fetch the player's headshot, falling back to the generic one on HTTP errors."""
    if sport_id_input == 1:
        url = f'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_213,q_auto:best/v1/people/{batter_select}/headshot/67/current.png'
    else:
        url = f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_180/v1/people/{batter_select}/headshot/milb/current.png'
    try:
        # The image is read once; the previous code downloaded it twice
        # (once only to probe for an error, once for real).
        return plt.imread(url)
    except urllib.error.HTTPError:
        return plt.imread(_GENERIC_HEADSHOT_URL)


def _draw_stat_boxes(ax, df_summ_player, df_summ_player_pct):
    """Draw one coloured value box plus label per entry of ``column_list``."""
    width = 0.08
    height = width * 2.45

    # Without a launch speed the batted-ball statistics are blanked out in
    # both the value frame and the percentile frame.
    if df_summ_player['launch_speed'].isna().values[0]:
        for col in ('sweet_spot_percent', 'barrel_percent', 'hard_hit_percent', 'xwoba_percent'):
            df_summ_player[col] = np.nan
            df_summ_player_pct[col] = np.nan

    for cat, stat in enumerate(column_list):
        # Eight boxes per row; a new row starts every eighth statistic.
        x_adjust = (0.85/7*8)*cat/8+0.075 - (0.85/7*8)*math.floor((cat)/8)
        y_adjust = 0.45-math.floor((cat)/8)/3.2

        stat_cfg = stat_plot_dict[stat]
        values = df_summ_player[stat]
        if not values.isna().any():
            label = f'{values.values[0]:{stat_cfg["format"]}}'.strip()
            fontsize = 16
        else:
            label = f'{values.fillna("N/A").values[0]}'
            fontsize = 14
        _centered_text(ax, label, y=y_adjust, fontsize=fontsize, x=x_adjust)

        # Statistics with the 'flip' flag use the reversed colour map.
        box_cmap = cmap_sum_r if stat_cfg['flip'] else cmap_sum
        ax.add_patch(patches.Rectangle(
            (x_adjust - width/2, y_adjust - height/2), width, height,
            linewidth=1, edgecolor='black',
            facecolor=get_color(df_summ_player_pct[stat].values[0], 0, 1, cmap_name=box_cmap)))

        _centered_text(ax, stat_cfg['name'], y=y_adjust-0.14, fontsize=12, x=x_adjust)


def _draw_player_header(ax, person, team_label):
    """Write the name, position/team, handedness/size and birth lines."""
    _centered_text(ax, f"{person['fullName']}", y=0.95, fontsize=28)
    _centered_text(ax, f"{person['primaryPosition']['abbreviation']}, {team_label}", y=0.85, fontsize=14)
    _centered_text(ax,
                   f"B/T: {person['batSide']['code']}/"
                   f"{person['pitchHand']['code']} "
                   f"{person['height']}/"
                   f"{person['weight']}",
                   y=0.785, fontsize=14)
    _centered_text(ax,
                   f"DOB: {person['birthDate']} "
                   f"Age: {person['currentAge']}",
                   y=0.72, fontsize=14)


def _draw_pitch_table(ax_table, df_plot, df_rank):
    """Render the per-pitch-type table with percentile-coloured cells."""
    colormap = plt.get_cmap(cmap_sum)
    colormap_r = plt.get_cmap(cmap_sum_r)
    norm = Normalize(vmin=0, vmax=1)

    plain = ['white']*len(df_rank)
    # NOTE(review): the third table column displays 'xwoba_percent' but is
    # coloured from the 'woba_percent_contact' percentile -- kept as in the
    # original; confirm whether 'xwoba_percent' was intended.
    colour_df = pd.DataFrame(data=[
        plain,
        plain,
        [colormap(norm(x)) for x in list(df_rank['woba_percent_contact'])],
        [colormap_r(norm(x)) for x in list(df_rank['whiff_rate'])],
        [colormap_r(norm(x)) for x in list(df_rank['chase_percent'])],
    ]).T.values

    ax_table.axis('off')
    table = ax_table.table(cellText=df_plot.values,
                           colLabels=[stat_plot_dict[x]['name'] for x in df_plot.columns],
                           rowLabels=df_plot.index, cellLoc='center',
                           bbox=[0.13, 0.0, 0.79, 1], colWidths=[0.1]*len(df_plot.columns),
                           loc='center', cellColours=colour_df)
    ax_table.text(x=0.5, y=1.1, s='Metrics By Pitch Type', ha='center', fontdict={'size': 12}, fontname='arial')

    # Extra header cell above the row labels.
    w, h = table[0, 1].get_width(), table[0, 1].get_height()
    cell_i = table.add_cell(0, -1, w, h, text='Pitch Type')
    cell_i.get_text().set_horizontalalignment('left')

    table.auto_set_font_size(False)
    table.set_fontsize(12)
    table.scale(1, len(df_plot)*0.3)

    # Re-format the cell text column by column.
    column_formats = {'pitches': '{:,.0f}',
                      'bip': '{:,.0f}',
                      'xwoba_percent': '{:,.3f}',
                      'whiff_rate': '{:,.1%}',
                      'chase_percent': '{:,.1%}'}
    cells = table.get_celld()
    for col_name, fmt in column_formats.items():
        col_idx = df_plot.columns.get_loc(col_name)
        for row_idx in range(1, len(df_plot)+1):
            cell_text = cells[(row_idx, col_idx)].get_text()
            raw = cell_text.get_text()
            if raw != '—':
                cell_text.set_text(fmt.format(float(raw.strip('%'))))


def _rolling_plot(stat, window_width, ax, df_r, df_r_summ_avg):
    """Plot the rolling rate of *stat* for one batter on *ax*.

    ``stat`` is a key of ``stat_plot_dict_rolling``.  Rows of *df_r* whose
    'div' column equals 1 are the events; the 'y' column is summed over
    *window_width* consecutive events and divided by the window width.  Two
    horizontal reference lines are added: the ratio taken from
    *df_r_summ_avg* (blue, dash-dot) and the batter's own overall ratio
    (orange, dashed).  Returns the object returned by ``sns.lineplot``.
    """
    cfg = stat_plot_dict_rolling[stat]
    y_col, div_col = cfg['y'], cfg['div']

    # Filter once.  The original built the x values from ``div > 0`` but the
    # y values, x-limits and reference lines from ``div == 1``; one filter
    # keeps the x and y vectors the same length by construction.
    df_events = df_r[df_r[div_col] == 1]
    n_events = len(df_events)

    rolling_rate = df_events.fillna(0).rolling(window=window_width)[y_col].sum().dropna()/window_width
    plot = sns.lineplot(x=range(window_width, n_events+1),
                        y=rolling_rate,
                        ax=ax,
                        color="#FFB000",
                        zorder=10)

    ax.set_xlim(window_width, n_events)
    ax.set_xlabel(cfg['x_label'], fontsize=8)
    ax.set_ylabel(cfg['name'], fontsize=8)

    ax.hlines(df_r_summ_avg[y_col]/df_r_summ_avg[div_col],
              xmin=window_width, xmax=n_events,
              color="#0C7BDC", linestyles='-.')
    ax.hlines(df_r[y_col].sum()/df_r[div_col].sum(),
              xmin=window_width, xmax=n_events,
              color="#FFB000", linestyles='--')

    ax.tick_params(axis='x', labelsize=8)
    ax.tick_params(axis='y', labelsize=8)
    ax.set_title(f"{window_width} {cfg['x_label']} Rolling {cfg['name']}", fontsize=8)
    ax.set_ylim(cfg['y_min'], cfg['y_max'])
    ax.grid(True, alpha=0.2)

    if cfg['form'] == '3f':
        ax.yaxis.set_major_formatter(mtick.StrMethodFormatter('{x:.3f}'))
    elif cfg['form'] == '1f':
        ax.yaxis.set_major_formatter(mtick.StrMethodFormatter('{x:.1f}'))
    elif cfg['form'] == '1%':
        ax.yaxis.set_major_formatter(mtick.PercentFormatter(1))

    return plot


def _draw_percentile_colorbar(ax_cbar):
    """Draw the horizontal 0%-100% colour scale legend on *ax_cbar*."""
    matplotlib.colorbar.ColorbarBase(ax_cbar, orientation='horizontal', cmap=cmap_sum)
    ax_cbar.text(x=0.5, y=1.2, s='Colour Scale - Percentiles', ha='center', fontdict={'size': 12}, fontname='arial')
    ax_cbar.text(s='0%', x=0.01, y=0.5, va='center', ha='left')
    ax_cbar.text(s='100%', x=0.99, y=0.5, va='center', ha='right')
    ax_cbar.set_xticks([])
    ax_cbar.set_yticks([])
    ax_cbar.set_xticklabels([])
    ax_cbar.set_yticklabels([])

    # Display only the outline of the axis
    for spine in ax_cbar.spines.values():
        spine.set_visible(True)
        spine.set_color('black')


def server(input, output, session):
    """Shiny server function for the player-card app.

    Registers two outputs:

    * ``test``   -- the batter selector (input id ``player_id``), built from
      ``a_player_dict``.
    * ``a_plot`` -- the player card figure, re-rendered when the ``go``
      button fires.  It reads the inputs ``player_id``, ``stat_1``..``stat_3``
      and ``window_1``..``window_3``.
    """

    @render.ui
    def test():
        return ui.input_select("player_id", "Select Batter", a_player_dict, selectize=True)

    @output
    @render.plot(alt="A Plot")
    @reactive.event(input.go, ignore_none=False)
    def a_plot():
        if len(input.player_id()) < 1:
            return _message_figure('Please Select a Batter')
        batter_select = int(input.player_id())

        # Work on copies so the module-level frames are never modified.
        df_pitches = df_a_update.copy()
        df_summ_update = df_summ_a_update.copy()
        df_summ_avg_update = df_summ_avg_a_update.copy()

        df_roll = df_pitches[df_pitches['batter_id'] == batter_select]
        if len(df_roll) == 0:
            return _message_figure('Card is Generating')

        # One call instead of four identical ones; element 1 was never used.
        summ_filtered = df_summ_filter_out(df_summ=df_summ_update, batter_select=batter_select)
        df_summ_filter = summ_filtered[0]
        df_summ_player = summ_filtered[2]
        df_summ_player_pct = summ_filtered[3]

        df_summ_batter_pitch = df_summ_batter_pitch_up(df=df_pitches).set_index(
            ['batter_id', 'batter_name', 'pitch_category'])

        # Percentile rank of the batter within each pitch category, among the
        # batters kept by the summary filter that saw at least one such pitch.
        df_summ_batter_pitch_pct = df_summ_batter_pitch.loc[df_summ_filter.index.get_level_values(0)]
        df_summ_batter_pitch_pct = df_summ_batter_pitch_pct[df_summ_batter_pitch_pct['pitches'] > 0]
        df_summ_batter_pitch_pct_rank = (df_summ_batter_pitch_pct
                                         .groupby(level='pitch_category')
                                         .apply(lambda x: x.rank(pct=True))
                                         .xs(batter_select, level=0))

        pitch_counts = (df_summ_batter_pitch.xs(batter_select, level=0)
                        .reset_index()
                        .set_index('pitch_category')['pitches']
                        .to_dict())
        df_summ_batter_pitch_pct_rank['pitch_count'] = (
            df_summ_batter_pitch_pct_rank.index.get_level_values(1).map(pitch_counts))
        df_summ_batter_pitch_pct_rank = df_summ_batter_pitch_pct_rank.sort_values('pitch_count', ascending=False)

        # ---- external lookups -------------------------------------------
        mlb_teams = mlb_stats.get_teams()
        team_logos = pd.read_csv('team_logos.csv')
        app_context = 'majorLeague' if sport_id_input == 1 else 'minorLeague'
        player_bio = requests.get(
            f'https://statsapi.mlb.com/api/v1/people?personIds={batter_select}&appContext={app_context}&hydrate=currentTeam').json()
        person = player_bio['people'][0]
        current_team = person['currentTeam']

        # ---- figure scaffold --------------------------------------------
        fig = plt.figure(figsize=(10, 10))
        plt.rcParams.update({'figure.autolayout': True})
        fig.set_facecolor('white')
        sns.set_theme(style="whitegrid", palette="pastel")
        from matplotlib.gridspec import GridSpec
        gs = GridSpec(5, 5, width_ratios=[0.2, 1, 1, 1, 0.2], height_ratios=[0.6, 0.05, 0.15, .30, 0.025])
        gs.update(hspace=0.4, wspace=0.5)

        ax = fig.add_subplot(gs[0, :])
        ax.axis('off')

        # ---- header panel -----------------------------------------------
        _draw_stat_boxes(ax, df_summ_player, df_summ_player_pct)

        if 'parentOrgId' in current_team:
            logo_team_id = current_team['parentOrgId']
            team_label = mlb_teams[mlb_teams['team_id'] == logo_team_id]['franchise'].values[0]
        else:
            logo_team_id = current_team['id']
            team_label = current_team['name']
        _draw_player_header(ax, person, team_label)

        _add_image(ax, _load_headshot(batter_select, sport_id_input), zoom=0.3, xy=(0.125, 0.8))
        logo_url = team_logos[team_logos['id'] == logo_team_id]['imageLink'].values[0]
        _add_image(ax, plt.imread(logo_url), zoom=0.225, xy=(0.875, 0.8))

        _centered_text(ax, f'2024 {_LEVEL_NAMES[sport_id_input]} Metrics', y=0.62, fontsize=20)

        # ---- pitch-type table -------------------------------------------
        batter_name = df_summ_update.xs(batter_select, level=0).index[0]
        df_plot = (df_summ_batter_pitch[column_list_pitch]
                   .xs([batter_select, batter_name])
                   .sort_values('pitches', ascending=False))
        df_plot = df_plot[df_plot['pitches'] > 0]
        _draw_pitch_table(fig.add_subplot(gs[2, 1:-1]), df_plot, df_summ_batter_pitch_pct_rank)

        # ---- rolling charts ---------------------------------------------
        rolling_specs = [(input.stat_1(), input.window_1()),
                         (input.stat_2(), input.window_2()),
                         (input.stat_3(), input.window_3())]
        for grid_col, (stat, window_width) in enumerate(rolling_specs, start=1):
            _rolling_plot(stat=stat, window_width=window_width,
                          ax=fig.add_subplot(gs[3, grid_col]),
                          df_r=df_roll, df_r_summ_avg=df_summ_avg_update)

        # ---- footer and colour legend -----------------------------------
        ax_bot = fig.add_subplot(gs[4, :])
        ax_bot.text(x=0.05, y=-0.5, s='By: @TJStats', ha='left', fontdict={'size': 14}, fontname='arial')
        ax_bot.text(x=1-0.05, y=-0.5, s='Data: MLB', ha='right', fontdict={'size': 14}, fontname='arial')
        ax_bot.axis('off')

        _draw_percentile_colorbar(fig.add_subplot(gs[1, 1:-1]))

        # ``subplots_adjust`` returns None, and the original returned that
        # None. Return the figure itself so the renderer gets it explicitly.
        fig.subplots_adjust(left=0.03, right=0.97, top=0.95, bottom=0.05)
        return fig
663
+
664
+
665
+
666
+ from shiny import App, Inputs, Outputs, Session, reactive, render, req, ui
667
+
668
+
669
+
670
# Application object: a single fluid page with a sidebar of controls (batter
# selector, three rolling-stat/window pairs, "Generate" button) next to the
# player-card plot.
app = App(ui.page_fluid(
    # ui.tags.base(href=base_url),
    ui.tags.div(
        {"style": "width:90%;margin: 0 auto;max-width: 1600px;"},
        ui.tags.style(
            """
            h4 {
                margin-top: 1em;font-size:35px;
            }
            h2{
                font-size:25px;
            }
            """
        ),
        shinyswatch.theme.simplex(),
        ui.tags.h4("TJStats"),
        ui.tags.i("Baseball Analytics and Visualizations"),
        ui.row(
            ui.layout_sidebar(

                # NOTE(review): "Select Batter" is passed as the second
                # positional argument of output_ui -- confirm which parameter
                # it binds to; the visible label comes from server().test.
                ui.panel_sidebar(ui.output_ui('test', "Select Batter"),
                                 ui.input_select('stat_1', "Select Rolling Stat 1", stat_roll_dict, selectize=True),
                                 ui.input_numeric('window_1', "Select Rolling Window 1", value=100),
                                 ui.input_select('stat_2', "Select Rolling Stat 2", stat_roll_dict, selected='k_percent', selectize=True),
                                 # Labels fixed: the window inputs 2 and 3 were
                                 # labelled "Select Rolling Stat 2/3" (copy-paste).
                                 ui.input_numeric('window_2', "Select Rolling Window 2", value=100),
                                 ui.input_select('stat_3', "Select Rolling Stat 3", stat_roll_dict, selected='bb_percent', selectize=True),
                                 ui.input_numeric('window_3', "Select Rolling Window 3", value=100),
                                 ui.input_action_button("go", "Generate", class_="btn-primary"), width=2),

                ui.page_navbar(

                    ui.nav_panel("Player Cards",
                                 ui.output_plot('a_plot', width='1000px', height='1000px')),
                    id="my_tabs",
                ))),)), server)
705
+
706
+
707
+
708
+ # app = App(app_ui, server)
batting_update.py ADDED
@@ -0,0 +1,608 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import joblib
4
+ import math
5
+ import pickle
6
+
7
# Pre-trained models, deserialised once when the module is imported.
barrel_model = joblib.load('joblib_model/barrel_model.joblib')
# `loaded_model` used to be a second load of the same barrel-model file; it is
# kept as an alias so any code that still refers to that name keeps working.
loaded_model = barrel_model
in_zone_model = joblib.load('joblib_model/in_zone_model_knn_20240410.joblib')
attack_zone_model = joblib.load('joblib_model/model_attack_zone.joblib')
xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')
px_model = joblib.load('joblib_model/linear_reg_model_x.joblib')
pz_model = joblib.load('joblib_model/linear_reg_model_z.joblib')
14
+
15
+
16
def percentile(n):
    """Build an aggregation callable that returns the NaN-ignoring n-th percentile.

    The returned function is named ``percentile_<n>`` so that it gets a
    readable label when used as an aggregation function.
    """
    def _nan_percentile(values):
        return np.nanpercentile(values, n)

    _nan_percentile.__name__ = 'percentile_%s' % n
    return _nan_percentile
21
+
22
+
23
def df_update(df=pd.DataFrame()):
    """Add the per-pitch indicator columns that the batting summaries aggregate.

    The frame is modified in place and also returned.  It must provide the
    columns ``zone``, ``x``, ``y``, ``px``, ``pz``, ``sz_top``, ``sz_bot``,
    ``event_type``, ``play_description``, ``play_code``, ``is_swing``,
    ``is_pitch``, ``launch_speed``, ``launch_angle``, ``pitch_type`` and
    ``trajectory``.

    Every model call is made only on a non-empty selection of rows, so a frame
    without (for example) any batted balls does not hand an empty array to a
    model.
    """
    # A strike-zone bound of exactly 0 is treated as missing.
    df.loc[df['sz_top'] == 0, 'sz_top'] = np.nan
    df.loc[df['sz_bot'] == 0, 'sz_bot'] = np.nan

    # Positive zones below 10 are in-zone, 10 and above are out of zone, and a
    # non-positive or missing zone is left unknown (NaN).
    df['in_zone'] = [x < 10 if x > 0 else np.nan for x in df['zone']]

    # Fill missing plate locations from the x / y coordinates.
    missing_px = df['x'].notna() & df['px'].isna()
    if missing_px.any():
        df.loc[missing_px, 'px'] = px_model.predict(df.loc[missing_px, ['x']])
        missing_pz = df['y'].notna() & df['pz'].isna()
        if missing_pz.any():
            # NOTE(review): pz is predicted with px_model plus a 3.2 offset,
            # while pz_model is loaded but never used here — confirm intended.
            df.loc[missing_pz, 'pz'] = px_model.predict(df.loc[missing_pz, ['y']]) + 3.2

    # Where the zone is unknown but a location and strike zone exist, let the
    # in-zone model decide.
    needs_zone = df['px'].notna() & df['in_zone'].isna() & df['sz_top'].notna()
    if needs_zone.any():
        print('We found missing data')
        df.loc[needs_zone, 'in_zone'] = in_zone_model.predict(
            df.loc[needs_zone, ['px', 'pz', 'sz_top', 'sz_bot']].values)

    hit_codes = ['single',
                 'double', 'home_run', 'triple']

    ab_codes = ['single', 'strikeout', 'field_out',
                'grounded_into_double_play', 'fielders_choice', 'force_out',
                'double', 'field_error', 'home_run', 'triple',
                'double_play',
                'fielders_choice_out', 'strikeout_double_play',
                'other_out', 'triple_play']

    obp_true_codes = ['single', 'walk',
                      'double', 'home_run', 'triple',
                      'hit_by_pitch', 'intent_walk']

    obp_codes = ['single', 'strikeout', 'walk', 'field_out',
                 'grounded_into_double_play', 'fielders_choice', 'force_out',
                 'double', 'sac_fly', 'field_error', 'home_run', 'triple',
                 'hit_by_pitch', 'double_play', 'intent_walk',
                 'fielders_choice_out', 'strikeout_double_play',
                 'sac_fly_double_play',
                 'other_out', 'triple_play']

    bip_codes = ['In play, no out', 'In play, run(s)', 'In play, out(s)']

    event_type = df['event_type']
    launch_speed = df['launch_speed']
    launch_angle = df['launch_angle']

    # Boolean event flags.
    df['hits'] = event_type.isin(hit_codes).to_numpy()
    df['ab'] = event_type.isin(ab_codes).to_numpy()
    df['on_base'] = event_type.isin(obp_true_codes).to_numpy()
    df['obp'] = event_type.isin(obp_codes).to_numpy()
    df['bip'] = df['play_description'].isin(bip_codes).to_numpy()

    # Rows with a measured launch speed; used as the denominator for the
    # batted-ball quality rates.
    df['bip_div'] = launch_speed.notna()

    df['barrel'] = np.nan
    batted = launch_speed.notna() & launch_angle.notna()
    if batted.any():
        df.loc[batted, 'barrel'] = barrel_model.predict(
            df.loc[batted, ['launch_speed', 'launch_angle']])

    # Missing launch angle -> 0, angle from 8 to 32 inclusive -> 1, else NaN.
    df['sweet_spot'] = np.select(
        [launch_angle.isna(), (launch_angle >= 8) & (launch_angle <= 32)],
        [False, True],
        default=np.nan)

    # Missing launch speed -> 0, speed of at least 94.5 -> 1, else NaN.
    df['hard_hit'] = np.select(
        [launch_speed.isna(), launch_speed >= 94.5],
        [False, True],
        default=np.nan)

    # Total bases per hit type; NaN for every other row.
    df['tb'] = np.select(
        [event_type == 'single',
         event_type == 'double',
         event_type == 'triple',
         event_type == 'home_run'],
        [1, 2, 3, 4],
        default=np.nan)

    woba_zero_codes = ['strikeout', 'field_out', 'sac_fly', 'force_out',
                       'grounded_into_double_play', 'fielders_choice', 'field_error',
                       'sac_bunt', 'double_play', 'fielders_choice_out',
                       'strikeout_double_play',
                       'sac_fly_double_play', 'other_out']

    df['woba'] = np.select(
        [event_type.isin(woba_zero_codes),
         event_type == 'walk',
         event_type == 'hit_by_pitch',
         event_type == 'single',
         event_type == 'double',
         event_type == 'triple',
         event_type == 'home_run'],
        [0, 0.696, 0.726, 0.883, 1.244, 1.569, 2.004],
        default=np.nan)

    woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                  'double', 'sac_fly', 'force_out', 'home_run',
                  'grounded_into_double_play', 'fielders_choice', 'field_error',
                  'triple', 'sac_bunt', 'double_play',
                  'fielders_choice_out', 'strikeout_double_play',
                  'sac_fly_double_play', 'other_out']

    # 1 for events that count toward the wOBA denominator, NaN otherwise.
    df['woba_codes'] = np.where(event_type.isin(woba_codes), 1, np.nan)

    # wOBA value restricted to balls in play.
    df['woba_contact'] = np.where(df['bip'].to_numpy() == 1, df['woba'].to_numpy(), np.nan)

    play_code = df['play_code']
    df['whiffs'] = play_code.isin(['S', 'W', 'T']).astype('int64').to_numpy()
    df['csw'] = play_code.isin(['S', 'W', 'T', 'C']).astype('int64').to_numpy()
    df['swings'] = [1 if x == True else 0 for x in df['is_swing']]  # noqa: E712

    # Explicit comparisons so that unknown (NaN) zones count as neither in nor out.
    in_zone = df['in_zone'] == True  # noqa: E712
    out_zone = df['in_zone'] == False  # noqa: E712
    swung = df['swings'] == 1
    made_contact = df['whiffs'] == 0

    df['out_zone'] = out_zone
    df['zone_swing'] = in_zone & swung
    df['zone_contact'] = in_zone & swung & made_contact
    df['ozone_swing'] = out_zone & swung
    df['ozone_contact'] = out_zone & swung & made_contact

    # Any event whose name contains 'strikeout' counts as a strikeout.
    strikeout_events = [e for e in event_type.dropna().unique() if 'strikeout' in e]
    df['k'] = event_type.isin(strikeout_events)
    df['bb'] = event_type.isin(['walk', 'intent_walk'])

    df['k_minus_bb'] = df['k'].astype(np.float32) - df['bb'].astype(np.float32)
    df['bb_minus_k'] = df['bb'].astype(np.float32) - df['k'].astype(np.float32)

    # A row ends a plate appearance when it carries an event-type string.
    df['pa'] = [1 if isinstance(x, str) else 0 for x in event_type]
    df['pitches'] = [1 if x else 0 for x in df['is_pitch']]

    df.loc[launch_speed.isna(), 'barrel'] = np.nan

    pitch_cat = {'FA': 'Fastball',
                 'FF': 'Fastball',
                 'FT': 'Fastball',
                 'FC': 'Fastball',
                 'FS': 'Off-Speed',
                 'FO': 'Off-Speed',
                 'SI': 'Fastball',
                 'ST': 'Breaking',
                 'SL': 'Breaking',
                 'CU': 'Breaking',
                 'KC': 'Breaking',
                 'SC': 'Off-Speed',
                 'GY': 'Off-Speed',
                 'SV': 'Breaking',
                 'CS': 'Breaking',
                 'CH': 'Off-Speed',
                 'KN': 'Off-Speed',
                 'EP': 'Breaking',
                 'UN': np.nan,
                 'IN': np.nan,
                 'PO': np.nan,
                 'AB': np.nan,
                 'AS': np.nan,
                 'NP': np.nan}
    df['pitch_category'] = df['pitch_type'].map(pitch_cat).fillna('Unknown')
    # Constant key so the whole frame can be grouped into one "average" row.
    df['average'] = 'average'

    # Fold bunt trajectories into their regular counterparts.
    df.loc[df['trajectory'] == 'bunt_popup', 'trajectory'] = 'popup'
    df.loc[df['trajectory'] == 'bunt_grounder', 'trajectory'] = 'ground_ball'
    df.loc[df['trajectory'] == '', 'trajectory'] = np.nan
    df.loc[df['trajectory'] == 'bunt_line_drive', 'trajectory'] = 'line_drive'

    # Reindex to the four expected indicator columns so the assignment also
    # works when a category is absent from the data or an unexpected one
    # appears; absent categories become all-zero columns.
    trajectory_cols = ['trajectory_fly_ball', 'trajectory_ground_ball',
                       'trajectory_line_drive', 'trajectory_popup']
    trajectory_dummies = pd.get_dummies(df['trajectory'], prefix='trajectory')
    df[trajectory_cols] = trajectory_dummies.reindex(columns=trajectory_cols, fill_value=0)

    location_cols = ['px', 'pz', 'sz_top', 'sz_bot']
    df['attack_zone'] = np.nan
    located = df[location_cols].notna().all(axis=1)
    if located.any():
        df.loc[located, 'attack_zone'] = attack_zone_model.predict(df.loc[located, location_cols])

    # Attack-zone codes used below: 0 heart, 1 shadow, 2 chase, 3 waste.
    for code, label in enumerate(['heart', 'shadow', 'chase', 'waste']):
        df[label] = df['attack_zone'] == code
    for code, label in enumerate(['heart', 'shadow', 'chase', 'waste']):
        df[label + '_swing'] = (df['attack_zone'] == code) & (df['swings'] == 1)

    df['xwoba'] = np.nan
    df['xwoba_contact'] = np.nan

    contact_cols = ['launch_angle', 'launch_speed']
    has_contact_data = df[contact_cols].notna().all(axis=1)
    if has_contact_data.any():
        # Expected value = class probabilities weighted by the value assigned
        # to each of the five model classes.  Computed once, used for both columns.
        class_weights = np.array([0, 0.883, 1.244, 1.569, 2.004])
        class_probs = xwoba_model.predict_proba(df.loc[has_contact_data, contact_cols])
        expected_value = (class_probs * class_weights).sum(axis=1)
        df.loc[has_contact_data, 'xwoba'] = expected_value
        df.loc[has_contact_data, 'xwoba_contact'] = expected_value

    # Fixed-value outcomes do not depend on the model, so they are applied
    # whether or not any batted-ball rows exist.
    ## Assign a value of 0.696 to every walk in the dataset
    df.loc[event_type.isin(['walk']), 'xwoba'] = 0.696

    ## Assign a value of 0.726 to every hit by pitch in the dataset
    df.loc[event_type.isin(['hit_by_pitch']), 'xwoba'] = 0.726

    ## Assign a value of 0 to every Strikeout in the dataset
    df.loc[event_type.isin(['strikeout', 'strikeout_double_play']), 'xwoba'] = 0

    return df
290
+
291
def df_update_summ(df=pd.DataFrame()):
    """Aggregate the per-pitch frame into one row per batter.

    Groups by ``batter_id`` and ``batter_name``.  Most output columns are sums
    of a pitch-level column; launch speed/angle get max, 90th percentile and
    mean, and ``attack_zone`` is a non-null count.  The group keys are returned
    as ordinary columns.
    """
    # Output columns, in the order they appear in the result.
    output_order = [
        'pa', 'ab', 'obp_pa', 'hits', 'on_base', 'k', 'bb', 'bb_minus_k',
        'csw', 'bip', 'bip_div', 'tb', 'woba', 'woba_contact', 'xwoba',
        'xwoba_contact', 'woba_codes', 'hard_hit', 'barrel', 'sweet_spot',
        'max_launch_speed', 'launch_speed_90', 'launch_speed', 'launch_angle',
        'pitches', 'swings', 'in_zone', 'out_zone', 'whiffs', 'zone_swing',
        'zone_contact', 'ozone_swing', 'ozone_contact', 'ground_ball',
        'line_drive', 'fly_ball', 'pop_up', 'attack_zone', 'heart', 'shadow',
        'chase', 'waste', 'heart_swing', 'shadow_swing', 'chase_swing',
        'waste_swing',
    ]
    # Summed outputs whose source column has a different name.
    sum_sources = {
        'obp_pa': 'obp',
        'pitches': 'is_pitch',
        'ground_ball': 'trajectory_ground_ball',
        'line_drive': 'trajectory_line_drive',
        'fly_ball': 'trajectory_fly_ball',
        'pop_up': 'trajectory_popup',
    }
    # Outputs that are not plain sums.
    other_aggs = {
        'max_launch_speed': ('launch_speed', 'max'),
        'launch_speed_90': ('launch_speed', percentile(90)),
        'launch_speed': ('launch_speed', 'mean'),
        'launch_angle': ('launch_angle', 'mean'),
        'attack_zone': ('attack_zone', 'count'),
    }

    named_aggs = {}
    for out_name in output_order:
        if out_name in other_aggs:
            named_aggs[out_name] = other_aggs[out_name]
        else:
            named_aggs[out_name] = (sum_sources.get(out_name, out_name), 'sum')

    grouped = df.groupby(['batter_id', 'batter_name'])
    df_summ = grouped.agg(**named_aggs).reset_index()
    return df_summ
341
+
342
def df_update_summ_avg(df=pd.DataFrame()):
    """Aggregate the per-pitch frame over the ``average`` grouping column.

    Produces the same output columns as the per-batter summary, but grouped by
    ``average`` instead of by batter.  The group key is returned as an
    ordinary column.
    """
    # Output columns, in the order they appear in the result.
    output_order = [
        'pa', 'ab', 'obp_pa', 'hits', 'on_base', 'k', 'bb', 'bb_minus_k',
        'csw', 'bip', 'bip_div', 'tb', 'woba', 'woba_contact', 'xwoba',
        'xwoba_contact', 'woba_codes', 'hard_hit', 'barrel', 'sweet_spot',
        'max_launch_speed', 'launch_speed_90', 'launch_speed', 'launch_angle',
        'pitches', 'swings', 'in_zone', 'out_zone', 'whiffs', 'zone_swing',
        'zone_contact', 'ozone_swing', 'ozone_contact', 'ground_ball',
        'line_drive', 'fly_ball', 'pop_up', 'attack_zone', 'heart', 'shadow',
        'chase', 'waste', 'heart_swing', 'shadow_swing', 'chase_swing',
        'waste_swing',
    ]
    # Summed outputs whose source column has a different name.
    sum_sources = {
        'obp_pa': 'obp',
        'pitches': 'is_pitch',
        'ground_ball': 'trajectory_ground_ball',
        'line_drive': 'trajectory_line_drive',
        'fly_ball': 'trajectory_fly_ball',
        'pop_up': 'trajectory_popup',
    }
    # Outputs that are not plain sums.
    other_aggs = {
        'max_launch_speed': ('launch_speed', 'max'),
        'launch_speed_90': ('launch_speed', percentile(90)),
        'launch_speed': ('launch_speed', 'mean'),
        'launch_angle': ('launch_angle', 'mean'),
        'attack_zone': ('attack_zone', 'count'),
    }

    named_aggs = {
        out_name: other_aggs.get(out_name, (sum_sources.get(out_name, out_name), 'sum'))
        for out_name in output_order
    }

    df_summ_avg = df.groupby(['average']).agg(**named_aggs).reset_index()
    return df_summ_avg
396
+
397
def df_summ_changes(df_summ=pd.DataFrame()):
    """Add rate columns (avg, obp, slg, ops, percentages) to a summary frame.

    Each rate is a numerator column divided by a denominator column and is NaN
    wherever the denominator is unusable (zero, or for the pitch-based zone,
    swing and swinging-strike rates, not positive).  The divisions are
    vectorised, so they work for any index rather than only a 0..n-1 index.

    The new columns are written onto ``df_summ`` itself; the returned frame is
    that frame with rows whose ``bip`` is missing dropped.
    """
    def ratio(numerator, denominator, positive_only=False):
        # Element-wise numerator / denominator with unusable denominators -> NaN.
        den = denominator.astype('float64')
        num = numerator.astype('float64')
        usable = (den > 0) if positive_only else (den != 0)
        return num / den.where(usable)

    # (output column, numerator column, denominator column, positive-only guard)
    first_rates = [
        ('avg', 'hits', 'ab', False),
        ('obp', 'on_base', 'obp_pa', False),
        ('slg', 'tb', 'ab', False),
    ]
    for out_name, num_col, den_col, positive_only in first_rates:
        df_summ[out_name] = ratio(df_summ[num_col], df_summ[den_col], positive_only)

    df_summ['ops'] = df_summ['obp'] + df_summ['slg']

    middle_rates = [
        ('k_percent', 'k', 'pa', False),
        ('bb_percent', 'bb', 'pa', False),
        ('bb_minus_k_percent', 'bb_minus_k', 'pa', False),
        ('bb_over_k_percent', 'bb', 'k', False),
        ('csw_percent', 'csw', 'pitches', False),
        ('sweet_spot_percent', 'sweet_spot', 'bip_div', False),
        ('woba_percent', 'woba', 'woba_codes', False),
        ('woba_percent_contact', 'woba_contact', 'bip', False),
        ('hard_hit_percent', 'hard_hit', 'bip_div', False),
        ('barrel_percent', 'barrel', 'bip_div', False),
        ('zone_contact_percent', 'zone_contact', 'zone_swing', False),
        ('zone_swing_percent', 'zone_swing', 'in_zone', False),
        ('zone_percent', 'in_zone', 'pitches', True),
    ]
    for out_name, num_col, den_col, positive_only in middle_rates:
        df_summ[out_name] = ratio(df_summ[num_col], df_summ[den_col], positive_only)

    # Swings at pitches outside the zone, per pitch that was not in the zone.
    df_summ['chase_percent'] = ratio(df_summ['ozone_swing'],
                                     df_summ['pitches'] - df_summ['in_zone'])

    last_rates = [
        ('chase_contact', 'ozone_contact', 'ozone_swing', False),
        ('swing_percent', 'swings', 'pitches', True),
        ('whiff_rate', 'whiffs', 'swings', False),
        ('swstr_rate', 'whiffs', 'pitches', True),
        ('ground_ball_percent', 'ground_ball', 'bip', False),
        ('line_drive_percent', 'line_drive', 'bip', False),
        ('fly_ball_percent', 'fly_ball', 'bip', False),
        ('pop_up_percent', 'pop_up', 'bip', False),
        ('heart_zone_percent', 'heart', 'attack_zone', False),
        ('shadow_zone_percent', 'shadow', 'attack_zone', False),
        ('chase_zone_percent', 'chase', 'attack_zone', False),
        ('waste_zone_percent', 'waste', 'attack_zone', False),
        ('heart_zone_swing_percent', 'heart_swing', 'heart', False),
        ('shadow_zone_swing_percent', 'shadow_swing', 'shadow', False),
        ('chase_zone_swing_percent', 'chase_swing', 'chase', False),
        ('waste_zone_swing_percent', 'waste_swing', 'waste', False),
        ('xwoba_percent', 'xwoba', 'woba_codes', False),
        ('xwoba_percent_contact', 'xwoba_contact', 'bip', False),
    ]
    for out_name, num_col, den_col, positive_only in last_rates:
        df_summ[out_name] = ratio(df_summ[num_col], df_summ[den_col], positive_only)

    df_summ = df_summ.dropna(subset=['bip'])
    return df_summ
477
+
478
def df_summ_filter_out(df_summ=pd.DataFrame(),batter_select = 0):
    """Build the comparison pool for one batter and rank every column within it.

    ``df_summ`` must have a MultiIndex whose first level identifies the batter.
    The pool keeps rows with at least as many plate appearances as the
    selected batter's ``pa`` rounded down to a multiple of 10, capped at 500.

    Returns ``(pool, pool_percentile_ranks, player_rows, player_percentile_ranks)``.
    """
    df_summ_player = df_summ.xs(batter_select, level=0)

    # Pull the scalar out explicitly instead of relying on the implicit
    # conversion of a one-element Series to float, which pandas deprecates.
    player_pa = float(df_summ_player['pa'].iloc[0])
    min_pa = min(math.floor(player_pa / 10) * 10, 500)

    df_summ_filter = df_summ[df_summ['pa'] >= min_pa]
    df_summ_filter_pct = df_summ_filter.rank(pct=True, ascending=True)
    df_summ_player_pct = df_summ_filter_pct.xs(batter_select, level=0)
    return df_summ_filter,df_summ_filter_pct,df_summ_player,df_summ_player_pct
484
+
485
def df_summ_batter_pitch_up(df=pd.DataFrame()):
    """Summarise the per-pitch frame per batter and pitch category, with rates.

    Rows are grouped by ``batter_id``, ``batter_name`` and ``pitch_category``
    (rows with a missing category are dropped first).  The aggregated counts
    are followed by rate columns; a rate is NaN when its denominator is zero.

    ``woba_contact`` and ``xwoba_contact`` are summed from their own
    pitch-level columns, matching the per-batter and average summaries.
    """
    # Output columns of the aggregation, in result order.
    output_order = [
        'pa', 'ab', 'obp_pa', 'hits', 'on_base', 'k', 'bb', 'bb_minus_k',
        'csw', 'bip', 'bip_div', 'tb', 'woba', 'woba_contact', 'xwoba',
        'xwoba_contact', 'woba_codes', 'hard_hit', 'barrel', 'sweet_spot',
        'max_launch_speed', 'launch_speed_90', 'launch_speed', 'launch_angle',
        'pitches', 'swings', 'in_zone', 'out_zone', 'whiffs', 'zone_swing',
        'zone_contact', 'ozone_swing', 'ozone_contact', 'ground_ball',
        'line_drive', 'fly_ball', 'pop_up', 'attack_zone', 'heart', 'shadow',
        'chase', 'waste', 'heart_swing', 'shadow_swing', 'chase_swing',
        'waste_swing',
    ]
    # Summed outputs whose source column has a different name.
    sum_sources = {
        'obp_pa': 'obp',
        'pitches': 'is_pitch',
        'ground_ball': 'trajectory_ground_ball',
        'line_drive': 'trajectory_line_drive',
        'fly_ball': 'trajectory_fly_ball',
        'pop_up': 'trajectory_popup',
    }
    # Outputs that are not plain sums.
    other_aggs = {
        'max_launch_speed': ('launch_speed', 'max'),
        'launch_speed_90': ('launch_speed', percentile(90)),
        'launch_speed': ('launch_speed', 'mean'),
        'launch_angle': ('launch_angle', 'mean'),
        'attack_zone': ('attack_zone', 'count'),
    }
    named_aggs = {
        out_name: other_aggs.get(out_name, (sum_sources.get(out_name, out_name), 'sum'))
        for out_name in output_order
    }

    df_summ_batter_pitch = (
        df.dropna(subset=['pitch_category'])
        .groupby(['batter_id', 'batter_name', 'pitch_category'])
        .agg(**named_aggs)
        .reset_index()
    )

    def ratio(numerator, denominator):
        # Element-wise numerator / denominator; zero denominators give NaN.
        den = denominator.astype('float64')
        return numerator.astype('float64') / den.where(den != 0)

    summ = df_summ_batter_pitch

    # (output column, numerator column, denominator column)
    for out_name, num_col, den_col in [
        ('avg', 'hits', 'ab'),
        ('obp', 'on_base', 'obp_pa'),
        ('slg', 'tb', 'ab'),
    ]:
        summ[out_name] = ratio(summ[num_col], summ[den_col])

    summ['ops'] = summ['obp'] + summ['slg']

    for out_name, num_col, den_col in [
        ('k_percent', 'k', 'pa'),
        ('bb_percent', 'bb', 'pa'),
        ('bb_minus_k_percent', 'bb_minus_k', 'pa'),
        ('bb_over_k_percent', 'bb', 'k'),
        ('csw_percent', 'csw', 'pitches'),
        ('sweet_spot_percent', 'sweet_spot', 'bip_div'),
        ('woba_percent', 'woba', 'woba_codes'),
        ('woba_percent_contact', 'woba_contact', 'bip'),
        ('hard_hit_percent', 'hard_hit', 'bip_div'),
        ('barrel_percent', 'barrel', 'bip_div'),
        ('zone_contact_percent', 'zone_contact', 'zone_swing'),
        ('zone_swing_percent', 'zone_swing', 'in_zone'),
        ('zone_percent', 'in_zone', 'pitches'),
    ]:
        summ[out_name] = ratio(summ[num_col], summ[den_col])

    # Swings at pitches outside the zone, per pitch that was not in the zone.
    summ['chase_percent'] = ratio(summ['ozone_swing'], summ['pitches'] - summ['in_zone'])

    for out_name, num_col, den_col in [
        ('chase_contact', 'ozone_contact', 'ozone_swing'),
        ('swing_percent', 'swings', 'pitches'),
        ('whiff_rate', 'whiffs', 'swings'),
        ('swstr_rate', 'whiffs', 'pitches'),
        ('heart_zone_percent', 'heart', 'attack_zone'),
        ('shadow_zone_percent', 'shadow', 'attack_zone'),
        ('chase_zone_percent', 'chase', 'attack_zone'),
        ('waste_zone_percent', 'waste', 'attack_zone'),
        ('heart_zone_swing_percent', 'heart_swing', 'heart'),
        ('shadow_zone_swing_percent', 'shadow_swing', 'shadow'),
        ('chase_zone_swing_percent', 'chase_swing', 'chase'),
        ('waste_zone_swing_percent', 'waste_swing', 'waste'),
        ('xwoba_percent', 'xwoba', 'woba_codes'),
        ('xwoba_percent_contact', 'xwoba_contact', 'bip'),
    ]:
        summ[out_name] = ratio(summ[num_col], summ[den_col])

    df_summ_batter_pitch['bip'] = df_summ_batter_pitch['bip'].fillna(0)

    return df_summ_batter_pitch