nesticot committed on
Commit
0f0d254
·
verified ·
1 Parent(s): e4ded93

Delete pitch_summary_functions.py

Browse files
Files changed (1) hide show
  1. pitch_summary_functions.py +0 -1005
pitch_summary_functions.py DELETED
@@ -1,1005 +0,0 @@
1
-
2
- import pandas as pd
3
- import numpy as np
4
- import json
5
- from matplotlib.ticker import FuncFormatter
6
- from matplotlib.ticker import MaxNLocator
7
- import math
8
- from matplotlib.patches import Ellipse
9
- import matplotlib.transforms as transforms
10
- import matplotlib.colors
11
- import matplotlib.colors as mcolors
12
- import seaborn as sns
13
- import matplotlib.pyplot as plt
14
- import requests
15
-
16
# Shared matplotlib font dictionaries for tick labels, titles and axis labels.
# The family used to be spelled 'calibi', which is not a font name matplotlib
# can resolve, so these texts fell back to the default font instead of the
# 'Calibri' family the rest of the module sets through plt.rcParams.
font_properties = {'family': 'Calibri', 'size': 12}
font_properties_titles = {'family': 'Calibri', 'size': 20}
font_properties_axes = {'family': 'Calibri', 'size': 16}
19
-
20
-
21
# Generic colour cycle for plot elements that are not tied to a pitch type.
colour_palette = ['#FFB000', '#648FFF', '#785EF0',
                  '#DC267F', '#FE6100', '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED']

# Season window and the FanGraphs season used for the player lookup below.
season_start = '2024-03-20'
season_end = '2024-09-29'
season_fg = 2024

# FanGraphs pitching leaderboard for the season, fetched once at import time.
# A timeout keeps a stalled connection from hanging the import forever, and
# raise_for_status() surfaces HTTP errors before the body is parsed as JSON.
_chad_fg_response = requests.get(
    f'https://www.fangraphs.com/api/leaders/major-league/data?age=&pos=all&stats=pit&lg=all&qual=0&season={season_fg}&season={season_fg}&month=1000&season1={season_fg}&ind=0&pageitems=2000000000&pagenum=1&ind=0&rost=0&players=&type=36&postseason=&sortdir=default&sortstat=sp_pitching',
    timeout=30,
)
_chad_fg_response.raise_for_status()
chad_fg = _chad_fg_response.json()

# Blue -> white -> orange colormap used to shade summary values.
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF', '#FFFFFF', '#FFB000'])

# One row per leaderboard entry: MLBAM id, FanGraphs id and player name.
chadwick_df_small = pd.DataFrame(data={
    'key_mlbam': [row['xMLBAMID'] for row in chad_fg['data']],
    'key_fangraphs': [row['playerid'] for row in chad_fg['data']],
    'Name': [row['PlayerName'] for row in chad_fg['data']],
})

# MLBAM id -> player name (ordered by name) and MLBAM id -> FanGraphs id
# (ordered by FanGraphs id).
pitcher_dicts = chadwick_df_small.set_index('key_mlbam')['Name'].sort_values().to_dict()
mlb_fg_dicts = chadwick_df_small.set_index('key_mlbam')['key_fangraphs'].sort_values().to_dict()
38
-
39
-
40
- ### DF UPDATE CODE ###
41
def df_update_code(df):
    """Add approach angles, event flags and swing/contact flags to pitch data.

    The kinematic columns (``vy_f``, ``t``, ``vz_f``, ``vaa``, ``vx_f``,
    ``haa``) and the event flags (``pa``, ``k``, ``bb``, ``k_minus_bb``) are
    written onto the frame that was passed in. After that the rows are
    de-duplicated on ``play_id``, rows without ``start_speed`` are dropped,
    and only rows whose ``play_description`` is one of the recognised pitch
    results and whose ``zone`` is known are kept.

    Rows with a missing ``zone`` are now really dropped. Before, ``zone < 10``
    turned a missing zone into ``False``, so the following ``dropna`` on
    ``in_zone`` never removed anything and such pitches were counted as
    out-of-zone.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitch-level data with the columns ``vy0``, ``ay``, ``y0``, ``vz0``,
        ``az``, ``vx0``, ``ax``, ``event_type``, ``play_id``, ``start_speed``,
        ``zone``, ``play_description``, ``play_code``, ``launch_speed`` and
        ``launch_angle``.

    Returns
    -------
    pandas.DataFrame
        A new frame (one row per kept pitch) with the extra columns
        ``in_zone``, ``bip``, ``barrel``, ``sweet_spot``, ``hard_hit``, ``tb``,
        ``woba``, ``woba_codes``, ``pitches``, ``whiffs``, ``csw``, ``swings``,
        ``out_zone``, ``zone_swing``, ``zone_contact``, ``ozone_swing`` and
        ``ozone_contact``.
    """
    print('Starting')

    # Velocity components when the ball reaches y = 17/12 (presumably the
    # front of home plate in feet - confirm), and the angles they imply.
    df['vy_f'] = -(df['vy0']**2 - (2 * df['ay'] * (df['y0'] - 17/12)))**0.5
    df['t'] = (df['vy_f'] - df['vy0']) / df['ay']
    df['vz_f'] = df['vz0'] + (df['az'] * df['t'])
    df['vaa'] = -np.arctan(df['vz_f'] / df['vy_f']) * (180 / np.pi)
    df['vx_f'] = df['vx0'] + (df['ax'] * df['t'])
    df['haa'] = -np.arctan(df['vx_f'] / df['vy_f']) * (180 / np.pi)

    # Event types that end a plate appearance.
    end_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                 'double', 'sac_fly', 'force_out', 'home_run',
                 'grounded_into_double_play', 'fielders_choice', 'field_error',
                 'triple', 'sac_bunt', 'double_play', 'intent_walk',
                 'fielders_choice_out', 'strikeout_double_play',
                 'sac_fly_double_play', 'catcher_interf', 'other_out']

    # Any event whose name contains 'strikeout' / 'walk' counts as K / BB.
    event_names = df['event_type'].fillna('None').unique()
    df['pa'] = df['event_type'].isin(end_codes)
    df['k'] = df['event_type'].isin([name for name in event_names if 'strikeout' in name])
    df['bb'] = df['event_type'].isin([name for name in event_names if 'walk' in name])
    df['k_minus_bb'] = df['k'].astype(np.float32) - df['bb'].astype(np.float32)

    # From here on `df` is a new object; the caller's frame is not touched.
    df = df.drop_duplicates(subset=['play_id'])
    df = df.dropna(subset=['start_speed'])

    # Pitch results that count as a swing.
    swings_in = ['Swinging Strike', 'In play, no out',
                 'Foul', 'In play, out(s)',
                 'In play, run(s)', 'Swinging Strike (Blocked)',
                 'Foul Bunt', 'Foul Tip', 'Missed Bunt', 'Foul Pitchout', 'Swinging Pitchout']

    # Pitch results that are kept at all.
    codes_in = ['In play, out(s)',
                'Swinging Strike',
                'Ball',
                'Foul',
                'In play, no out',
                'Called Strike',
                'Foul Tip',
                'In play, run(s)',
                'Hit By Pitch',
                'Ball In Dirt',
                'Pitchout',
                'Swinging Strike (Blocked)',
                'Foul Bunt',
                'Missed Bunt',
                'Foul Pitchout',
                'Intent Ball',
                'Swinging Pitchout']

    # Keep recognised pitch results with a known zone. The explicit copy makes
    # the column assignments below operate on an independent frame.
    keep = df['play_description'].isin(codes_in) & df['zone'].notna()
    df_codes = df[keep].copy()
    df_codes['in_zone'] = df_codes['zone'] < 10

    launch_speed = df_codes['launch_speed']
    launch_angle = df_codes['launch_angle']
    no_launch_speed = launch_speed.isna()

    df_codes['bip'] = launch_speed.notna()

    # For barrel / sweet_spot / hard_hit: rows without the measurement get 0,
    # rows meeting the rule get 1 and every other row is left as NaN (the
    # np.select default).
    is_barrel = (
        (launch_speed * 1.5 - launch_angle >= 117)
        & (launch_speed + launch_angle >= 124)
        & (launch_speed > 98)
        & (launch_angle >= 8)
        & (launch_angle <= 50)
    )
    df_codes['barrel'] = np.select([no_launch_speed, is_barrel], [False, True], default=np.nan)

    is_sweet_spot = (launch_angle >= 8) & (launch_angle <= 32)
    df_codes['sweet_spot'] = np.select([launch_angle.isna(), is_sweet_spot], [False, True], default=np.nan)

    df_codes['hard_hit'] = np.select([no_launch_speed, launch_speed >= 94.5], [False, True], default=np.nan)

    event_type = df_codes['event_type']

    # Total bases for hits; NaN for everything else.
    conditions_tb = [
        event_type == 'single',
        event_type == 'double',
        event_type == 'triple',
        event_type == 'home_run',
    ]
    df_codes['tb'] = np.select(conditions_tb, [1, 2, 3, 4], default=np.nan)

    # Hard-coded wOBA weights per event (presumably season-specific linear
    # weights - confirm the source before reusing for another season).
    conditions_woba = [
        event_type == 'walk',
        event_type == 'hit_by_pitch',
        event_type == 'single',
        event_type == 'double',
        event_type == 'triple',
        event_type == 'home_run',
    ]
    choices_woba = [0.705,
                    0.688,
                    0.897,
                    1.233,
                    1.612,
                    2.013]
    df_codes['woba'] = np.select(conditions_woba, choices_woba, default=np.nan)

    # Events flagged with 1 in `woba_codes`; all other rows are NaN.
    woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                  'double', 'sac_fly', 'force_out', 'home_run',
                  'grounded_into_double_play', 'fielders_choice', 'field_error',
                  'triple', 'sac_bunt', 'double_play',
                  'fielders_choice_out', 'strikeout_double_play',
                  'sac_fly_double_play', 'other_out']
    df_codes['woba_codes'] = np.select([event_type.isin(woba_codes)], [1], default=np.nan)

    df_codes['pitches'] = 1
    df_codes['whiffs'] = df_codes['play_code'].isin(['S', 'W', 'T']).astype('int64')
    df_codes['csw'] = df_codes['play_code'].isin(['S', 'W', 'T', 'C']).astype('int64')
    df_codes['swings'] = df_codes['play_description'].isin(swings_in).astype('int64')

    in_zone = df_codes['in_zone']
    swung = df_codes['swings'] == 1
    made_contact = df_codes['whiffs'] == 0

    df_codes['out_zone'] = ~in_zone
    df_codes['zone_swing'] = in_zone & swung
    df_codes['zone_contact'] = in_zone & swung & made_contact
    df_codes['ozone_swing'] = ~in_zone & swung
    df_codes['ozone_contact'] = ~in_zone & swung & made_contact

    return df_codes
217
-
218
- ### GET COLOURS##
219
def get_color(value,normalize,cmap_sum):
    """Return the hex colour string for *value*.

    ``normalize`` is applied to the value, ``cmap_sum`` to the normalised
    result, and the colour that comes back is converted with
    ``matplotlib.colors.to_hex``.
    """
    return mcolors.to_hex(cmap_sum(normalize(value)))
222
-
223
- ### PERCENTILE ###
224
def percentile(n):
    """Build a callable that returns the ``n`` quantile of its argument.

    The callable invokes ``x.quantile(n)``. Its ``__name__`` is set to
    ``percentile_NN`` (``n`` times 100, zero-padded to two digits) so that
    several of them can be told apart by name.
    """
    label = f'percentile_{n * 100:02.0f}'

    def percentile_(x):
        return x.quantile(n)

    percentile_.__name__ = label
    return percentile_
229
-
230
- ### TJ STUFF+ DF CLEAN ###
231
def df_clean(df):
    """Return a cleaned copy of *df* with the derived tjStuff+ input columns.

    The input frame is copied first and is not modified. On the copy:

    * ``hb`` and ``x0`` are negated and ``spin_direction`` is replaced by
      ``360 - spin_direction`` for rows where ``pitcher_hand`` is ``'L'``;
    * ``pitch_l`` / ``bat_l`` are 1 for left-handed pitcher / batter, else 0;
    * pitch types ``FT``, ``SV`` and ``FO`` are relabelled ``SI``, ``SL`` and
      ``FS``;
    * per-pitcher fastball (``FF``/``FC``/``SI``) reference values are merged
      in and each pitch's difference to them is stored in the ``fb_*_diff``
      columns;
    * per-pitcher maximum speed / ``ivb`` and the differences to them are
      added;
    * ``vy_f``, ``t``, ``vz_f``, ``vaa``, ``vx_f`` and ``haa`` are computed
      from the release kinematics;
    * rows without ``pitch_type`` are dropped and every remaining missing
      value is replaced by 0.
    """
    df_copy = df.copy()

    # Mirror left-handed pitchers onto the right-handed orientation.
    lefty = df_copy['pitcher_hand'] == 'L'
    for mirrored in ('hb', 'x0'):
        df_copy.loc[lefty, mirrored] *= -1
    df_copy.loc[lefty, 'spin_direction'] = 360 - df_copy.loc[lefty, 'spin_direction']

    df_copy['pitch_l'] = [int(hand == 'L') for hand in df_copy['pitcher_hand']]
    df_copy['bat_l'] = [int(hand == 'L') for hand in df_copy['batter_hand']]

    relabel = {'FT': 'SI',
               'SV': 'SL',
               'FO': 'FS'}
    df_copy['pitch_type'] = df_copy['pitch_type'].replace(relabel)

    # Per-pitcher fastball reference values.
    fastballs = df_copy[df_copy.pitch_type.isin(["FF", "FC", "SI"])]
    df_copy_fb_sum = fastballs.groupby(['pitcher_id']).agg(
        fb_velo=('start_speed', 'mean'),
        fb_max_ivb=('ivb', percentile(0.9)),
        fb_max_x=('hb', percentile(0.9)),
        fb_min_x=('hb', percentile(0.1)),
        fb_max_velo=('start_speed', percentile(0.9)),
        fb_axis=('spin_direction', 'mean'),
    )
    df_copy = df_copy.merge(df_copy_fb_sum, left_on='pitcher_id', right_index=True, how='left')

    # Differences between each pitch and the pitcher's fastball references.
    speed = df_copy['start_speed']
    ivb = df_copy['ivb']
    hb = df_copy['hb']
    df_copy['fb_velo_diff'] = speed - df_copy['fb_velo']
    df_copy['fb_max_ivb_diff'] = ivb - df_copy['fb_max_ivb']
    df_copy['fb_max_hb_diff'] = -abs(hb - df_copy['fb_max_x'])
    df_copy['fb_min_hb_diff'] = hb - df_copy['fb_min_x']
    df_copy['fb_max_velo_diff'] = speed - df_copy['fb_max_velo']
    df_copy['fb_axis_diff'] = df_copy['spin_direction'] - df_copy['fb_axis']

    # Differences to the pitcher's overall maxima.
    by_pitcher = df_copy.groupby(['pitcher_id'])
    df_copy['max_speed'] = by_pitcher['start_speed'].transform('max')
    df_copy['max_speed_diff'] = df_copy['start_speed'] - df_copy['max_speed']

    df_copy['max_ivb'] = df_copy.groupby(['pitcher_id'])['ivb'].transform('max')
    df_copy['max_ivb_diff'] = df_copy['ivb'] - df_copy['max_ivb']

    # Velocity components at y = 17/12 and the resulting approach angles.
    df_copy['vy_f'] = -(df_copy['vy0']**2 - (2 * df_copy['ay'] * (df_copy['y0'] - 17/12)))**0.5
    df_copy['t'] = (df_copy['vy_f'] - df_copy['vy0']) / df_copy['ay']
    df_copy['vz_f'] = df_copy['vz0'] + (df_copy['az'] * df_copy['t'])
    df_copy['vaa'] = -np.arctan(df_copy['vz_f'] / df_copy['vy_f']) * (180 / np.pi)
    df_copy['vx_f'] = df_copy['vx0'] + (df_copy['ax'] * df_copy['t'])
    df_copy['haa'] = -np.arctan(df_copy['vx_f'] / df_copy['vy_f']) * (180 / np.pi)

    typed_only = df_copy.dropna(subset=['pitch_type'])
    return typed_only.fillna(0)
297
-
298
- ### PITCH COLOURS ###
299
# Hex colour for every pitch description, grouped by pitch family.
pitch_colours = dict([
    # Fastballs
    ('Four-Seam Fastball', '#FF007D'),
    ('Sinker', '#98165D'),
    ('Cutter', '#BE5FA0'),

    # Off-speed
    ('Changeup', '#F79E70'),
    ('Splitter', '#FE6100'),
    ('Screwball', '#F08223'),
    ('Forkball', '#FFB000'),

    # Sliders
    ('Slider', '#67E18D'),
    ('Sweeper', '#1BB999'),
    ('Slurve', '#376748'),

    # Curveballs
    ('Knuckle Curve', '#311D8B'),
    ('Curveball', '#3025CE'),
    ('Slow Curve', '#274BFC'),
    ('Eephus', '#648FFF'),

    ('Knuckleball', '#867A08'),

    # Everything else
    ('Pitch Out', '#472C30'),
    ('Other', '#9C8975'),
])
323
-
324
- ### PITCH ELLIPSE ###
325
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
    """
    Create a plot of the covariance confidence ellipse of *x* and *y*.

    Parameters
    ----------
    x, y : array-like, shape (n, )
        Input data.

    ax : matplotlib.axes.Axes
        The axes object to draw the ellipse into.

    n_std : float
        The number of standard deviations to determine the ellipse's radiuses.

    facecolor : color
        Fill colour of the ellipse.

    **kwargs
        Forwarded to `~matplotlib.patches.Ellipse`

    Returns
    -------
    matplotlib.patches.Ellipse or None
        The patch that was added to *ax*. ``None`` is returned, and nothing
        is drawn, when the covariance is degenerate (a variance that is zero
        or not finite) or when building the ellipse raises ``ValueError``.

    Raises
    ------
    ValueError
        If *x* and *y* differ in size.
    """
    if x.size != y.size:
        raise ValueError("x and y must be the same size")
    try:
        cov = np.cov(x, y)
        var_x = cov[0, 0]
        var_y = cov[1, 1]

        # Too few points or no spread along one axis: the correlation is
        # undefined and the ellipse would be built from NaN values.
        if not (np.isfinite(var_x) and np.isfinite(var_y)) or var_x <= 0 or var_y <= 0:
            return None

        # Rounding can push the coefficient marginally outside [-1, 1], which
        # would make one of the square roots below NaN.
        pearson = np.clip(cov[0, 1] / np.sqrt(var_x * var_y), -1.0, 1.0)

        # Using a special case to obtain the eigenvalues of this
        # two-dimensional dataset.
        ell_radius_x = np.sqrt(1 + pearson)
        ell_radius_y = np.sqrt(1 - pearson)
        ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2,
                          facecolor=facecolor, linewidth=2, linestyle='--', **kwargs)

        # Standard deviation of each variable times the requested number of
        # standard deviations gives the scale; the means give the centre.
        scale_x = np.sqrt(var_x) * n_std
        mean_x = np.mean(x)
        scale_y = np.sqrt(var_y) * n_std
        mean_y = np.mean(y)

        transf = transforms.Affine2D() \
            .rotate_deg(45) \
            .scale(scale_x, scale_y) \
            .translate(mean_x, mean_y)

        ellipse.set_transform(transf + ax.transData)
    except ValueError:
        # Best effort: a failed ellipse is skipped rather than aborting the plot.
        return None

    return ax.add_patch(ellipse)
385
-
386
- # DEFINE STRIKE ZONE
387
# Strike-zone outline as a closed path: the first corner is repeated at the
# end so that plotting the rows in order draws the full rectangle.
strike_zone = pd.DataFrame(
    [
        (-0.9, 1.5),
        (-0.9, 3.5),
        (0.9, 3.5),
        (0.9, 1.5),
        (-0.9, 1.5),
    ],
    columns=['PlateLocSide', 'PlateLocHeight'],
)
391
-
392
- ### STRIKE ZONE ###
393
def draw_line(axis,alpha_spot=1,catcher_p = True):
    """Draw the strike-zone outline and a home-plate outline on *axis*.

    Parameters
    ----------
    axis : matplotlib.axes.Axes
        Axes to draw on.
    alpha_spot : float
        Alpha applied to every line that is drawn.
    catcher_p : bool
        Selects which of the two home-plate outlines is drawn (presumably
        the catcher's view when true and the pitcher's view otherwise -
        confirm against callers).
    """
    axis.plot(strike_zone['PlateLocSide'], strike_zone['PlateLocHeight'],
              color='black', linewidth=1.3, zorder=3, alpha=alpha_spot)

    # Each entry is one straight segment of the plate: (x values, y values).
    if catcher_p:
        plate_segments = [
            ([-0.708, 0.708], [0.15, 0.15]),
            ([-0.708, -0.708], [0.15, 0.3]),
            ([-0.708, 0], [0.3, 0.5]),
            ([0, 0.708], [0.5, 0.3]),
            ([0.708, 0.708], [0.3, 0.15]),
        ]
    else:
        plate_segments = [
            ([-0.708, 0.708], [0.4, 0.4]),
            ([-0.708, -0.9], [0.4, -0.1]),
            ([-0.9, 0], [-0.1, -0.35]),
            ([0, 0.9], [-.35, -0.1]),
            ([0.9, 0.708], [-0.1, 0.4]),
        ]

    for segment_x, segment_y in plate_segments:
        axis.plot(segment_x, segment_y, color='black', linewidth=1, alpha=alpha_spot, zorder=1)
415
-
416
-
417
-
418
- ### FANGRAPHS STATS DICT ###
419
# Display settings per FanGraphs stat key: the mathtext table header and the
# format spec applied to the value.
# Headers are raw strings so that `\/` and `\%` are not treated as (invalid)
# Python escape sequences; the resulting text is unchanged.
# OBP and SLG use three decimals like the other rate stats (AVG, BABIP,
# wOBA); with the former '.0f' they were rendered as '0' or '1'.
fangraphs_stats_dict = {
    'IP': {'table_header': r'$\bf{IP}$', 'format': '.1f'},
    'TBF': {'table_header': r'$\bf{PA}$', 'format': '.0f'},
    'AVG': {'table_header': r'$\bf{AVG}$', 'format': '.3f'},
    'K/9': {'table_header': r'$\bf{K\/9}$', 'format': '.2f'},
    'BB/9': {'table_header': r'$\bf{BB\/9}$', 'format': '.2f'},
    'K/BB': {'table_header': r'$\bf{K\/BB}$', 'format': '.2f'},
    'HR/9': {'table_header': r'$\bf{HR\/9}$', 'format': '.2f'},
    'K%': {'table_header': r'$\bf{K\%}$', 'format': '.1%'},
    'BB%': {'table_header': r'$\bf{BB\%}$', 'format': '.1%'},
    'K-BB%': {'table_header': r'$\bf{K-BB\%}$', 'format': '.1%'},
    'WHIP': {'table_header': r'$\bf{WHIP}$', 'format': '.2f'},
    'BABIP': {'table_header': r'$\bf{BABIP}$', 'format': '.3f'},
    'LOB%': {'table_header': r'$\bf{LOB\%}$', 'format': '.1%'},
    'xFIP': {'table_header': r'$\bf{xFIP}$', 'format': '.2f'},
    'FIP': {'table_header': r'$\bf{FIP}$', 'format': '.2f'},
    'H': {'table_header': r'$\bf{H}$', 'format': '.0f'},
    '2B': {'table_header': r'$\bf{2B}$', 'format': '.0f'},
    '3B': {'table_header': r'$\bf{3B}$', 'format': '.0f'},
    'R': {'table_header': r'$\bf{R}$', 'format': '.0f'},
    'ER': {'table_header': r'$\bf{ER}$', 'format': '.0f'},
    'HR': {'table_header': r'$\bf{HR}$', 'format': '.0f'},
    'BB': {'table_header': r'$\bf{BB}$', 'format': '.0f'},
    'IBB': {'table_header': r'$\bf{IBB}$', 'format': '.0f'},
    'HBP': {'table_header': r'$\bf{HBP}$', 'format': '.0f'},
    'SO': {'table_header': r'$\bf{SO}$', 'format': '.0f'},
    'OBP': {'table_header': r'$\bf{OBP}$', 'format': '.3f'},
    'SLG': {'table_header': r'$\bf{SLG}$', 'format': '.3f'},
    'ERA': {'table_header': r'$\bf{ERA}$', 'format': '.2f'},
    'wOBA': {'table_header': r'$\bf{wOBA}$', 'format': '.3f'},
    'G': {'table_header': r'$\bf{G}$', 'format': '.0f'},
}
449
-
450
-
451
- ## Fangraphs Table
452
-
453
- ### FANGRAPHS SPLITS SCRAPE ###
454
# Split name -> list of split ids sent to FanGraphs as "strSplitArr"
# (presumably '5' / '6' select vs-left / vs-right batters - confirm).
split_dict = dict(all=[], left=['5'], right=['6'])
458
-
459
def fangraphs_scrape(pitcher_id=808967,
                     split='all',
                     start_date='2024-03-20',
                     end_date='2024-09-29'):
    """Fetch one pitcher's FanGraphs splits-leaderboard stats for a date range.

    Two requests are sent to the splits-leaders endpoint, one with
    ``strType`` ``"2"`` and one with ``strType`` ``"1"``; the first row of
    each response is taken and the second is merged over the first.

    Parameters
    ----------
    pitcher_id : int
        MLBAM id; translated to the FanGraphs id through ``mlb_fg_dicts``.
    split : str
        Key of ``split_dict`` (``'all'``, ``'left'`` or ``'right'``).
    start_date, end_date : date-like
        Anything ``pandas.to_datetime`` accepts; sent as ``YYYY-MM-DD``.

    Returns
    -------
    dict
        Stat name -> value for the requested pitcher.

    Raises
    ------
    KeyError
        If *pitcher_id* or *split* is unknown.
    requests.HTTPError
        If FanGraphs answers with an error status.
    IndexError
        If a response contains no rows.
    """
    url = "https://www.fangraphs.com/api/leaders/splits/splits-leaders"
    headers = {'Content-Type': 'application/json'}

    player_id = str(mlb_fg_dicts[pitcher_id])
    split_ids = split_dict[split]
    start = pd.to_datetime(start_date).strftime('%Y-%m-%d')
    end = pd.to_datetime(end_date).strftime('%Y-%m-%d')

    def fetch_first_row(stat_type):
        # Both requests share everything but "strType".
        payload = {
            "strPlayerId": player_id,
            "strSplitArr": split_ids,
            "strGroup": "season",
            "strPosition": "P",
            "strType": stat_type,
            "strStartDate": start,
            "strEndDate": end,
            "strSplitTeams": False,
            "dctFilters": [],
            "strStatType": "player",
            "strAutoPt": False,
            "arrPlayerId": [],
            "strSplitArrPitch": [],
            "arrWxTemperature": None,
            "arrWxPressure": None,
            "arrWxAirDensity": None,
            "arrWxElevation": None,
            "arrWxWindSpeed": None
        }
        # The timeout prevents a stalled connection from blocking forever and
        # raise_for_status() reports HTTP errors before the body is parsed.
        response = requests.post(url, data=json.dumps(payload), headers=headers, timeout=30)
        response.raise_for_status()
        return response.json()['data'][0]

    data_pull = fetch_first_row("2")
    # Keys present in both responses take their value from the "1" response.
    data_pull.update(fetch_first_row("1"))

    return data_pull
521
-
522
-
523
- ### FANGRAPHS TABLE PLOT ###
524
def fangraphs_table(data,
                    stats,
                    ax):
    """Render a one-row table of FanGraphs stats on *ax*.

    Values are formatted as strings directly rather than being written back
    into a numeric DataFrame, so no formatted text is assigned to a numeric
    column, and repeated entries in *stats* each keep their own column.

    Parameters
    ----------
    data : dict
        Stat name -> value, e.g. the result of ``fangraphs_scrape``.
    stats : list of str
        Stat names to show, in column order. Each name that is present in
        *data* must be a key of ``fangraphs_stats_dict``.
    ax : matplotlib.axes.Axes
        Axes that receives the table; its axis is switched off.

    Returns
    -------
    matplotlib.table.Table
        The table that was created.
    """
    placeholder = '---'

    headers = []
    cell_text = []
    for stat in stats:
        if stat not in data:
            # Unknown stat: placeholder for both the header and the value.
            headers.append(placeholder)
            cell_text.append(placeholder)
            continue

        headers.append(fangraphs_stats_dict[stat]['table_header'])
        value = data[stat]
        if value is None or (isinstance(value, str) and value == placeholder):
            # A missing value cannot be passed through format().
            cell_text.append(placeholder)
        else:
            cell_text.append(format(value, fangraphs_stats_dict[stat]['format']))

    table_fg = ax.table(cellText=[cell_text], colLabels=headers, cellLoc='center',
                        bbox=[0.04, 0.2, 0.92, 0.8])

    min_font_size = 20
    table_fg.set_fontsize(min_font_size)

    ax.axis('off')

    return table_fg
549
-
550
- ### VELOCITY KDES ###
551
def velocity_kdes(df,
                  ax,
                  gs,
                  gs_list,
                  fig):
    """Draw one velocity distribution per pitch type, stacked vertically.

    Pitch types are ordered from most to least thrown. Each one gets its own
    axes inside ``gs[2, gs_list]``: a filled KDE of ``start_speed``, or a
    single vertical line when every pitch of that type has the same speed.
    All of these axes share the same x range, rounded outwards to multiples
    of 5.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``pitch_type``, ``start_speed`` and
        ``pitch_description``.
    ax : matplotlib.axes.Axes
        Outer axes; only its title is used and its axis is switched off.
    gs : matplotlib.gridspec.GridSpec
        Grid of the figure; row 2 is the row that gets subdivided.
    gs_list
        Column index or slice of ``gs`` to subdivide.
    fig : matplotlib.figure.Figure
        Figure the inner axes are added to.
    """
    import matplotlib.gridspec as gridspec

    sorted_value_counts = df['pitch_type'].value_counts().sort_values(ascending=False)

    # Get the list of items ordered from most to least frequent
    items_in_order = sorted_value_counts.index.tolist()

    ax.axis('off')
    ax.set_title('Pitch Velocity Distribution', fontdict={'family': 'Calibri', 'size': 20})

    # Nothing to draw without at least one pitch type.
    if not items_in_order:
        return

    # Create the inner subplots inside the outer subplot, one row per pitch type
    inner_grid_1 = gridspec.GridSpecFromSubplotSpec(len(items_in_order), 1, subplot_spec=gs[2, gs_list])
    ax_top = [fig.add_subplot(inner) for inner in inner_grid_1]

    # Shared x range for every row.
    x_low = math.floor(df['start_speed'].min() / 5) * 5
    x_high = math.ceil(df['start_speed'].max() / 5) * 5
    x_ticks = range(x_low, x_high, 5)
    last_row = len(items_in_order) - 1

    for ax_number, pitch in enumerate(items_in_order):
        pitch_ax = ax_top[ax_number]
        pitch_rows = df[df['pitch_type'] == pitch]
        speeds = pitch_rows['start_speed']
        colour = pitch_colours[pitch_rows['pitch_description'].values[0]]

        unique_speeds = np.unique(speeds)
        if unique_speeds.size == 1:
            # A KDE needs some spread; mark the single speed with a line.
            pitch_ax.plot([unique_speeds, unique_speeds], [0, 1], linewidth=4,
                          color=colour, zorder=20)
        else:
            sns.kdeplot(speeds, ax=pitch_ax, fill=True,
                        clip=(speeds.min(), speeds.max()),
                        color=colour)

        pitch_ax.set_xlim(x_low, x_high)
        pitch_ax.set_xlabel('')
        pitch_ax.set_ylabel('')

        if ax_number < last_row:
            # Only the bottom row keeps visible x tick labels.
            pitch_ax.spines['top'].set_visible(False)
            pitch_ax.spines['right'].set_visible(False)
            pitch_ax.spines['left'].set_visible(False)
            pitch_ax.tick_params(axis='x', colors='none')

        pitch_ax.set_xticks(x_ticks)
        pitch_ax.set_yticks([])
        pitch_ax.grid(axis='x', linestyle='--')
        pitch_ax.text(-0.01, 0.5, pitch, transform=pitch_ax.transAxes,
                      fontsize=14, va='center', ha='right')

    ax_top[-1].spines['top'].set_visible(False)
    ax_top[-1].spines['right'].set_visible(False)
    ax_top[-1].spines['left'].set_visible(False)

    ax_top[-1].set_xticks(list(x_ticks))
    ax_top[-1].set_xlabel('Velocity (mph)')
609
-
610
- ### TJ STUFF+ ROLLING ###
611
def tj_stuff_roling(df,
                    window,
                    ax):
    """Plot the rolling tjStuff+ of every pitch type on *ax*.

    For each pitch type, from most to least thrown, whose largest
    ``pitch_type_count_each`` is at least *window*, the rolling sum of
    ``tj_stuff_plus`` over *window* pitches divided by *window* is drawn
    against the pitch number, coloured by ``pitch_description``. The axes
    limits, labels, title and an integer x tick locator are set afterwards.
    """
    pitch_counts = df['pitch_type'].value_counts().sort_values(ascending=False)
    pitch_order = pitch_counts.index.tolist()

    for pitch in pitch_order:
        pitch_rows = df[df['pitch_type'] == pitch]
        n_thrown = max(pitch_rows['pitch_type_count_each'])
        if n_thrown >= window:
            rolling_stuff = pitch_rows['tj_stuff_plus'].rolling(window).sum() / window
            line_colour = pitch_colours[pitch_rows['pitch_description'].values[0]]
            sns.lineplot(x=range(1, n_thrown + 1),
                         y=rolling_stuff,
                         color=line_colour,
                         ax=ax, linewidth=3)

    # The first complete window ends at pitch number `window`.
    ax.set_xlim(window, max(df['pitch_type_count_each']))
    ax.set_ylim(70, 130)
    ax.set_xlabel('Pitches', fontdict=font_properties_axes)
    ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
    ax.set_title(f"{window} Pitch Rolling tjStuff+", fontdict=font_properties_titles)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
638
-
639
- ### BREAK PLOT ###
640
def break_plot(df,
               ax):
    """Scatter horizontal against induced vertical break, coloured by pitch.

    A shaded 2-standard-deviation ellipse is drawn for every pitch
    description with more than four pitches, then all pitches are plotted as
    points. The x axis is inverted for left-handed pitchers and the lower
    corners are labelled with glove side / arm side.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``prop``, ``pitch_type``, ``pitch_description``,
        ``hb``, ``ivb`` and ``pitcher_hand``. The hand of the first row is
        used for the whole plot.
    ax : matplotlib.axes.Axes
        Axes to draw on.
    """
    pitcher_hand = df['pitcher_hand'].values[0]

    label_labels = df.sort_values(by=['prop', 'pitch_type'], ascending=[False, True]).pitch_description.unique()
    for label in label_labels:
        subset = df[df['pitch_description'] == label]
        print(label)
        if len(subset) <= 4:
            # Too few pitches for a meaningful ellipse.
            continue
        try:
            confidence_ellipse(subset['hb'], subset['ivb'], ax=ax, edgecolor=pitch_colours[label],
                               n_std=2, facecolor=pitch_colours[label], alpha=0.2)
        except ValueError:
            # Skip only this ellipse instead of abandoning the whole plot.
            continue

    if pitcher_hand in ('R', 'L'):
        sns.scatterplot(ax=ax, x=df['hb'], y=df['ivb'], hue=df.pitch_description,
                        palette=pitch_colours, ec='black', alpha=1, zorder=2)

    ax.set_xlim((-25, 25))
    ax.set_ylim((-25, 25))

    ax.hlines(y=0, xmin=-50, xmax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)
    ax.vlines(x=0, ymin=-50, ymax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)
    ax.set_xlabel('Horizontal Break (in)', fontdict=font_properties_axes)
    ax.set_ylabel('Induced Vertical Break (in)', fontdict=font_properties_axes)
    ax.set_title("Pitch Breaks", fontdict=font_properties_titles)

    # There is no legend when nothing was scattered.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    ax.set_xticklabels(ax.get_xticks(), fontdict=font_properties)
    ax.set_yticklabels(ax.get_yticks(), fontdict=font_properties)

    if pitcher_hand == 'R':
        ax.text(-24.5, -24.5, s='← Glove Side', fontstyle='italic', ha='left', va='bottom',
                bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
        ax.text(24.5, -24.5, s='Arm Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
    if pitcher_hand == 'L':
        # Mirror the axis so that arm side is on the left for left-handers.
        ax.invert_xaxis()
        ax.text(24.5, -24.5, s='← Arm Side', fontstyle='italic', ha='left', va='bottom',
                bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
        ax.text(-24.5, -24.5, s='Glove Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)

    ax.set_aspect('equal', adjustable='box')
    # Show tick values as whole numbers.
    ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
    ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
706
-
707
- ### TABLE SUMMARY ###
708
- def table_summary(df,
709
- pitcher_id,
710
- ax,
711
- df_group,
712
- df_group_all,
713
- statcast_pitch_summary):
714
- cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',])
715
-
716
- ax.axis('off')
717
- df_group['spin_direction_adj'] = [(x + 180) for x in df_group['spin_direction']]
718
- #(((df_group.groupby('pitch_description').mean()[['spin_direction_adj']] %360 % 30 / 30 /100 *60).round(2) *10).round(0)//1.5/4 )
719
- clock_time = ((df_group.groupby('pitch_description').mean()['spin_direction_adj']) %360 // 30 )+ (((df_group.groupby('pitch_description').mean()['spin_direction_adj'] %360 % 30 / 30 /100 *60).round(2) *10).round(0)//1.5/4 )
720
- # print('Clocks')
721
- # print(clock_time)
722
- clock_time = (clock_time.astype(int) + clock_time%1*60/100).round(2).astype(str).str.replace('.',':').str.replace(':0',':00').str.replace(':3',':30').str.replace('0:','12:').str.replace('112:','10:').to_frame()
723
- df_group = df_group.merge(right=clock_time,left_on='pitch_description',right_index=True,suffixes=['','_clock'])
724
-
725
-
726
- plot_table = df_group[df_group['pitcher_id']==pitcher_id].sort_values(
727
- by=['pitches'],ascending=False)[['pitch_description','pitches','start_speed','ivb',
728
- 'hb', 'spin_rate','vaa', 'haa', 'vertical_release','horizontal_release',
729
- 'extension','tj_stuff_plus','spin_direction_adj_clock','zone_percent','chase_percent','whiff_rate']]
730
-
731
- # if df['pitcher_hand'].values[0] == 'L':
732
- # plot_table['hb'] = plot_table['hb']*-1
733
-
734
- #if df['pitcher_hand'].values[0] == 'R':
735
- plot_table['horizontal_release'] = plot_table['horizontal_release']*-1
736
-
737
- plot_table['pitch_percent'] = plot_table['pitches'] / plot_table['pitches'].sum()
738
-
739
- plot_table = plot_table[['pitch_description','pitches','pitch_percent','start_speed','ivb',
740
- 'hb', 'spin_rate','vaa', 'haa', 'vertical_release','horizontal_release',
741
- 'extension','spin_direction_adj_clock','tj_stuff_plus','zone_percent','chase_percent','whiff_rate']]
742
-
743
- plot_table_all = pd.DataFrame(data={'pitch_description': 'All',
744
- 'pitches': plot_table['pitches'].sum(),
745
- 'pitch_percent': 1.0,
746
- 'start_speed': '—',
747
- 'ivb': '—',
748
- 'hb': '—',
749
- 'spin_rate': '—',
750
- 'vaa': '—',
751
- 'haa': '—',
752
- 'vertical_release': '—',
753
- 'horizontal_release': '—',
754
- 'extension': df['extension'].mean(),
755
- 'spin_direction_adj_clock': '—',
756
- 'tj_stuff_plus': df[df['pitcher_id']==pitcher_id]['tj_stuff_plus'].mean(),
757
- 'zone_percent': df_group_all[df_group_all['pitcher_id']==pitcher_id]['zone_percent'].values[0],
758
- 'chase_percent': df_group_all[df_group_all['pitcher_id']==pitcher_id]['chase_percent'].values[0],
759
- 'whiff_rate': df_group_all[df_group_all['pitcher_id']==pitcher_id]['whiff_rate'].values[0],
760
-
761
-
762
- },index=[0]
763
- )
764
-
765
- plot_table = pd.concat([plot_table,plot_table_all]).fillna('—')
766
-
767
-
768
-
769
- plt.rcParams['font.family'] = 'Calibri'
770
- table = ax.table(cellText=plot_table.values, colLabels=plot_table.columns, cellLoc='center',
771
- colWidths=[2.3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1], bbox=[0.04, 0, 0.92, 0.8])
772
-
773
- min_font_size = 14
774
- # Set table properties
775
- table.auto_set_font_size(False)
776
- #table.set_fontsize(min(min_font_size,max(min_font_size/((len(label_labels)/4)),10)))
777
- table.set_fontsize(min_font_size)
778
- table.scale(1, 0.5)
779
-
780
- min_font_size = 18
781
- # Set font size for values
782
- # Adjust the font size as needed
783
- for i in range(len(plot_table)+1):
784
- for j in range(len(plot_table.columns)):
785
- if i > 0: # Skip the header row
786
- cell = table.get_celld()[i, j]
787
- cell.set_fontsize(min_font_size)
788
-
789
-
790
- for i in range(len(plot_table)):
791
-
792
- if table.get_celld()[(i+1, 0)].get_text().get_text() != 'All':
793
- table.get_celld()[(i+1, 0)].set_facecolor(pitch_colours[table.get_celld()[(i+1, 0)].get_text().get_text()]) # Header cell color
794
- if table.get_celld()[(i+1, 0)].get_text().get_text() in ['Split-Finger','Slider','Changeup']:
795
- table.get_celld()[(i+1, 0)].set_text_props(color='#000000',fontweight='bold')
796
- else:
797
- table.get_celld()[(i+1, 0)].set_text_props(color='#ffffff',fontweight='bold')
798
- if table.get_celld()[(i+1, 0)].get_text().get_text() == 'Four-Seam Fastball':
799
- table.get_celld()[(i+1, 0)].get_text().set_text('4-Seam')
800
-
801
- select_df = statcast_pitch_summary[statcast_pitch_summary['pitch_description'] == plot_table['pitch_description'].values[i]]
802
-
803
- normalize = mcolors.Normalize(vmin=select_df['start_speed'].mean()-select_df.pitch_velocity_std.mean(),
804
- vmax=select_df['start_speed'].mean()+select_df.pitch_velocity_std.mean()) # Define the range of values
805
-
806
- if table.get_celld()[(i+1, 3)].get_text().get_text() != '—':
807
- table.get_celld()[(i+1, 3)].set_facecolor(get_color(float(table.get_celld()[(i+1, 3)].get_text().get_text()),normalize,cmap_sum)) # Header cell color
808
-
809
-
810
- cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',])
811
- normalize = mcolors.Normalize(vmin=select_df['extension'].mean()*0.9, vmax=select_df['extension'].mean()*1.1)
812
- if table.get_celld()[(i+1,11)].get_text().get_text() != '—':
813
- table.get_celld()[(i+1,11)].set_facecolor(get_color(float(table.get_celld()[(i+1, 11)].get_text().get_text()),normalize,cmap_sum)) # Header cell color
814
-
815
- cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',])
816
- normalize = mcolors.Normalize(vmin=80, vmax=120)
817
- print(normalize)
818
- if table.get_celld()[(i+1,13)].get_text().get_text() != '—':
819
-
820
- table.get_celld()[(i+1,13)].set_facecolor(get_color(float(table.get_celld()[(i+1, 13)].get_text().get_text()),normalize,cmap_sum)) # Header cell color
821
-
822
- cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',])
823
- normalize = mcolors.Normalize(vmin=select_df['zone_percent'].mean()*0.7, vmax=select_df['zone_percent'].mean()*1.3)
824
- if table.get_celld()[(i+1,14)].get_text().get_text() != '—':
825
- table.get_celld()[(i+1,14)].set_facecolor(get_color(float(table.get_celld()[(i+1, 14)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color
826
-
827
- cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',])
828
- normalize = mcolors.Normalize(vmin=select_df['chase_percent'].mean()*0.7, vmax=select_df['chase_percent'].mean()*1.3)
829
- if table.get_celld()[(i+1,15)].get_text().get_text() != '—':
830
- table.get_celld()[(i+1,15)].set_facecolor(get_color(float(table.get_celld()[(i+1, 15)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color
831
-
832
-
833
- cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',])
834
- normalize = mcolors.Normalize(vmin=select_df['whiff_rate'].mean()*0.7, vmax=select_df['whiff_rate'].mean()*1.3)
835
- if table.get_celld()[(i+1,16)].get_text().get_text() != '—':
836
- table.get_celld()[(i+1,16)].set_facecolor(get_color(float(table.get_celld()[(i+1, 16)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color
837
-
838
- table.get_celld()[(len(plot_table), 0)].set_text_props(color='#000000',fontweight='bold')
839
-
840
-
841
- new_column_names = ['$\\bf{Pitch\ Name}$',
842
- '$\\bf{Count}$',
843
- '$\\bf{Pitch\%}$',
844
- '$\\bf{Velocity}$',
845
- '$\\bf{iVB}$',
846
- '$\\bf{HB}$',
847
- '$\\bf{Spin}$',
848
- '$\\bf{VAA}$',
849
- '$\\bf{HAA}$',
850
- '$\\bf{vRel}$',
851
- '$\\bf{hRel}$',
852
-
853
- '$\\bf{Ext.}$',
854
- '$\\bf{Axis}$',
855
- '$\\bf{tjStuff+}$',
856
- '$\\bf{Zone\%}$',
857
- '$\\bf{Chase\%}$',
858
- '$\\bf{Whiff\%}$',
859
- ]
860
-
861
- for i, col_name in enumerate(new_column_names):
862
- table.get_celld()[(0, i)].get_text().set_text(col_name)
863
-
864
- float_list = ['start_speed','ivb',
865
- 'hb', 'vaa', 'haa', 'vertical_release','horizontal_release', 'extension']
866
- for fl in float_list:
867
- # Subset of column names
868
- subset_columns = [fl]
869
-
870
- # Get the list of column indices
871
- column_indices = [plot_table.columns.get_loc(col) for col in subset_columns]
872
-
873
- # # print(column_indices)
874
- for row_l in range(1,len(plot_table)+1):
875
- # print(row_l)
876
- if table.get_celld()[(row_l,column_indices[0])].get_text().get_text() != '—':
877
- # print()
878
- # print(fl)
879
- table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.1f}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%'))))
880
-
881
-
882
-
883
- percent_list = ['pitch_percent','zone_percent','chase_percent','whiff_rate']
884
- for fl in percent_list:
885
- # Subset of column names
886
- subset_columns = [fl]
887
-
888
- # Get the list of column indices
889
- column_indices = [plot_table.columns.get_loc(col) for col in subset_columns]
890
-
891
- # # print(column_indices)
892
- for row_l in range(1,len(plot_table)+1):
893
- # print(row_l)
894
- if table.get_celld()[(row_l,column_indices[0])].get_text().get_text() != '—':
895
-
896
- # print(fl)
897
- table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.1%}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%'))))
898
-
899
-
900
- int_list = ['tj_stuff_plus','spin_rate']
901
- for fl in int_list:
902
- # Subset of column names
903
- subset_columns = [fl]
904
-
905
- # Get the list of column indices
906
- column_indices = [plot_table.columns.get_loc(col) for col in subset_columns]
907
-
908
- # # print(column_indices)
909
- for row_l in range(1,len(plot_table)+1):
910
- # print(row_l)
911
- if table.get_celld()[(row_l,column_indices[0])].get_text().get_text() != '—':
912
- # print(fl)
913
-
914
- table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.0f}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%'))))
915
-
916
- return table
917
-
918
- ### GROUPED IVB CREATION ###
919
def group_ivb_update(df, agg_list=None):
    """Aggregate pitch-level rows into per-group pitch-shape and plate-discipline stats.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitch-level data. Must contain every column named in ``agg_list`` plus
        ``start_speed``, ``ivb``, ``hb``, ``spin_rate``, ``vaa``, ``haa``,
        ``x0``, ``z0``, ``extension``, ``spin_direction``, ``tj_stuff_plus``,
        ``swings``, ``in_zone``, ``out_zone``, ``whiffs``, ``zone_swing``,
        ``zone_contact``, ``ozone_swing`` and ``ozone_contact``.
    agg_list : list of str, optional
        Columns to group by. When omitted, groups by ``pitcher_id``,
        ``pitcher_name``, ``pitcher_hand``, ``pitch_type`` and
        ``pitch_description``.

    Returns
    -------
    pandas.DataFrame
        One row per group (group keys restored as ordinary columns) holding:
        the pitch count (``pitches``, the count of non-null ``start_speed``),
        the means of the pitch-shape columns (``x0``/``z0`` are exposed as
        ``horizontal_release``/``vertical_release``), the sums of the
        swing/zone counters, and the derived rate columns
        ``zone_contact_percent``, ``zone_swing_percent``, ``zone_percent``,
        ``chase_percent``, ``chase_contact``, ``swing_percent``,
        ``whiff_rate`` and ``swstr_rate``. A rate is NaN whenever its
        denominator is zero.
    """
    # Build the default inside the call so no list object is shared between calls.
    if agg_list is None:
        agg_list = ['pitcher_id', 'pitcher_name', 'pitcher_hand',
                    'pitch_type', 'pitch_description']

    def _safe_ratio(numerator, denominator):
        # Element-wise division that produces NaN (never inf or a
        # divide-by-zero warning) wherever the denominator is zero.
        return numerator / denominator.where(denominator != 0)

    grouped_ivb = df.groupby(agg_list).agg(
        pitches=('start_speed', 'count'),
        start_speed=('start_speed', 'mean'),
        ivb=('ivb', 'mean'),
        hb=('hb', 'mean'),
        spin_rate=('spin_rate', 'mean'),
        vaa=('vaa', 'mean'),
        haa=('haa', 'mean'),
        horizontal_release=('x0', 'mean'),
        vertical_release=('z0', 'mean'),
        extension=('extension', 'mean'),
        spin_direction=('spin_direction', 'mean'),
        tj_stuff_plus=('tj_stuff_plus', 'mean'),
        swings=('swings', 'sum'),
        in_zone=('in_zone', 'sum'),
        out_zone=('out_zone', 'sum'),
        whiffs=('whiffs', 'sum'),
        zone_swing=('zone_swing', 'sum'),
        zone_contact=('zone_contact', 'sum'),
        ozone_swing=('ozone_swing', 'sum'),
        ozone_contact=('ozone_contact', 'sum'),
    ).reset_index()

    pitches = grouped_ivb['pitches']
    in_zone = grouped_ivb['in_zone']
    swings = grouped_ivb['swings']

    grouped_ivb['zone_contact_percent'] = _safe_ratio(
        grouped_ivb['zone_contact'], grouped_ivb['zone_swing'])
    # The denominator here is in-zone pitches, so that is what must be
    # guarded (the previous guard checked total pitches instead).
    grouped_ivb['zone_swing_percent'] = _safe_ratio(
        grouped_ivb['zone_swing'], in_zone)
    grouped_ivb['zone_percent'] = _safe_ratio(in_zone, pitches)
    # Chase rate: swings outside the zone over pitches outside the zone.
    grouped_ivb['chase_percent'] = _safe_ratio(
        grouped_ivb['ozone_swing'], pitches - in_zone)
    grouped_ivb['chase_contact'] = _safe_ratio(
        grouped_ivb['ozone_contact'], grouped_ivb['ozone_swing'])
    grouped_ivb['swing_percent'] = _safe_ratio(swings, pitches)
    grouped_ivb['whiff_rate'] = _safe_ratio(grouped_ivb['whiffs'], swings)
    grouped_ivb['swstr_rate'] = _safe_ratio(grouped_ivb['whiffs'], pitches)

    return grouped_ivb
964
-
965
-
966
- ### PITCH LOCATION PLOT (vs. a given batter hand) ###
967
def location_plot(df, ax, hand):
    """Draw pitch-location ellipses and mean-location markers against one batter side.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitch-level data. Reads the columns ``prop``, ``pitch_type``,
        ``pitch_description``, ``batter_hand``, ``px``, ``pz`` and
        ``start_speed``.
    ax : matplotlib.axes.Axes
        Axes to draw on; it is modified in place.
    hand : str
        Value matched against ``df['batter_hand']``; it is also interpolated
        into the title as ``f"{hand}HB"``.

    Returns
    -------
    None
    """
    # Ellipses are drawn in order of the 'prop' column (descending), ties
    # broken by pitch_type. NOTE(review): 'prop' is presumably the usage
    # share computed upstream -- confirm against the caller.
    label_labels = df.sort_values(
        by=['prop', 'pitch_type'], ascending=[False, True]
    ).pitch_description.unique()

    # Filter to the requested batter side once instead of once per pitch type.
    hand_df = df[df['batter_hand'] == hand]

    for label in label_labels:
        subset = hand_df[hand_df['pitch_description'] == label]
        # Pitch types with fewer than 5 pitches get no ellipse.
        if len(subset) >= 5:
            confidence_ellipse(subset['px'], subset['pz'], ax=ax,
                               edgecolor=pitch_colours[label], n_std=1.5,
                               facecolor=pitch_colours[label], alpha=0.3)

    pitch_location_group = hand_df.groupby(['pitch_description']).agg(
        pitches=('start_speed', 'count'),
        px=('px', 'mean'),
        pz=('pz', 'mean')).reset_index()

    pitch_location_group['pitch_percent'] = (
        pitch_location_group['pitches'] / pitch_location_group['pitches'].sum()
    )

    ## Location Plot
    # One marker per pitch type at its mean location, sized by usage share.
    sns.scatterplot(ax=ax, x=pitch_location_group['px'],
                    y=pitch_location_group['pz'],
                    hue=pitch_location_group['pitch_description'],
                    palette=pitch_colours, ec='black',
                    s=pitch_location_group['pitch_percent'] * 750,
                    linewidth=2,
                    zorder=2)

    ax.axis('square')
    draw_line(ax, alpha_spot=0.75, catcher_p=False)
    ax.axis('off')
    ax.set_xlim((-2.75, 2.75))
    ax.set_ylim((-0.5, 5))

    # Remove the hue legend only when something was plotted; guard against
    # the axes having no legend so this never raises on None.
    if len(pitch_location_group['px']) > 0:
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()

    ax.grid(False)
    ax.set_title(f"Pitch Locations vs {hand}HB\n{pitch_location_group['pitches'].sum()} Pitches", fontdict=font_properties_titles)