Spaces:
Runtime error
Runtime error
Commit
·
3231b63
1
Parent(s):
1beb833
Initial commit
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- Dockerfile.txt +29 -0
- Notebook.ipynb +0 -0
- Source/Build/__pycache__/build.cpython-311.pyc +0 -0
- Source/Build/build.py +206 -0
- Source/Build/nfl_data_py +1 -0
- Source/Build/update.py +25 -0
- Source/Data/gbg.csv +3 -0
- Source/Data/gbg_and_odds.csv +3 -0
- Source/Data/gbg_and_odds_this_year.csv +3 -0
- Source/Data/gbg_this_year.csv +3 -0
- Source/Data/pbp.csv +3 -0
- Source/Data/pbp_this_year.csv +3 -0
- Source/Models/__init__.py +0 -0
- Source/Models/xgboost_ML_75.4%.json +0 -0
- Source/Models/xgboost_OU_59.3%.json +0 -0
- Source/Pickles/team_abbreviation_to_name.pkl +3 -0
- Source/Pickles/team_name_to_abbreviation.pkl +3 -0
- Source/Pickles/test_games_ML.pkl +3 -0
- Source/Pickles/test_games_OU.pkl +3 -0
- Source/Pickles/train_games_ML.pkl +3 -0
- Source/Pickles/train_games_OU.pkl +3 -0
- Source/Predict/__pycache__/predict.cpython-311.pyc +0 -0
- Source/Predict/predict.py +201 -0
- Source/Test/__init__.py +0 -0
- Source/Test/xgboost_ML.py +59 -0
- Source/Test/xgboost_ML_75.4%.png +0 -0
- Source/Test/xgboost_ML_75.4%_dark.png +0 -0
- Source/Test/xgboost_OU.py +59 -0
- Source/Test/xgboost_OU_59.3%.png +0 -0
- Source/Test/xgboost_OU_59.3%_dark.png +0 -0
- Source/Train/xgboost_ML.py +69 -0
- Source/Train/xgboost_OU.py +70 -0
- Static/Arizona Cardinals.webp +0 -0
- Static/Atlanta Falcons.webp +0 -0
- Static/Baltimore Ravens.webp +0 -0
- Static/Buffalo Bills.webp +0 -0
- Static/Carolina Panthers.webp +0 -0
- Static/Chicago Bears.webp +0 -0
- Static/Cincinnati Bengals.webp +0 -0
- Static/Cleveland Browns.webp +0 -0
- Static/Dallas Cowboys.webp +0 -0
- Static/Denver Broncos.webp +0 -0
- Static/Detroit Lions.webp +0 -0
- Static/Green Bay Packers.webp +0 -0
- Static/Houston Texans.webp +0 -0
- Static/Indianapolis Colts.webp +0 -0
- Static/Jacksonville Jaguars.webp +0 -0
- Static/Kansas City Chiefs.webp +0 -0
- Static/Las Vegas Raiders.webp +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.csv filter=lfs diff=lfs merge=lfs -text
|
Dockerfile.txt
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use the official lightweight Python image.
|
| 2 |
+
FROM python:3.11
|
| 3 |
+
|
| 4 |
+
# Allow statements and log messages to immediately appear in the logs
|
| 5 |
+
ENV PYTHONUNBUFFERED True
|
| 6 |
+
|
| 7 |
+
# Copy local code to the container image.
|
| 8 |
+
ENV APP_HOME /app
|
| 9 |
+
WORKDIR $APP_HOME
|
| 10 |
+
COPY . ./
|
| 11 |
+
|
| 12 |
+
# Install production dependencies.
|
| 13 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 14 |
+
|
| 15 |
+
# Create a non-root user and switch to it
|
| 16 |
+
RUN useradd -m -u 1000 user
|
| 17 |
+
USER user
|
| 18 |
+
ENV HOME=/home/user \
|
| 19 |
+
PATH=/home/user/.local/bin:$PATH
|
| 20 |
+
|
| 21 |
+
# Set work directory
|
| 22 |
+
WORKDIR $APP_HOME
|
| 23 |
+
|
| 24 |
+
# Change ownership of app files to the new user
|
| 25 |
+
COPY --chown=user . $HOME/app
|
| 26 |
+
|
| 27 |
+
# Run the web service on container startup.
|
| 28 |
+
CMD exec gunicorn --bind 0.0.0.0:7860 --workers 9 --threads 16 --timeout 120 main:app
|
| 29 |
+
|
Notebook.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Source/Build/__pycache__/build.cpython-311.pyc
ADDED
|
Binary file (20.8 kB). View file
|
|
|
Source/Build/build.py
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import nfl_data_py.nfl_data_py as nfl
|
| 2 |
+
from tqdm import tqdm
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pandas as pd
|
| 5 |
+
pd.set_option('chained_assignment',None)
|
| 6 |
+
pd.set_option('display.max_columns',None)
|
| 7 |
+
import os
|
| 8 |
+
import datetime as dt
|
| 9 |
+
|
| 10 |
+
current_directory = os.path.dirname(os.path.abspath(__file__))
|
| 11 |
+
parent_directory = os.path.dirname(current_directory)
|
| 12 |
+
data_directory = os.path.join(parent_directory, 'Data')
|
| 13 |
+
|
| 14 |
+
def get_pbp_data(get_seasons=[], overwrite_seasons=[]):
|
| 15 |
+
"""
|
| 16 |
+
Pull data from nflFastR's Github repo.
|
| 17 |
+
If you choose to overwrite, it will replace the existing pbp data with the data you pull.
|
| 18 |
+
|
| 19 |
+
"""
|
| 20 |
+
pbp = nfl.import_pbp_data(get_seasons)
|
| 21 |
+
pbp['TOP_seconds'] = pbp['drive_time_of_possession'].apply(lambda x: int(x.split(':')[0]) * 60 + int(x.split(':')[1]) if pd.notnull(x) else 0)
|
| 22 |
+
|
| 23 |
+
if overwrite_seasons:
|
| 24 |
+
file_path = os.path.join(data_directory, 'pbp.csv')
|
| 25 |
+
old = pd.read_csv(file_path, index_col=0, low_memory=False)
|
| 26 |
+
old = old.loc[~old['season'].isin(overwrite_seasons)]
|
| 27 |
+
pbp = pd.concat([old,pbp])
|
| 28 |
+
pbp.to_csv(file_path)
|
| 29 |
+
|
| 30 |
+
year = dt.datetime.now().year
|
| 31 |
+
month = dt.datetime.now().month
|
| 32 |
+
season = year if month in [8,9,10,11,12] else year-1
|
| 33 |
+
pbp_this_year = pbp.loc[pbp['season']==season]
|
| 34 |
+
file_path = os.path.join(data_directory, 'pbp_this_year.csv')
|
| 35 |
+
pbp_this_year.to_csv(file_path)
|
| 36 |
+
|
| 37 |
+
return pbp
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def build_gbg_data(get_seasons=[], overwrite_seasons=[]):
|
| 41 |
+
"""
|
| 42 |
+
Using pbp.csv, build a game-by-game dataset to use for prediction models.
|
| 43 |
+
Populate update_seasons with the current year to only update this season's data while preserving historical data.
|
| 44 |
+
|
| 45 |
+
"""
|
| 46 |
+
print('Loading play-by-play data.')
|
| 47 |
+
|
| 48 |
+
if overwrite_seasons:
|
| 49 |
+
print('Overwriting data for', overwrite_seasons)
|
| 50 |
+
pbp = get_pbp_data(get_seasons, overwrite_seasons)
|
| 51 |
+
|
| 52 |
+
if not overwrite_seasons:
|
| 53 |
+
file_path = os.path.join(data_directory, 'pbp.csv')
|
| 54 |
+
pbp = pd.read_csv(file_path, index_col=0)
|
| 55 |
+
|
| 56 |
+
pbp = pbp.loc[pbp['season'].isin(get_seasons)]
|
| 57 |
+
game_date_dict = dict(pbp[['game_id','game_date']].values)
|
| 58 |
+
teams = list(set(list(pbp['home_team'].unique()) + list(pbp['away_team'].unique())))
|
| 59 |
+
print(teams)
|
| 60 |
+
seasons = pbp['season'].unique()
|
| 61 |
+
|
| 62 |
+
print('Building game-by-game data.')
|
| 63 |
+
data = pd.DataFrame()
|
| 64 |
+
for season in seasons:
|
| 65 |
+
print(season)
|
| 66 |
+
for team_name in tqdm(teams):
|
| 67 |
+
# create features
|
| 68 |
+
team = pbp.loc[((pbp['home_team']==team_name) | (pbp['away_team']==team_name)) & (pbp['season']==season)]
|
| 69 |
+
team['GP'] = team['week']
|
| 70 |
+
team['W'] = [1 if r>0 and team_name==h else 1 if r<0 and team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]
|
| 71 |
+
team['L'] = [0 if r>0 and team_name==h else 0 if r<0 and team_name==a else 1 for r,a,h in team[['result','away_team','home_team']].values]
|
| 72 |
+
team['W_PCT'] = team['W']/team['GP']
|
| 73 |
+
team['TOP'] = [t if team_name==p else 0 for t,p in team[['TOP_seconds','posteam']].values]
|
| 74 |
+
team['FGA'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','field_goal_attempt']].values]
|
| 75 |
+
team['FGM'] = [1 if team_name==p and f=='made' else 0 for p,f in team[['posteam','field_goal_result']].values]
|
| 76 |
+
team['FG_PCT'] = team['FGM']/team['FGA']
|
| 77 |
+
team['PassTD'] = np.where((team['posteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
|
| 78 |
+
team['RushTD'] = np.where((team['posteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
|
| 79 |
+
team['PassTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
|
| 80 |
+
team['RushTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
|
| 81 |
+
team['PassYds'] = [y if p==team_name else 0 for p,y in team[['posteam','passing_yards']].values]
|
| 82 |
+
team['RushYds'] = [y if p==team_name else 0 for p,y in team[['posteam','rushing_yards']].values]
|
| 83 |
+
team['PassYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','passing_yards']].values]
|
| 84 |
+
team['RushYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','rushing_yards']].values]
|
| 85 |
+
team['Fum'] = np.where((team['defteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
|
| 86 |
+
team['Fum_Allowed'] = np.where((team['posteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
|
| 87 |
+
team['INT'] = np.where((team['defteam'] == team_name) & (team['interception'] == 1), 1, 0)
|
| 88 |
+
team['INT_Allowed'] = np.where((team['posteam'] == team_name) & (team['interception'] == 1), 1, 0)
|
| 89 |
+
team['Sacks'] = np.where((team['defteam'] == team_name) & (team['sack'] == 1), 1, 0)
|
| 90 |
+
team['Sacks_Allowed'] = np.where((team['posteam'] == team_name) & (team['sack'] == 1), 1, 0)
|
| 91 |
+
team['Penalties'] = np.where((team['penalty_team'] == team_name), 1, 0)
|
| 92 |
+
team['FirstDowns'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','first_down']].values]
|
| 93 |
+
team['3rdDownConverted'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_converted']].values]
|
| 94 |
+
team['3rdDownFailed'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_failed']].values]
|
| 95 |
+
team['3rdDownAllowed'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_converted']].values]
|
| 96 |
+
team['3rdDownDefended'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_failed']].values]
|
| 97 |
+
team['PTS'] = [ap if at==team_name else hp if ht==team_name else None for ht,at,hp,ap in team[['home_team','away_team','home_score','away_score']].values]
|
| 98 |
+
team['PointDiff'] = [r if team_name==h else -r if team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]
|
| 99 |
+
|
| 100 |
+
# aggregate from play-by-play to game-by-game
|
| 101 |
+
features = {
|
| 102 |
+
'GP':'mean',
|
| 103 |
+
'W':'mean',
|
| 104 |
+
'L':'mean',
|
| 105 |
+
'W_PCT':'mean',
|
| 106 |
+
'TOP':'sum',
|
| 107 |
+
'FGA':'sum',
|
| 108 |
+
'FGM':'sum',
|
| 109 |
+
'FG_PCT':'mean',
|
| 110 |
+
'PassTD':'sum',
|
| 111 |
+
'RushTD':'sum',
|
| 112 |
+
'PassTD_Allowed':'sum',
|
| 113 |
+
'RushTD_Allowed':'sum',
|
| 114 |
+
'PassYds':'sum',
|
| 115 |
+
'RushYds':'sum',
|
| 116 |
+
'PassYds_Allowed':'sum',
|
| 117 |
+
'RushYds_Allowed':'sum',
|
| 118 |
+
'Fum':'sum',
|
| 119 |
+
'Fum_Allowed':'sum',
|
| 120 |
+
'INT':'sum',
|
| 121 |
+
'INT_Allowed':'sum',
|
| 122 |
+
'Sacks':'sum',
|
| 123 |
+
'Sacks_Allowed':'sum',
|
| 124 |
+
'Penalties':'sum',
|
| 125 |
+
'FirstDowns':'sum',
|
| 126 |
+
'3rdDownConverted':'sum',
|
| 127 |
+
'3rdDownFailed':'sum',
|
| 128 |
+
'3rdDownAllowed':'sum',
|
| 129 |
+
'3rdDownDefended':'sum',
|
| 130 |
+
'PTS':'mean',
|
| 131 |
+
'PointDiff':'mean'
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
game = team.groupby('game_id').agg(features).reset_index()
|
| 135 |
+
game[['W','L']] = game[['W','L']].expanding().sum()
|
| 136 |
+
game[game.columns[4:]] = game[game.columns[4:]].expanding().mean()
|
| 137 |
+
game[game.columns[1:]] = game[game.columns[1:]].shift()
|
| 138 |
+
game['TEAM'] = team_name
|
| 139 |
+
game['Season'] = season
|
| 140 |
+
|
| 141 |
+
data = pd.concat([data,game])
|
| 142 |
+
|
| 143 |
+
# separate home and away data and merge
|
| 144 |
+
data = data.merge(pbp[['game_id','home_team','away_team']].drop_duplicates())
|
| 145 |
+
home = data.loc[data['home_team']==data['TEAM']]
|
| 146 |
+
away = data.loc[data['away_team']==data['TEAM']]
|
| 147 |
+
away.columns = [f'{i}.Away' for i in away.columns]
|
| 148 |
+
gbg = home.merge(away,left_on='game_id',right_on='game_id.Away')
|
| 149 |
+
gbg.drop(columns=['TEAM','TEAM.Away','home_team.Away','away_team.Away','Season.Away','game_id.Away'], inplace=True)
|
| 150 |
+
gbg['game_date'] = gbg['game_id'].map(game_date_dict)
|
| 151 |
+
|
| 152 |
+
if overwrite_seasons:
|
| 153 |
+
file_path = os.path.join(data_directory, 'gbg.csv')
|
| 154 |
+
old = pd.read_csv(file_path, index_col=0, low_memory=False)
|
| 155 |
+
old = old.loc[~old['Season'].isin(overwrite_seasons)]
|
| 156 |
+
gbg = pd.concat([old,gbg])
|
| 157 |
+
file_path = os.path.join(data_directory, 'gbg.csv')
|
| 158 |
+
gbg.to_csv(file_path)
|
| 159 |
+
|
| 160 |
+
year = dt.datetime.now().year
|
| 161 |
+
month = dt.datetime.now().month
|
| 162 |
+
season = year if month in [8,9,10,11,12] else year-1
|
| 163 |
+
gbg_this_year = gbg.loc[gbg['Season']==season]
|
| 164 |
+
file_path = os.path.join(data_directory, 'gbg_this_year.csv')
|
| 165 |
+
gbg_this_year.to_csv(file_path)
|
| 166 |
+
|
| 167 |
+
return gbg
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def add_odds_data(gbg, overwrite=False):
|
| 171 |
+
"""
|
| 172 |
+
Get odds from Australian Sports Betting's free online dataset and merge it with game-by-game data.
|
| 173 |
+
|
| 174 |
+
"""
|
| 175 |
+
|
| 176 |
+
# get team abbreviations
|
| 177 |
+
team_descriptions = nfl.import_team_desc()
|
| 178 |
+
team_abbreviation_dict = dict(team_descriptions[['team_name','team_abbr']].values)
|
| 179 |
+
|
| 180 |
+
# get odds
|
| 181 |
+
odds = pd.read_excel('https://www.aussportsbetting.com/historical_data/nfl.xlsx')
|
| 182 |
+
odds['Home Team'] = odds['Home Team'].str.replace('Washington Redskins','Washington Commanders').str.replace('Washington Football Team','Washington Commanders')
|
| 183 |
+
odds['Away Team'] = odds['Away Team'].str.replace('Washington Redskins','Washington Commanders').str.replace('Washington Football Team','Washington Commanders')
|
| 184 |
+
odds['Season'] = [i.year if i.month in [8,9,10,11,12] else i.year-1 for i in odds['Date']]
|
| 185 |
+
odds['Home Team Abbrev'] = odds['Home Team'].map(team_abbreviation_dict)
|
| 186 |
+
odds['Away Team Abbrev'] = odds['Away Team'].map(team_abbreviation_dict)
|
| 187 |
+
odds = odds[['Date','Home Score','Away Score','Home Team Abbrev','Away Team Abbrev','Home Odds Close','Away Odds Close','Total Score Close']]
|
| 188 |
+
odds['Key'] = odds['Date'].astype(str) + odds['Home Team Abbrev'] + odds['Away Team Abbrev']
|
| 189 |
+
odds = odds.drop(columns=['Date','Home Team Abbrev','Away Team Abbrev']).dropna()
|
| 190 |
+
odds['Home Odds'] = [round((i-1)*100) if i>= 2 else round(-100/(i-1)) for i in odds['Home Odds Close']]
|
| 191 |
+
odds['Away Odds'] = [round((i-1)*100) if i>= 2 else round(-100/(i-1)) for i in odds['Away Odds Close']]
|
| 192 |
+
odds['Home Winnings'] = [ho-1 if h>a else -1 if a>h else 0 for ho,h,a in odds[['Home Odds Close','Home Score','Away Score']].values]
|
| 193 |
+
odds['Away Winnings'] = [ao-1 if a>h else -1 if h>a else 0 for ao,h,a in odds[['Away Odds Close','Home Score','Away Score']].values]
|
| 194 |
+
|
| 195 |
+
# merge with gbg
|
| 196 |
+
gbg['Key'] = gbg['game_date'].astype(str) + gbg['home_team'] + gbg['away_team']
|
| 197 |
+
gbg_and_odds = gbg.merge(odds, left_on='Key', right_on='Key')
|
| 198 |
+
gbg_and_odds['Home-Team-Win'] = (gbg_and_odds['Home Score']>gbg_and_odds['Away Score']).astype(int)
|
| 199 |
+
gbg_and_odds['Over'] = ((gbg_and_odds['Home Score'] + gbg_and_odds['Away Score'])>gbg_and_odds['Total Score Close']).astype(int)
|
| 200 |
+
|
| 201 |
+
if overwrite:
|
| 202 |
+
file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
|
| 203 |
+
gbg_and_odds.to_csv(file_path)
|
| 204 |
+
|
| 205 |
+
return gbg_and_odds
|
| 206 |
+
|
Source/Build/nfl_data_py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Subproject commit e4988dc303bc441108dd11f4ae93a8200aab10e1
|
Source/Build/update.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import nfl_data_py.nfl_data_py as nfl
|
| 2 |
+
import build
|
| 3 |
+
import datetime as dt
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pandas as pd
|
| 6 |
+
pd.set_option('chained_assignment',None)
|
| 7 |
+
pd.set_option('display.max_columns',None)
|
| 8 |
+
import os
|
| 9 |
+
|
| 10 |
+
current_directory = os.path.dirname(os.path.abspath(__file__))
|
| 11 |
+
parent_directory = os.path.dirname(current_directory)
|
| 12 |
+
data_directory = os.path.join(parent_directory, 'Data')
|
| 13 |
+
|
| 14 |
+
# get current season
|
| 15 |
+
year = dt.datetime.now().year
|
| 16 |
+
month = dt.datetime.now().month
|
| 17 |
+
season = year if month in [8,9,10,11,12] else year-1
|
| 18 |
+
|
| 19 |
+
# update current season
|
| 20 |
+
gbg = build.build_gbg_data(get_seasons=[2023], overwrite_seasons=[2023])
|
| 21 |
+
gbg_and_odds = build.add_odds_data(gbg)
|
| 22 |
+
gbg_and_odds_this_year = gbg_and_odds.loc[gbg_and_odds['Season']==season]
|
| 23 |
+
|
| 24 |
+
file_path = os.path.join(data_directory, 'gbg_and_odds_this_year.csv')
|
| 25 |
+
gbg_and_odds_this_year.to_csv(file_path)
|
Source/Data/gbg.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:518ee58f264900f457b6ab0deed9a664607c16bf399fa2a669fc484244c57a92
|
| 3 |
+
size 1792121
|
Source/Data/gbg_and_odds.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8ec2d7b26b490e1c28de9f9c40b4b4991f6f1ff7bbad0f3e994a7c5c375affe
|
| 3 |
+
size 1567692
|
Source/Data/gbg_and_odds_this_year.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b848b812a85a74ad20af51565784382f9a9cd97af3b65d77801dd1d009054f91
|
| 3 |
+
size 886
|
Source/Data/gbg_this_year.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61d1340b0f2d8f5d4cad8efa0dfa2246adb0748ded9f3841709bde80a7146c74
|
| 3 |
+
size 844
|
Source/Data/pbp.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:840929401e41f90255f27bb2002791d75ea1aaeee538d586743044fb5065ca96
|
| 3 |
+
size 247394694
|
Source/Data/pbp_this_year.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca7b56d8e82fad5c40ee396ec129c95f2f213864b190078d03a8ec665a0532c6
|
| 3 |
+
size 405815
|
Source/Models/__init__.py
ADDED
|
File without changes
|
Source/Models/xgboost_ML_75.4%.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Source/Models/xgboost_OU_59.3%.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Source/Pickles/team_abbreviation_to_name.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d457e4ca669b5000d270669b963ce286a7b8ff0f7139535c7d0bd6439fddd4f
|
| 3 |
+
size 910
|
Source/Pickles/team_name_to_abbreviation.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fdd24bc318fde8622b827dfaa76fdbba5849d11cb61fb99bee50adcebb20fdc1
|
| 3 |
+
size 903
|
Source/Pickles/test_games_ML.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0b6c58108f994d1f070c6ee85bba812da57d9395646c05e6bf3cb85a16b9f51
|
| 3 |
+
size 7376
|
Source/Pickles/test_games_OU.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69e2be2359534720fe42752b3e983e327e4e66a0a2bfa5924d4e750db458854e
|
| 3 |
+
size 7354
|
Source/Pickles/train_games_ML.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d13bfdb558d5753359f56ae4f2450e36ad8b21c10e1cc5e778b786759b83c62
|
| 3 |
+
size 60497
|
Source/Pickles/train_games_OU.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba28c20549cb0b08e68631bbdce808399dd1ad91f190ba18f6cbfdfeee0a4467
|
| 3 |
+
size 60519
|
Source/Predict/__pycache__/predict.cpython-311.pyc
ADDED
|
Binary file (18.8 kB). View file
|
|
|
Source/Predict/predict.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import xgboost as xgb
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import pickle as pkl
|
| 5 |
+
import os
|
| 6 |
+
import requests
|
| 7 |
+
from bs4 import BeautifulSoup
|
| 8 |
+
|
| 9 |
+
current_directory = os.path.dirname(os.path.abspath(__file__))
|
| 10 |
+
parent_directory = os.path.dirname(current_directory)
|
| 11 |
+
data_directory = os.path.join(parent_directory, 'Data')
|
| 12 |
+
model_directory = os.path.join(parent_directory, 'Models')
|
| 13 |
+
pickle_directory = os.path.join(parent_directory, 'Pickles')
|
| 14 |
+
|
| 15 |
+
file_path = os.path.join(data_directory, 'pbp_this_year.csv')
|
| 16 |
+
pbp = pd.read_csv(file_path, index_col=0, low_memory=False)
|
| 17 |
+
|
| 18 |
+
# get team abbreviations
|
| 19 |
+
file_path = os.path.join(pickle_directory, 'team_name_to_abbreviation.pkl')
|
| 20 |
+
with open(file_path, 'rb') as f:
|
| 21 |
+
team_name_to_abbreviation = pkl.load(f)
|
| 22 |
+
|
| 23 |
+
file_path = os.path.join(pickle_directory, 'team_abbreviation_to_name.pkl')
|
| 24 |
+
with open(file_path, 'rb') as f:
|
| 25 |
+
team_abbreviation_to_name = pkl.load(f)
|
| 26 |
+
|
| 27 |
+
def get_week():
|
| 28 |
+
headers = {
|
| 29 |
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
|
| 30 |
+
'Accept-Encoding': 'gzip, deflate',
|
| 31 |
+
'Accept-Language': 'en-US,en;q=0.9',
|
| 32 |
+
'Cache-Control': 'max-age=0',
|
| 33 |
+
'Connection': 'keep-alive',
|
| 34 |
+
'Dnt': '1',
|
| 35 |
+
'Upgrade-Insecure-Requests': '1',
|
| 36 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36'
|
| 37 |
+
}
|
| 38 |
+
url = 'https://www.nfl.com/schedules/'
|
| 39 |
+
resp = requests.get(url,headers=headers)
|
| 40 |
+
soup = BeautifulSoup(resp.text, 'html.parser')
|
| 41 |
+
h2_tags = soup.find_all('h2')
|
| 42 |
+
year = h2_tags[0].getText().split(' ')[0]
|
| 43 |
+
week = h2_tags[0].getText().split(' ')[-1]
|
| 44 |
+
return int(week), int(year)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def get_games():
|
| 48 |
+
# pull from NBC
|
| 49 |
+
url = 'https://www.nbcsports.com/nfl/schedule'
|
| 50 |
+
df = pd.read_html(url)[0]
|
| 51 |
+
df['Away Team'] = [' '.join(i.split('\xa0')[1:]) for i in df['Away TeamAway Team']]
|
| 52 |
+
df['Home Team'] = [' '.join(i.split('\xa0')[1:]) for i in df['Home TeamHome Team']]
|
| 53 |
+
df['Date'] = pd.to_datetime(df['Game TimeGame Time'])
|
| 54 |
+
df['Date'] = df['Date'].dt.strftime('%A %d/%m %I:%M %p')
|
| 55 |
+
df['Date'] = df['Date'].apply(lambda x: f"{x.split()[0]} {int(x.split()[1].split('/')[1])}/{int(x.split()[1].split('/')[0])} {x.split()[2]}".capitalize())
|
| 56 |
+
|
| 57 |
+
return df[['Away Team','Home Team','Date']]
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def get_one_week(team_name,season,week):
|
| 61 |
+
# create columns
|
| 62 |
+
team = pbp.loc[((pbp['home_team']==team_name) | (pbp['away_team']==team_name)) & (pbp['season']==season)]
|
| 63 |
+
team['GP'] = team['week']
|
| 64 |
+
team['W'] = [1 if r>0 and team_name==h else 1 if r<0 and team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]
|
| 65 |
+
team['L'] = [0 if r>0 and team_name==h else 0 if r<0 and team_name==a else 1 for r,a,h in team[['result','away_team','home_team']].values]
|
| 66 |
+
team['W_PCT'] = team['W']/team['GP']
|
| 67 |
+
team['TOP'] = [t if team_name==p else 0 for t,p in team[['TOP_seconds','posteam']].values]
|
| 68 |
+
team['FGA'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','field_goal_attempt']].values]
|
| 69 |
+
team['FGM'] = [1 if team_name==p and f=='made' else 0 for p,f in team[['posteam','field_goal_result']].values]
|
| 70 |
+
team['FG_PCT'] = team['FGM']/team['FGA']
|
| 71 |
+
team['PassTD'] = np.where((team['posteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
|
| 72 |
+
team['RushTD'] = np.where((team['posteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
|
| 73 |
+
team['PassTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
|
| 74 |
+
team['RushTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
|
| 75 |
+
team['PassYds'] = [y if p==team_name else 0 for p,y in team[['posteam','passing_yards']].values]
|
| 76 |
+
team['RushYds'] = [y if p==team_name else 0 for p,y in team[['posteam','rushing_yards']].values]
|
| 77 |
+
team['PassYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','passing_yards']].values]
|
| 78 |
+
team['RushYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','rushing_yards']].values]
|
| 79 |
+
team['Fum'] = np.where((team['defteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
|
| 80 |
+
team['Fum_Allowed'] = np.where((team['posteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
|
| 81 |
+
team['INT'] = np.where((team['defteam'] == team_name) & (team['interception'] == 1), 1, 0)
|
| 82 |
+
team['INT_Allowed'] = np.where((team['posteam'] == team_name) & (team['interception'] == 1), 1, 0)
|
| 83 |
+
team['Sacks'] = np.where((team['defteam'] == team_name) & (team['sack'] == 1), 1, 0)
|
| 84 |
+
team['Sacks_Allowed'] = np.where((team['posteam'] == team_name) & (team['sack'] == 1), 1, 0)
|
| 85 |
+
team['Penalties'] = np.where((team['penalty_team'] == team_name), 1, 0)
|
| 86 |
+
team['FirstDowns'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','first_down']].values]
|
| 87 |
+
team['3rdDownConverted'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_converted']].values]
|
| 88 |
+
team['3rdDownFailed'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_failed']].values]
|
| 89 |
+
team['3rdDownAllowed'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_converted']].values]
|
| 90 |
+
team['3rdDownDefended'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_failed']].values]
|
| 91 |
+
team['PTS'] = [ap if at==team_name else hp if ht==team_name else None for ht,at,hp,ap in team[['home_team','away_team','home_score','away_score']].values]
|
| 92 |
+
team['PointDiff'] = [r if team_name==h else -r if team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]
|
| 93 |
+
|
| 94 |
+
# aggregate from play-by-play to game-by-game
|
| 95 |
+
features = {
|
| 96 |
+
'GP':'mean',
|
| 97 |
+
'W':'mean',
|
| 98 |
+
'L':'mean',
|
| 99 |
+
'W_PCT':'mean',
|
| 100 |
+
'TOP':'sum',
|
| 101 |
+
'FGA':'sum',
|
| 102 |
+
'FGM':'sum',
|
| 103 |
+
'FG_PCT':'mean',
|
| 104 |
+
'PassTD':'sum',
|
| 105 |
+
'RushTD':'sum',
|
| 106 |
+
'PassTD_Allowed':'sum',
|
| 107 |
+
'RushTD_Allowed':'sum',
|
| 108 |
+
'PassYds':'sum',
|
| 109 |
+
'RushYds':'sum',
|
| 110 |
+
'PassYds_Allowed':'sum',
|
| 111 |
+
'RushYds_Allowed':'sum',
|
| 112 |
+
'Fum':'sum',
|
| 113 |
+
'Fum_Allowed':'sum',
|
| 114 |
+
'INT':'sum',
|
| 115 |
+
'INT_Allowed':'sum',
|
| 116 |
+
'Sacks':'sum',
|
| 117 |
+
'Sacks_Allowed':'sum',
|
| 118 |
+
'Penalties':'sum',
|
| 119 |
+
'FirstDowns':'sum',
|
| 120 |
+
'3rdDownConverted':'sum',
|
| 121 |
+
'3rdDownFailed':'sum',
|
| 122 |
+
'3rdDownAllowed':'sum',
|
| 123 |
+
'3rdDownDefended':'sum',
|
| 124 |
+
'PTS':'mean',
|
| 125 |
+
'PointDiff':'mean'
|
| 126 |
+
}
|
| 127 |
+
game = team.groupby('game_id').agg(features).reset_index()
|
| 128 |
+
game[['W','L']] = game[['W','L']].expanding().sum()
|
| 129 |
+
game[game.columns[4:]] = game[game.columns[4:]].expanding().mean()
|
| 130 |
+
game['TEAM'] = team_name
|
| 131 |
+
game['Season'] = season
|
| 132 |
+
return game.loc[game['GP']==week]
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def get_one_week_home_and_away(home,away,season,week):
|
| 136 |
+
home = get_one_week(home,season,week)
|
| 137 |
+
away = get_one_week(away,season,week)
|
| 138 |
+
away.columns = [f'{i}.Away' for i in away.columns]
|
| 139 |
+
gbg = home.merge(away,left_index=True,right_index=True)
|
| 140 |
+
gbg.drop(columns=['TEAM','TEAM.Away','Season.Away','game_id.Away'], inplace=True)
|
| 141 |
+
return gbg
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def predict(home, away, season, week, total):
    """Predict the moneyline winner and the over/under for one matchup.

    Parameters
    ----------
    home, away : str
        Full team names (keys of ``team_name_to_abbreviation``).
    season, week : int
        Season year and week number used to pull rolling team stats.
    total : float
        Closing total (over/under line) for the game.

    Returns
    -------
    tuple[dict, dict]
        ``(moneyline, over_under)`` — dicts of single-element lists;
        'N/A' placeholders when no feature row exists for this week.
    """
    # finish preparing data
    home_abbrev = team_name_to_abbreviation[home]
    away_abbrev = team_name_to_abbreviation[away]
    gbg = get_one_week_home_and_away(home_abbrev, away_abbrev, season, week)
    gbg['Total Score Close'] = total

    matrix = xgb.DMatrix(gbg.drop(columns=['game_id', 'Season']).astype(float).values)

    # moneyline
    model = 'xgboost_ML_75.4%'
    file_path = os.path.join(model_directory, f'{model}.json')
    xgb_ml = xgb.Booster()
    xgb_ml.load_model(file_path)
    try:
        # indexing [0][1] raises IndexError when gbg has no row for this week
        ml_predicted_proba = xgb_ml.predict(matrix)[0][1]
        winner_proba = max([ml_predicted_proba, 1 - ml_predicted_proba])
        moneyline = {'Winner': [home if ml_predicted_proba > 0.6 else away if ml_predicted_proba < 0.4 else 'Toss-Up'],
                     'Probabilities': [winner_proba]}
    except Exception:  # was a bare except; keep KeyboardInterrupt/SystemExit propagating
        moneyline = {'Winner': ['N/A'],          # wrapped in a list to match the success path
                     'Probabilities': ['N/A']}

    # over/under
    model = 'xgboost_OU_59.3%'
    file_path = os.path.join(model_directory, f'{model}.json')
    xgb_ou = xgb.Booster()
    xgb_ou.load_model(file_path)
    try:
        ou_predicted_proba = xgb_ou.predict(matrix)[0][1]
        over_under = {'Over/Under': ['Over' if ou_predicted_proba > 0.5 else 'Under'],
                      'Probability': [ou_predicted_proba]}
    except Exception:
        over_under = {'Over/Under': ['N/A'],
                      'Probability': ['N/A']}   # key renamed from 'Probabilities' to match the success path

    return moneyline, over_under
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def update_past_predictions():
    """Re-run predict() for every game played this season and save the results.

    Reads the season-to-date odds file to recover each game's closing total,
    predicts each distinct game found in the play-by-play frame, and writes
    the predictions to Data/predictions_this_year.csv.
    """
    file_path = os.path.join(data_directory, 'gbg_and_odds_this_year.csv')
    gbg_and_odds_this_year = pd.read_csv(file_path, index_col=0, low_memory=False)
    # Map game_id -> closing total. The original dict(df[[...]]) mapped the
    # two COLUMN NAMES to Series objects, so total_dict[game_id] raised
    # KeyError for every game; zip the two columns instead.
    total_dict = dict(zip(gbg_and_odds_this_year['game_id'],
                          gbg_and_odds_this_year['Total Score Close']))
    # NOTE(review): `pbp` is a module-level play-by-play frame loaded elsewhere
    # in this file — confirm it covers the current season.
    games = pbp.drop_duplicates(subset='game_id')

    predictions = {}
    for _, i in games.iterrows():
        game_id = i['game_id']
        home = i['home_team']
        away = i['away_team']
        week = i['week']
        season = i['season']
        total = total_dict[game_id]
        predictions[game_id] = predict(home, away, season, week, total)

    predictions_df = pd.DataFrame(predictions)
    file_path = os.path.join(data_directory, 'predictions_this_year.csv')
    predictions_df.to_csv(file_path)
|
Source/Test/__init__.py
ADDED
|
File without changes
|
Source/Test/xgboost_ML.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Backtest the moneyline model on its held-out games and plot cumulative return."""
import xgboost as xgb
import pandas as pd
import pickle as pkl
import numpy as np
import os
import matplotlib.pyplot as plt  # moved to the top with the other imports

model = 'xgboost_ML_75.4%'

# sibling Source/ directories, resolved relative to this file
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')

# load the trained moneyline model
file_path = os.path.join(model_directory, f'{model}.json')
xgb_ml = xgb.Booster()
xgb_ml.load_model(file_path)

# game ids held out by Train/xgboost_ML.py
file_path = os.path.join(pickle_directory, 'test_games_ML.pkl')
with open(file_path, 'rb') as f:
    test_games = pkl.load(f).tolist()

file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
gbg_and_odds = pd.read_csv(file_path, index_col=0)
# .copy() so the column assignments below don't trigger SettingWithCopyWarning
test_data = gbg_and_odds.loc[gbg_and_odds['game_id'].isin(test_games)].copy()
test_data_matrix = xgb.DMatrix(test_data.drop(columns=['game_id','Over','Home-Team-Win','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings']).astype(float).values)

predicted_probas = xgb_ml.predict(test_data_matrix)
test_data['predicted_proba'] = [i[1] for i in predicted_probas]  # P(home team wins)
test_data['prediction'] = (test_data['predicted_proba'] > 0.5).astype(int)
test_data['correct'] = test_data['Home-Team-Win'] == test_data['prediction']

# only bet games where the model is confident either way (60% threshold)
bets = test_data.loc[(test_data['predicted_proba'] > 0.6) | (test_data['predicted_proba'] < 0.4)].copy()
# NOTE(review): assumes Home/Away Winnings encode the payout keyed to whether
# the home-side prediction was correct — verify against the build step
bets['winnings'] = [h if c else a for h, a, c in bets[['Home Winnings', 'Away Winnings', 'correct']].values]

# dark-themed cumulative-return plot
fig = plt.figure(facecolor='black')
ax = fig.add_subplot(1, 1, 1, facecolor='black')

# Plot data with line color as RGB(0, 128, 0)
ax.plot(bets['winnings'].cumsum().values * 100, linewidth=3, color=(0/255, 128/255, 0/255))

# Set title and labels
ax.set_title('MARCI 3.0 - MoneyLine w/ 60% Confidence Threshold', color='white')
ax.set_xlabel('Games Bet On', color='white')
ax.set_ylabel('Return (%)', color='white')

# Change tick colors to white
ax.tick_params(axis='x', colors='white')
ax.tick_params(axis='y', colors='white')

# Change axis edge colors
ax.spines['bottom'].set_color('white')
ax.spines['top'].set_color('white')
ax.spines['left'].set_color('white')
ax.spines['right'].set_color('white')

plt.savefig(f'{model}_dark.png', facecolor='black')
|
Source/Test/xgboost_ML_75.4%.png
ADDED
|
Source/Test/xgboost_ML_75.4%_dark.png
ADDED
|
Source/Test/xgboost_OU.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Backtest the over/under model on its held-out games and plot cumulative return."""
import xgboost as xgb
import pandas as pd
import pickle as pkl
import numpy as np
import os
import matplotlib.pyplot as plt  # moved to the top with the other imports

model = 'xgboost_OU_59.3%'

# sibling Source/ directories, resolved relative to this file
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')

# load the trained over/under model
file_path = os.path.join(model_directory, f'{model}.json')
xgb_ou = xgb.Booster()
xgb_ou.load_model(file_path)

# game ids held out by Train/xgboost_OU.py
file_path = os.path.join(pickle_directory, 'test_games_OU.pkl')
with open(file_path, 'rb') as f:
    test_games = pkl.load(f).tolist()

file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
gbg_and_odds = pd.read_csv(file_path, index_col=0)
# .copy() so the column assignments below don't trigger SettingWithCopyWarning
test_data = gbg_and_odds.loc[gbg_and_odds['game_id'].isin(test_games)].copy()
test_data_matrix = xgb.DMatrix(test_data.drop(columns=['game_id','Over','Home-Team-Win','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings']).astype(float).values)

predicted_probas = xgb_ou.predict(test_data_matrix)
test_data['predicted_proba'] = [i[1] for i in predicted_probas]  # P(over)
test_data['prediction'] = (test_data['predicted_proba'] > 0.5).astype(int)
test_data['correct'] = test_data['Over'] == test_data['prediction']

bets = test_data  # .loc[(test_data['predicted_proba']>0.6) | (test_data['predicted_proba']<0.4)]
# +0.91 units on a hit (standard -110 vig payout), -1 on a miss.
# Iterate the Series directly: the original bets[['correct']].values yielded
# length-1 ndarrays rather than booleans.
bets['winnings'] = [0.91 if c else -1 for c in bets['correct']]

# dark-themed cumulative-return plot
fig = plt.figure(facecolor='black')
ax = fig.add_subplot(1, 1, 1, facecolor='black')

# Plot data with line color as RGB(0, 128, 0)
ax.plot(bets['winnings'].cumsum().values * 100, linewidth=3, color=(0/255, 128/255, 0/255))

# Set title and labels
ax.set_title('MARCI 3.0 - Over/Under', color='white')
ax.set_xlabel('Games Bet On', color='white')
ax.set_ylabel('Return (%)', color='white')

# Change tick colors to white
ax.tick_params(axis='x', colors='white')
ax.tick_params(axis='y', colors='white')

# Change axis edge colors
ax.spines['bottom'].set_color('white')
ax.spines['top'].set_color('white')
ax.spines['left'].set_color('white')
ax.spines['right'].set_color('white')

plt.savefig(f'{model}_dark.png', facecolor='black')
|
Source/Test/xgboost_OU_59.3%.png
ADDED
|
Source/Test/xgboost_OU_59.3%_dark.png
ADDED
|
Source/Train/xgboost_ML.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Train the moneyline XGBoost model.

Runs 100 random 90/10 splits; whenever a trial ties or beats the best
accuracy seen so far, persists that split's game ids and the model.
"""
import xgboost as xgb
import pandas as pd
import pickle as pkl
import numpy as np
from tqdm import tqdm
from IPython.display import clear_output
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import os

# sibling Source/ directories, resolved relative to this file
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')

file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
data = pd.read_csv(file_path, index_col=0).dropna()

# label: 1 when the home team won, 0 otherwise
margin = data['Home-Team-Win']
data.drop(columns=['Home-Team-Win','Over','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings'], inplace=True)

acc_results = []

for trial in tqdm(range(100)):
    X_train, X_test, y_train, y_test = train_test_split(data, margin, test_size=.1)

    # remember which games landed in each split before dropping the id column
    train_games = X_train['game_id']
    test_games = X_test['game_id']
    X_train = X_train.drop(columns=['game_id'])
    X_test = X_test.drop(columns=['game_id'])

    train = xgb.DMatrix(X_train.astype(float).values, label=y_train)
    test = xgb.DMatrix(X_test.astype(float).values, label=y_test)

    param = {'max_depth': 2,
             'eta': 0.01,
             'objective': 'multi:softprob',
             'num_class': 2}
    epochs = 500

    model = xgb.train(param, train, epochs)
    predictions = model.predict(test)
    y = np.argmax(predictions, axis=1)  # class with the highest probability

    acc = round(accuracy_score(y_test, y) * 100, 1)
    acc_results.append(acc)
    clear_output(wait=True)
    print(f"Best accuracy: {max(acc_results)}%")

    # persist artifacts only when this trial matches the best accuracy so far
    if acc == max(acc_results):
        file_path = os.path.join(pickle_directory, 'train_games_ML.pkl')
        with open(file_path, 'wb') as f:
            pkl.dump(train_games, f)

        file_path = os.path.join(pickle_directory, 'test_games_ML.pkl')
        with open(file_path, 'wb') as f:
            pkl.dump(test_games, f)

        file_path = os.path.join(model_directory, f'xgboost_ML_{acc}%.json')
        model.save_model(file_path)

print('Done')
|
Source/Train/xgboost_OU.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Train the over/under XGBoost model.

Runs 100 random 90/10 splits; whenever a trial ties or beats the best
accuracy seen so far, persists that split's game ids and the model.
"""
import xgboost as xgb
import pandas as pd
import pickle as pkl
import numpy as np
from tqdm import tqdm
from IPython.display import clear_output
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import os

# sibling Source/ directories, resolved relative to this file
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')

file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
data = pd.read_csv(file_path, index_col=0).dropna()

# label: whether the game went over the closing total
OU = data['Over']
data.drop(columns=['Home-Team-Win','Over','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings'], inplace=True)

acc_results = []

for trial in tqdm(range(100)):
    X_train, X_test, y_train, y_test = train_test_split(data, OU, test_size=.1)

    # remember which games landed in each split before dropping the id column
    train_games = X_train['game_id']
    test_games = X_test['game_id']
    X_train = X_train.drop(columns=['game_id'])
    X_test = X_test.drop(columns=['game_id'])

    train = xgb.DMatrix(X_train.astype(float).values, label=y_train)
    test = xgb.DMatrix(X_test.astype(float).values, label=y_test)

    # NOTE(review): num_class=3 although 'Over' looks binary — presumably a
    # third label encodes a push; confirm against the build step.
    param = {'max_depth': 6,
             'eta': 0.05,
             'objective': 'multi:softprob',
             'num_class': 3}
    epochs = 300

    model = xgb.train(param, train, epochs)
    predictions = model.predict(test)
    y = np.argmax(predictions, axis=1)  # class with the highest probability

    acc = round(accuracy_score(y_test, y) * 100, 1)
    acc_results.append(acc)
    clear_output(wait=True)
    print(f"Best accuracy: {max(acc_results)}%")

    # persist artifacts only when this trial matches the best accuracy so far
    if acc == max(acc_results):
        file_path = os.path.join(pickle_directory, 'train_games_OU.pkl')
        with open(file_path, 'wb') as f:
            pkl.dump(train_games, f)

        file_path = os.path.join(pickle_directory, 'test_games_OU.pkl')
        with open(file_path, 'wb') as f:
            pkl.dump(test_games, f)

        file_path = os.path.join(model_directory, f'xgboost_OU_{acc}%.json')
        model.save_model(file_path)

print('Done')
|
Static/Arizona Cardinals.webp
ADDED
|
Static/Atlanta Falcons.webp
ADDED
|
Static/Baltimore Ravens.webp
ADDED
|
Static/Buffalo Bills.webp
ADDED
|
Static/Carolina Panthers.webp
ADDED
|
Static/Chicago Bears.webp
ADDED
|
Static/Cincinnati Bengals.webp
ADDED
|
Static/Cleveland Browns.webp
ADDED
|
Static/Dallas Cowboys.webp
ADDED
|
Static/Denver Broncos.webp
ADDED
|
Static/Detroit Lions.webp
ADDED
|
Static/Green Bay Packers.webp
ADDED
|
Static/Houston Texans.webp
ADDED
|
Static/Indianapolis Colts.webp
ADDED
|
Static/Jacksonville Jaguars.webp
ADDED
|
Static/Kansas City Chiefs.webp
ADDED
|
Static/Las Vegas Raiders.webp
ADDED
|