from nfl_data_py import nfl_data_py as nfl
from tqdm import tqdm
import numpy as np
import pandas as pd
pd.set_option('chained_assignment',None)
pd.set_option('display.max_columns',None)
import os
import datetime as dt

current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')

year = dt.datetime.now().year
month = dt.datetime.now().month
current_season = year if month in [8,9,10,11,12] else year-1

def get_pbp_data(get_seasons=[]):
    """
    Pull data from nflFastR's Github repo. 

    """
    pbp = nfl.import_pbp_data(get_seasons)
    #pbp = pd.read_csv(r"C:\Users\brayd\Downloads\play_by_play_2023.csv")
    pbp['TOP_seconds'] = pbp['drive_time_of_possession'].apply(lambda x: int(x.split(':')[0]) * 60 + int(x.split(':')[1]) if pd.notnull(x) else 0)

    return pbp


def build_gbg_data(get_seasons=[]):
    """
    Build a game-by-game dataset to use for prediction models.

    """
    print('Loading play-by-play data.')
    pbp = get_pbp_data(get_seasons)
    game_date_dict = dict(pbp[['game_id','game_date']].values)
    teams = list(set(list(pbp['home_team'].unique()) + list(pbp['away_team'].unique())))
    seasons = pbp['season'].unique()
    
    print('Building game-by-game data.')
    data = pd.DataFrame()
    for season in seasons:
        print(season)
        for team_name in tqdm(teams):
            # create features
            team = pbp.loc[((pbp['home_team']==team_name) | (pbp['away_team']==team_name)) & (pbp['season']==season)] 
            team['GP'] = team['week']
            team['W'] = [1 if r>0 and team_name==h else 1 if r<0 and team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]
            team['L'] = [0 if r>0 and team_name==h else 0 if r<0 and team_name==a else 1 for r,a,h in team[['result','away_team','home_team']].values]
            team['W_PCT'] = team['W']/team['GP']
            team['TOP'] = [t if team_name==p else 0 for t,p in team[['TOP_seconds','posteam']].values]
            team['FGA'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','field_goal_attempt']].values]
            team['FGM'] = [1 if team_name==p and f=='made' else 0 for p,f in team[['posteam','field_goal_result']].values]
            team['FG_PCT'] = team['FGM']/team['FGA']
            team['PassTD'] = np.where((team['posteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
            team['RushTD'] = np.where((team['posteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
            team['PassTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
            team['RushTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
            team['PassYds'] = [y if p==team_name else 0 for p,y in team[['posteam','passing_yards']].values]
            team['RushYds'] = [y if p==team_name else 0 for p,y in team[['posteam','rushing_yards']].values]
            team['PassYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','passing_yards']].values]
            team['RushYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','rushing_yards']].values]
            team['Fum'] = np.where((team['defteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
            team['Fum_Allowed'] = np.where((team['posteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
            team['INT'] = np.where((team['defteam'] == team_name) & (team['interception'] == 1), 1, 0)
            team['INT_Allowed'] = np.where((team['posteam'] == team_name) & (team['interception'] == 1), 1, 0)
            team['Sacks'] = np.where((team['defteam'] == team_name) & (team['sack'] == 1), 1, 0)
            team['Sacks_Allowed'] = np.where((team['posteam'] == team_name) & (team['sack'] == 1), 1, 0)
            team['Penalties'] = np.where((team['penalty_team'] == team_name), 1, 0)
            team['FirstDowns'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','first_down']].values]
            team['3rdDownConverted'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_converted']].values]
            team['3rdDownFailed'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_failed']].values]
            team['3rdDownAllowed'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_converted']].values]
            team['3rdDownDefended'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_failed']].values]
            team['PTS'] = [ap if at==team_name else hp if ht==team_name else None for ht,at,hp,ap in team[['home_team','away_team','home_score','away_score']].values]
            team['PointDiff'] = [r if team_name==h else -r if team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]

            # aggregate from play-by-play to game-by-game
            features = {
                'GP':'mean',
                'W':'mean',
                'L':'mean',
                'W_PCT':'mean',
                'TOP':'sum',
                'FGA':'sum',
                'FGM':'sum',
                'FG_PCT':'mean',
                'PassTD':'sum',
                'RushTD':'sum',
                'PassTD_Allowed':'sum',
                'RushTD_Allowed':'sum',
                'PassYds':'sum',
                'RushYds':'sum',
                'PassYds_Allowed':'sum',
                'RushYds_Allowed':'sum',
                'Fum':'sum',
                'Fum_Allowed':'sum',
                'INT':'sum',
                'INT_Allowed':'sum',
                'Sacks':'sum',
                'Sacks_Allowed':'sum',
                'Penalties':'sum',
                'FirstDowns':'sum',
                '3rdDownConverted':'sum',
                '3rdDownFailed':'sum',
                '3rdDownAllowed':'sum',
                '3rdDownDefended':'sum',
                'PTS':'mean',
                'PointDiff':'mean'
            }

            game = team.groupby('game_id').agg(features).reset_index().sort_values('GP')
            game[['W','L']] = game[['W','L']].expanding().sum()
            game[game.columns[4:]] = game[game.columns[4:]].expanding().mean()
            if season != current_season:
                game[game.columns[1:]] = game[game.columns[1:]].shift()
                game['TEAM'] = team_name
                game['Season'] = season
            else:
                game['TEAM'] = team_name
                game['Season'] = season

            data = pd.concat([data,game])

    # separate home and away data and merge
    data = data.merge(pbp[['game_id','home_team','away_team']].drop_duplicates())
    home = data.loc[data['home_team']==data['TEAM']]
    away = data.loc[data['away_team']==data['TEAM']]
    away.columns = [f'{i}.Away' for i in away.columns]
    gbg = home.merge(away,left_on='game_id',right_on='game_id.Away')
    gbg.drop(columns=['TEAM','TEAM.Away','home_team.Away','away_team.Away','Season.Away','game_id.Away'], inplace=True)
    gbg['game_date'] = gbg['game_id'].map(game_date_dict)

    # save current data 
    if current_season in get_seasons:
        gbg_this_year = gbg.loc[gbg['Season']==current_season]
        file_path = os.path.join(data_directory, 'gbg_this_year.csv')
        gbg_this_year.to_csv(file_path, index=False)

    # save historical data 
    if get_seasons != [current_season]:
        gbg = gbg.loc[gbg['Season']!=current_season]
        file_path = os.path.join(data_directory, 'gbg.csv')
        gbg.to_csv(file_path, index=False)


def add_odds_data():
    """
    Get odds from Australian Sports Betting's free online dataset and merge it with game-by-game data.

    """
    
    # get team abbreviations
    team_descriptions = nfl.import_team_desc()
    team_abbreviation_dict = dict(team_descriptions[['team_name','team_abbr']].values)
    
    # get odds
    odds = pd.read_excel('https://www.aussportsbetting.com/historical_data/nfl.xlsx')
    odds['Home Team'] = odds['Home Team'].str.replace('Washington Redskins','Washington Commanders').str.replace('Washington Football Team','Washington Commanders')
    odds['Away Team'] = odds['Away Team'].str.replace('Washington Redskins','Washington Commanders').str.replace('Washington Football Team','Washington Commanders')
    odds['Season'] = [i.year if i.month in [8,9,10,11,12] else i.year-1 for i in odds['Date']]
    odds['Home Team Abbrev'] = odds['Home Team'].map(team_abbreviation_dict).str.replace('LAR','LA')
    odds['Away Team Abbrev'] = odds['Away Team'].map(team_abbreviation_dict).str.replace('LAR','LA')
    odds = odds[['Date','Home Score','Away Score','Home Team Abbrev','Away Team Abbrev','Home Odds Close','Away Odds Close','Total Score Close','Home Line Close']]
    odds['Key'] = odds['Date'].astype(str) + odds['Home Team Abbrev'] + odds['Away Team Abbrev']
    odds = odds.drop(columns=['Date','Home Team Abbrev','Away Team Abbrev']).dropna()
    odds['Home Odds'] = [round((i-1)*100) if i>= 2 else round(-100/(i-1)) for i in odds['Home Odds Close']]
    odds['Away Odds'] = [round((i-1)*100) if i>= 2 else round(-100/(i-1)) for i in odds['Away Odds Close']]
    odds['Home Winnings'] = [ho-1 if h>a else -1 if a>h else 0 for ho,h,a in odds[['Home Odds Close','Home Score','Away Score']].values]
    odds['Away Winnings'] = [ao-1 if a>h else -1 if h>a else 0 for ao,h,a in odds[['Away Odds Close','Home Score','Away Score']].values]

    # load gbg data
    file_path = os.path.join(data_directory, 'gbg.csv')
    gbg = pd.read_csv(file_path)
    file_path = os.path.join(data_directory, 'gbg_this_year.csv')
    gbg_this_year = pd.read_csv(file_path)

    # merge and save
    dataframes = [gbg, gbg_this_year]
    for idx in range(2):
        i = dataframes[idx]
        i['Key'] = i['game_date'].astype(str) + i['home_team'] + i['away_team']
        gbg_and_odds = i.merge(odds, left_on='Key', right_on='Key')
        gbg_and_odds['Home-Team-Cover'] = [1 if (h-a)>-l else 0 if (h-a)<-l else 2 for h,a,l in gbg_and_odds[['Home Score','Away Score','Home Line Close']].values] 
        gbg_and_odds['Home-Team-Win'] = (gbg_and_odds['Home Score']>gbg_and_odds['Away Score']).astype(int)
        gbg_and_odds['Over'] = ((gbg_and_odds['Home Score'] + gbg_and_odds['Away Score'])>gbg_and_odds['Total Score Close']).astype(int)
        
        if idx==0:
            file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
        else:
            file_path = os.path.join(data_directory, 'gbg_and_odds_this_year.csv')           
        
        gbg_and_odds.drop_duplicates(subset='game_id').to_csv(file_path, index=False)