import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import date, timedelta
import random
# [All the scheduling functions and analytics functions here]
from itertools import combinations, product
from datetime import date, timedelta
def generate_schedule_from_data(conference_team_df, available_dates):
    """Build a dated match schedule from a conference/team table.

    Every pair of teams inside a conference meets twice (once in each
    order), and every team additionally receives one match against a
    randomly chosen team from a different conference. Dates are handed out
    by ``assign_dates_to_matches``.

    Args:
        conference_team_df: DataFrame with one row per team and at least the
            columns ``'Conference'`` and ``'Team'``.
        available_dates: Dates forwarded to ``assign_dates_to_matches``.

    Returns:
        DataFrame with the columns ``'Team 1'``, ``'Team 2'``, ``'Date'``,
        ``'Conference 1'`` and ``'Conference 2'``. ``'Date'`` is converted
        with ``pd.to_datetime`` so the ``.dt`` accessor works on it.

    Raises:
        ValueError: If the ``'Conference'`` or ``'Team'`` column is missing.
    """
    # Validate before touching the columns, so a malformed CSV produces this
    # message instead of a bare KeyError.
    if 'Conference' not in conference_team_df or 'Team' not in conference_team_df:
        raise ValueError("The CSV file must contain 'Conference' and 'Team' columns.")

    conferences = conference_team_df['Conference'].unique()
    team_to_conference = conference_team_df.set_index('Team')['Conference'].to_dict()

    # Intra-conference matches: each pairing is scheduled in both orders so
    # that every team plays every conference rival twice.
    intra_conference_matches = []
    for conf in conferences:
        in_conference = conference_team_df['Conference'] == conf
        teams_in_conf = conference_team_df.loc[in_conference, 'Team'].tolist()
        pairings = list(combinations(teams_in_conf, 2))
        intra_conference_matches.extend(pairings)
        intra_conference_matches.extend((team2, team1) for team1, team2 in pairings)

    # Inter-conference matches: one randomly drawn opponent per team.
    inter_conference_matches = []
    for team, conference in zip(conference_team_df['Team'], conference_team_df['Conference']):
        other_conferences = [conf for conf in conferences if conf != conference]
        in_other_conference = conference_team_df['Conference'].isin(other_conferences)
        other_teams = conference_team_df.loc[in_other_conference, 'Team'].tolist()
        if not other_teams:
            # Only one conference in the table: nobody to draw from.
            continue
        inter_conference_matches.append((team, random.choice(other_teams)))

    combined_schedule = intra_conference_matches + inter_conference_matches
    scheduled_matches = assign_dates_to_matches(combined_schedule, available_dates)

    schedule_df = pd.DataFrame(scheduled_matches, columns=['Team 1', 'Team 2', 'Date'])
    schedule_df['Date'] = pd.to_datetime(schedule_df['Date'])
    schedule_df['Conference 1'] = schedule_df['Team 1'].map(team_to_conference)
    schedule_df['Conference 2'] = schedule_df['Team 2'].map(team_to_conference)
    return schedule_df
# To use this function, load your data into a DataFrame and call it with the dates to schedule on:
# df = pd.read_csv('path/to/your/csv')
# schedule_df = generate_schedule_from_data(df, generate_available_dates(date(2022, 11, 6)))
# Mock historical data generation
def generate_mock_historical_data(schedule_df):
    """Derive mock "last season" results from a schedule.

    The input is left untouched; the work is done on a copy. Each game gets
    two random integer scores between 50 and 100 (inclusive), and every date
    is moved back by one year.

    Args:
        schedule_df: Schedule DataFrame with a ``'Date'`` column, or ``None``
            when no schedule has been generated yet.

    Returns:
        A new DataFrame with the added ``'Score 1'`` and ``'Score 2'``
        columns and the shifted ``'Date'`` column, or ``None`` if
        ``schedule_df`` is ``None``.
    """
    if schedule_df is None:
        return None

    # Copy first: writing into the caller's frame would shift the live
    # schedule back a year as well.
    historical = schedule_df.copy()
    game_count = len(historical)

    # Generate random scores for each team in each game
    historical['Score 1'] = [random.randint(50, 100) for _ in range(game_count)]
    historical['Score 2'] = [random.randint(50, 100) for _ in range(game_count)]

    # Assume the historical data is from the previous year
    historical['Date'] = historical['Date'] - pd.DateOffset(years=1)
    return historical
# To use this function, pass the generated schedule DataFrame:
# historical_data = generate_mock_historical_data(schedule_df)
# Assign dates to matches
def generate_available_dates(start_date, num_days=300, game_weekdays=(0, 2, 3, 5)):
    """Return the dates in a window that fall on permitted game weekdays.

    Args:
        start_date: First day of the window; must support ``+ timedelta``
            and ``.weekday()`` (e.g. ``datetime.date``).
        num_days: Number of consecutive days to examine, starting at
            ``start_date``.
        game_weekdays: ``weekday()`` numbers (Monday is 0) on which games may
            be played. The default keeps Monday, Wednesday, Thursday and
            Saturday.

    Returns:
        List of matching dates in ascending order; empty when ``num_days``
        is zero or negative.
    """
    allowed = frozenset(game_weekdays)
    window = (start_date + timedelta(days=offset) for offset in range(num_days))
    return [day for day in window if day.weekday() in allowed]
def assign_dates_to_matches(matches, available_dates):
    """Attach a date to every match, cycling through the available dates.

    Match ``i`` receives ``available_dates[i % len(available_dates)]``, so
    dates are reused once there are more matches than dates.

    Args:
        matches: Sequence of pairs ``(team_1, team_2)``.
        available_dates: Sequence of dates to distribute.

    Returns:
        List of ``(team_1, team_2, date)`` tuples in the order of ``matches``.

    Raises:
        ValueError: If there are matches to schedule but no dates.
    """
    num_dates = len(available_dates)
    if matches and num_dates == 0:
        # Without this guard the modulo below fails with ZeroDivisionError.
        raise ValueError("Cannot assign dates: no available dates were provided.")
    return [
        (match[0], match[1], available_dates[index % num_dates])
        for index, match in enumerate(matches)
    ]
# Team Workload Analysis
def team_workload_analysis(schedule_df, conference_team_df):
    """Generate a bar chart showing the number of matches each team has per week.

    Matches are grouped by ISO week number of ``'Date'`` and by ``'Team 1'``;
    the ``'Team 2'`` side of a match is not counted.
    NOTE(review): confirm whether workload should include both sides.

    Args:
        schedule_df: Schedule DataFrame with ``'Date'`` and ``'Team 1'``
            columns, or ``None`` if no schedule exists yet. It is not
            modified.
        conference_team_df: Unused; kept so all analytics functions share
            one signature.

    Returns:
        The matplotlib figure holding the chart, or a placeholder figure
        asking the user to generate a schedule when ``schedule_df`` is
        ``None``.
    """
    if schedule_df is None:
        placeholder = plt.figure(figsize=(10, 6))
        placeholder.text(0.5, 0.5, 'Please generate the schedule first before viewing analytics.',
                         horizontalalignment='center', verticalalignment='center',
                         fontsize=14, color='red')
        # Stop here: everything below indexes into the schedule.
        return placeholder

    # pd.to_datetime makes the .dt accessor safe even when 'Date' holds plain
    # date objects; assign() keeps the helper column off the caller's frame.
    week_numbers = pd.to_datetime(schedule_df['Date']).dt.isocalendar().week
    team_counts = (
        schedule_df.assign(Week=week_numbers)
        .groupby(['Week', 'Team 1'])
        .size()
        .unstack()
        .fillna(0)
    )

    # Plot
    ax = team_counts.plot(kind='bar', stacked=True, figsize=(15, 7), cmap='Oranges')
    ax.set_title('Team Workload Analysis')
    ax.set_ylabel('Number of Matches')
    ax.set_xlabel('Week Number')
    ax.legend(title='Teams', bbox_to_anchor=(1.05, 1), loc='upper left')
    return ax.figure
# Match Distribution
def match_distribution(schedule_df, conference_team_df):
    """Generate a histogram showing match distribution across months.

    Only the months November through March are shown, in that order; matches
    in any other month do not appear in the chart.

    Args:
        schedule_df: Schedule DataFrame with a ``'Date'`` column, or ``None``
            if no schedule exists yet. It is not modified.
        conference_team_df: Unused; kept so all analytics functions share
            one signature.

    Returns:
        The matplotlib figure holding the chart, or a placeholder figure
        asking the user to generate a schedule when ``schedule_df`` is
        ``None``.
    """
    if schedule_df is None:
        placeholder = plt.figure(figsize=(10, 6))
        placeholder.text(0.5, 0.5, 'Please generate the schedule first before viewing analytics.',
                         horizontalalignment='center', verticalalignment='center',
                         fontsize=14, color='red')
        # Stop here: everything below indexes into the schedule.
        return placeholder

    month_order = ['November', 'December', 'January', 'February', 'March']
    # Work on a derived frame so the caller's schedule gains no 'Month' column.
    monthly = schedule_df.assign(Month=pd.to_datetime(schedule_df['Date']).dt.month_name())

    # Plot
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.countplot(data=monthly, x='Month', order=month_order, palette='Oranges_r', ax=ax)
    ax.set_title('Match Distribution Across Months')
    ax.set_ylabel('Number of Matches')
    return fig
# Inter-Conference Match Analysis
def inter_conference_analysis(schedule_df, conference_team_df):
    """Generate a heatmap of match counts between pairs of conferences.

    Teams are mapped to conferences through ``conference_team_df``; matches
    whose two teams share a conference are left out. Every conference found
    in ``conference_team_df`` gets a row and a column, filled with 0 where
    no match exists.

    Args:
        schedule_df: Schedule DataFrame with ``'Team 1'`` and ``'Team 2'``
            columns, or ``None`` if no schedule exists yet. It is not
            modified.
        conference_team_df: DataFrame with ``'Team'`` and ``'Conference'``
            columns.

    Returns:
        The matplotlib figure holding the heatmap, or a placeholder figure
        asking the user to generate a schedule when ``schedule_df`` is
        ``None``.
    """
    if schedule_df is None:
        placeholder = plt.figure(figsize=(10, 6))
        placeholder.text(0.5, 0.5, 'Please generate the schedule first before viewing analytics.',
                         horizontalalignment='center', verticalalignment='center',
                         fontsize=14, color='red')
        # Stop here: everything below indexes into the schedule.
        return placeholder

    # Mapping teams to their conferences from the conference_team_df
    team_to_conference = conference_team_df.set_index('Team')['Conference'].to_dict()
    conference_1 = schedule_df['Team 1'].map(team_to_conference)
    conference_2 = schedule_df['Team 2'].map(team_to_conference)

    # Keep only matches that cross a conference boundary.
    crosses_conferences = conference_1 != conference_2
    heatmap_data = pd.crosstab(
        conference_1[crosses_conferences],
        conference_2[crosses_conferences],
        rownames=['Conference 1'],
        colnames=['Conference 2'],
    )

    # Give every conference a row and a column, in sorted order; pairs with
    # no matches become 0. astype(int) keeps the grid numeric even when the
    # crosstab was empty.
    all_conferences = sorted(conference_team_df['Conference'].dropna().unique())
    heatmap_data = heatmap_data.reindex(
        index=all_conferences, columns=all_conferences, fill_value=0
    ).astype(int)

    # Plotting the heatmap
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(heatmap_data, annot=True, cmap='Oranges', linewidths=.5,
                cbar_kws={'label': 'Number of Matches'}, ax=ax)
    ax.set_title('Inter-Conference Match Analysis')
    ax.set_ylabel('Conference 1')
    ax.set_xlabel('Conference 2')
    return fig
# Commissioner Analytics
def commissioner_analytics(schedule_df, conference_team_df, commissioners):
    """Generate a bar chart showing matches overseen by each commissioner.

    Commissioners are paired with conferences positionally: the first
    commissioner gets the first distinct conference in
    ``conference_team_df``, and so on. A match is attributed to the
    commissioner of its ``'Conference 1'``. Conferences without a
    commissioner (fewer names than conferences) contribute nothing.

    Args:
        schedule_df: Schedule DataFrame, or ``None`` if no schedule exists
            yet. It is not modified.
        conference_team_df: DataFrame with ``'Team'`` and ``'Conference'``
            columns.
        commissioners: Sequence of commissioner names.

    Returns:
        The matplotlib figure holding the chart, or a placeholder figure
        asking the user to generate a schedule when ``schedule_df`` is
        ``None``.
    """
    if schedule_df is None:
        placeholder = plt.figure(figsize=(10, 6))
        placeholder.text(0.5, 0.5, 'Please generate the schedule first before viewing analytics.',
                         horizontalalignment='center', verticalalignment='center',
                         fontsize=14, color='red')
        # Stop here: everything below indexes into the schedule.
        return placeholder

    # Assuming each commissioner oversees a specific conference
    comm_dict = dict(zip(conference_team_df['Conference'].unique(), commissioners))

    if 'Conference 1' in schedule_df:
        first_conference = schedule_df['Conference 1']
    else:
        # Fall back to deriving the conference from the team table.
        team_to_conference = conference_team_df.set_index('Team')['Conference'].to_dict()
        first_conference = schedule_df['Team 1'].map(team_to_conference)

    # Count matches overseen by each commissioner
    commissioner_counts = first_conference.map(comm_dict).value_counts()

    # Plot using matplotlib
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(commissioner_counts.index, commissioner_counts.values, color='orange')
    ax.set_title('Matches Overseen by Each Commissioner')
    ax.set_ylabel('Number of Matches')
    return fig
# Streamlit App
st.title("Basketball Game Schedule Generator")
st.set_option('deprecation.showPyplotGlobalUse', False)

# Initialise every session key up front so later reads never hit a missing
# attribute on the first run of the script.
for _state_key in ("df", "schedule_df", "historical_data"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None

# UI for CSV File Uploader
uploaded_file = st.file_uploader("Choose a CSV file", type=['csv'])
start_date = date(2022, 11, 6)
available_dates = generate_available_dates(start_date)

# Load the Uploaded CSV File
if uploaded_file is not None:
    st.session_state.df = pd.read_csv(uploaded_file)
    st.write('Uploaded CSV file:')
    st.write(st.session_state.df)
    # Generate Schedule using Uploaded Data
    if st.button("Generate Schedule"):
        st.session_state.schedule_df = generate_schedule_from_data(st.session_state.df, available_dates)
        # Historical data is derived from the schedule, so drop any copy that
        # belongs to a previous schedule.
        st.session_state.historical_data = None
        st.write('Generated Schedule:')
        st.write(st.session_state.schedule_df)
else:
    st.warning("Please upload a CSV file to proceed.")

# Derive the mock historical data once a schedule exists. A copy is passed so
# the stored schedule cannot be altered by the generator.
if st.session_state.historical_data is None and st.session_state.schedule_df is not None:
    st.session_state.historical_data = generate_mock_historical_data(st.session_state.schedule_df.copy())
    st.session_state.historical_data['Date'] = pd.to_datetime(st.session_state.historical_data['Date'])

if st.button("Generate Mock Historical Data"):
    if st.session_state.historical_data is not None:
        st.write('Generated Mock Historical Data:')
        st.write(st.session_state.historical_data)
    else:
        st.warning("Please generate the schedule first before generating mock historical data.")

# Configuration UI
commissioners = list(st.multiselect("Add commissioners:", options=[], default=[]))
add_commissioner = st.text_input("New commissioner name:")
if add_commissioner and add_commissioner not in commissioners:
    # The typed name is included for this run of the script.
    commissioners.append(add_commissioner)

# Schedule Viewing
st.header("View Schedule")
if st.session_state.schedule_df is not None:
    # Fetching the unique conferences from the schedule DataFrame
    conferences = st.session_state.schedule_df['Conference 1'].unique()
    conference_selector = st.selectbox("Select conference to view schedule:", options=["All"] + list(conferences))
    if conference_selector == "All":
        st.write(st.session_state.schedule_df)
    else:
        # Filtering the schedule based on the selected conference
        filtered_schedule = st.session_state.schedule_df[
            (st.session_state.schedule_df["Conference 1"] == conference_selector)
            | (st.session_state.schedule_df["Conference 2"] == conference_selector)
        ]
        st.write(filtered_schedule)
else:
    st.warning("Schedule has not been generated yet.")

# Analytics & Comparisons
st.header("Analytics & Comparisons")
# Each option maps to its plotting function plus any arguments it takes
# beyond (schedule, conference table).
analyses = {
    "Team Workload Analysis": (team_workload_analysis, ()),
    "Match Distribution": (match_distribution, ()),
    "Inter-Conference Match Analysis": (inter_conference_analysis, ()),
    "Commissioner Analytics": (commissioner_analytics, (commissioners,)),
}
analytics_option = st.selectbox("Choose an analysis type:", list(analyses))

if st.session_state.historical_data is not None:
    st.session_state.historical_data['Date'] = pd.to_datetime(st.session_state.historical_data['Date'])
else:
    st.error("Historical data has not been generated yet.")

if st.session_state.schedule_df is not None:
    analysis, extra_args = analyses[analytics_option]
    comparisons = (
        ("Historical Data", st.session_state.historical_data),
        ("Current Data", st.session_state.schedule_df),
    )
    for heading, frame in comparisons:
        st.subheader(heading)
        st.pyplot(analysis(frame, st.session_state.df, *extra_args))
else:
    st.warning("Please generate the schedule first before viewing analytics.")
# Export functionality can be added later