|
from typing import Optional, Tuple |
|
|
|
import gradio as gr |
|
import pandas as pd |
|
from pathlib import Path |
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
from wordcloud import WordCloud |
|
import numpy as np |
|
|
|
def _rename_columns(df: pd.DataFrame, is_tournament: bool) -> pd.DataFrame: |
|
columns = { |
|
"Rating": "rating", |
|
"Result": "result", |
|
"Scores": "scores", |
|
"Opponent": "opponent", |
|
"OpponentRating": "opponent_rating", |
|
} |
|
|
|
if is_tournament: |
|
columns.update({ |
|
"TournamentStartDate": "tournament_start_date", |
|
"TournamentEndDate": "tournament_end_date", |
|
" Touranament": "tournament", |
|
}) |
|
else: |
|
columns.update({ |
|
"EventDate": "event_date", |
|
"LeagueName": "league_name" |
|
}) |
|
|
|
return df.rename(columns=columns) |
|
|
|
|
|
def _fix_dtypes(df: pd.DataFrame, is_tournament: bool) -> pd.DataFrame: |
|
if is_tournament: |
|
df["tournament_start_date"] = pd.to_datetime(df["tournament_start_date"]) |
|
df["tournament_end_date"] = pd.to_datetime(df["tournament_end_date"]) |
|
df["tournament"] = df["tournament"].astype('category') |
|
else: |
|
df["event_date"] = pd.to_datetime(df["event_date"]) |
|
df["league_name"] = df["league_name"].astype('string') |
|
|
|
df["rating"] = df["rating"].astype('int') |
|
df["result"] = df["result"].astype('category') |
|
df["scores"] = df["scores"].astype('string') |
|
df["opponent"] = df["opponent"].astype('category') |
|
df["opponent_rating"] = df["opponent_rating"].astype('int') |
|
|
|
return df |
|
|
|
|
|
def _check_match_type(match_type: str) -> str: |
|
allowed_match_types = {"tournament", "league"} |
|
if match_type not in allowed_match_types: |
|
raise ValueError( |
|
f"The only supported match types are {allowed_match_types}. Found match type of '{match_type}'.") |
|
return match_type |
|
|
|
|
|
def get_num_competitions_played(df: pd.DataFrame, is_tournament: bool) -> int:
    """Count the distinct competitions that appear in ``df``.

    Tournament data is counted by distinct ``tournament`` value; league data
    is counted by distinct ``event_date`` value.
    """
    if is_tournament:
        competition_column = df["tournament"]
    else:
        competition_column = df["event_date"]
    return competition_column.nunique()
|
|
|
|
|
def get_matches_per_competition_fig(df: pd.DataFrame, is_tournament: bool):
    """Plot a histogram of the number of matches played per competition.

    Rows are grouped by ``tournament`` for tournament data and by
    ``event_date`` for league data; the per-group row counts are plotted.

    Returns:
        The matplotlib figure created for the plot.
    """
    if is_tournament:
        competition_key = 'tournament'
    else:
        competition_key = "event_date"

    fig = plt.figure()
    plt.title('Matches per competition')

    matches_per_competition = df.groupby(competition_key).size()
    sns.histplot(matches_per_competition)

    # Label the x axis only after the histogram has been drawn.
    plt.xlabel('Number of matches in competition')
    return fig
|
|
|
|
|
def get_competition_name_word_cloud_fig(df: pd.DataFrame, is_tournament: bool):
    """Render a word cloud built from the competition names in ``df``.

    Names come from the ``tournament`` column for tournament data and from the
    ``league_name`` column for league data. All names are joined with single
    spaces and handed to ``WordCloud().generate``.

    Args:
        df: Match frame with snake_case column names.
        is_tournament: Selects which name column is read.

    Returns:
        The matplotlib figure showing the word cloud with its axes hidden.
    """
    key_name = "tournament" if is_tournament else "league_name"

    # Missing names (NaN / pd.NA) are not ``str`` and would make ``str.join``
    # raise TypeError, so drop them; ``astype(str)`` guarantees that every
    # remaining item is a plain string regardless of the column dtype.
    names = df[key_name].dropna().astype(str)
    wordcloud = WordCloud().generate(" ".join(names))

    # Create the figure only once the word cloud exists, so a failure above
    # does not leave an unused figure registered with pyplot.
    fig = plt.figure()
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    return fig
|
|
|
|
|
def get_opponent_name_word_cloud_fig(df: pd.DataFrame):
    """Render a word cloud built from the ``opponent`` column of ``df``.

    All opponent names are joined with single spaces and handed to
    ``WordCloud().generate``.

    Args:
        df: Match frame with an ``opponent`` column.

    Returns:
        The matplotlib figure showing the word cloud with its axes hidden.
    """
    # Missing names (NaN / pd.NA) are not ``str`` and would make ``str.join``
    # raise TypeError, so drop them; ``astype(str)`` guarantees that every
    # remaining item is a plain string regardless of the column dtype.
    names = df["opponent"].dropna().astype(str)
    wordcloud = WordCloud().generate(" ".join(names))

    # Create the figure only once the word cloud exists, so a failure above
    # does not leave an unused figure registered with pyplot.
    fig = plt.figure()
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    return fig
|
|
|
|
|
def get_rating_over_time_fig(df: pd.DataFrame, is_tournament: bool):
    """Plot the ``rating`` column against the competition date.

    The x axis uses ``tournament_end_date`` for tournament data and
    ``event_date`` for league data.

    Returns:
        The matplotlib figure created for the plot.
    """
    if is_tournament:
        date_column = "tournament_end_date"
    else:
        date_column = "event_date"

    fig = plt.figure()
    plt.title('Rating over time')
    sns.lineplot(data=df, x=date_column, y="rating", marker='.', markersize=10)

    # Replace the column-name axis labels with readable ones.
    plt.xlabel('Competition date')
    plt.ylabel('Rating')
    return fig
|
|
|
|
|
def get_max_int(int_csv_str: str) -> int:
    """Return the largest integer in a comma-separated string of integers.

    Whitespace around each field is ignored.

    Raises:
        ValueError: If any field is not an integer (this includes an empty
            string, which yields a single empty field).
    """
    return max(int(field.strip()) for field in int_csv_str.split(','))


def get_match_with_longest_game(df: pd.DataFrame, is_tournament: bool) -> Optional[pd.DataFrame]:
    """Return the match whose ``scores`` entry contains the largest number.

    Each ``scores`` value is reduced with ``get_max_int``; the row holding the
    overall maximum is returned (the first such row on ties).

    Args:
        df: Match frame with a ``scores`` column of comma-separated integers.
        is_tournament: The lookup is only performed for tournament data.

    Returns:
        ``None`` for league data; otherwise a one-row frame with the selected
        match, or an empty frame with the same columns when ``df`` is empty.
    """
    if not is_tournament:
        return None
    if df.empty:
        # argmax of an empty sequence raises; return "no match" instead.
        return df.iloc[0:0]

    longest_per_match = df["scores"].apply(get_max_int)
    # argmax yields a *position*, so the row must be selected with ``iloc``.
    # Label-based ``loc`` only works by accident on a default 0..n-1 index
    # and picks the wrong row (or raises KeyError) on any other index.
    row_position = int(np.argmax(longest_per_match.to_numpy()))
    return df.iloc[[row_position]]
|
|
|
|
|
def get_opponent_rating_distr_fig(df: pd.DataFrame):
    """Plot a histogram of ``opponent_rating``, coloured by ``result``.

    Returns:
        The matplotlib figure created for the plot.
    """
    fig = plt.figure()
    plt.title('Opponent rating distribution')

    histogram_options = {"x": "opponent_rating", "hue": 'result'}
    sns.histplot(data=df, **histogram_options)

    # Replace the column-name axis label with a readable one.
    plt.xlabel('Opponent rating')
    return fig
|
|
|
|
|
def get_opponent_rating_dist_over_time_fig(df: pd.DataFrame, is_tournament: bool):
    """Plot per-year violin plots of ``opponent_rating``, split by ``result``.

    The year is taken from ``tournament_end_date`` for tournament data and
    from ``event_date`` for league data.

    Returns:
        The 12x8 matplotlib figure created for the plot.
    """
    if is_tournament:
        date_column = "tournament_end_date"
    else:
        date_column = "event_date"
    competition_year = df[date_column].dt.year

    fig, ax = plt.subplots(figsize=(12, 8))
    # ``ax`` is the only (and therefore current) axes of the new figure.
    ax.set_title('Opponent rating distribution over time')
    sns.violinplot(
        data=df,
        x=competition_year,
        y="opponent_rating",
        hue="result",
        split=True,
        inner='points',
        cut=1,
        ax=ax,
    )
    ax.set_xlabel('Competition year')
    ax.set_ylabel('Opponent rating')
    return fig
|
|
|
|
|
def load_match_df(file_path: Path) -> Tuple[pd.DataFrame, bool]:
    """Read a match-history CSV and normalise its columns and dtypes.

    The match type is taken from the file name: everything before the first
    underscore must be ``"tournament"`` or ``"league"``.

    Args:
        file_path: Location of the CSV file.

    Returns:
        A ``(df, is_tournament)`` pair: the cleaned frame and whether the file
        holds tournament (as opposed to league) results.

    Raises:
        ValueError: If the file-name prefix is not a supported match type.
    """
    name_prefix = file_path.name.partition('_')[0]
    is_tournament = _check_match_type(name_prefix) == "tournament"

    raw = pd.read_csv(file_path)
    renamed = _rename_columns(raw, is_tournament)
    return _fix_dtypes(renamed, is_tournament), is_tournament
|
|
|
|
|
def usatt_rating_analyzer(file_obj):
    """Run every analysis on an uploaded results file.

    Args:
        file_obj: Object whose ``name`` attribute is the path of the uploaded
            CSV file.

    Returns:
        A 9-tuple, in the order expected by the ``outputs`` list wired to the
        "Analyze" button: number of competitions, number of matches,
        matches-per-competition figure, opponent-name word cloud,
        competition-name word cloud, rating-over-time figure, match with the
        longest game (``None`` for league data), opponent-rating histogram,
        and opponent-rating-by-year figure.
    """
    df, is_tournament = load_match_df(Path(file_obj.name))

    # The tuple elements are evaluated top to bottom, so the analyses run in
    # the same order in which their results are returned.
    return (
        get_num_competitions_played(df, is_tournament),
        len(df),
        get_matches_per_competition_fig(df, is_tournament),
        get_opponent_name_word_cloud_fig(df),
        get_competition_name_word_cloud_fig(df, is_tournament),
        get_rating_over_time_fig(df, is_tournament),
        get_match_with_longest_game(df, is_tournament),
        get_opponent_rating_distr_fig(df),
        get_opponent_rating_dist_over_time_fig(df, is_tournament),
    )
|
|
|
|
|
# Gradio UI: an upload widget plus an "Analyze" button that feed
# ``usatt_rating_analyzer`` and display its nine results.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from statement order -- confirm it matches the intended page layout.
with gr.Blocks() as demo:
    # Page header and download instructions shown above the widgets.
    gr.Markdown("""# USATT rating analyzer
    Analyze USA table tennis tournament and league results.

    ## Downloading match results
    1. Make sure you are [logged in](https://usatt.simplycompete.com/login/auth).
    2. Find the *active* player you wish to analyze (e.g., [Kanak Jha](https://usatt.simplycompete.com/userAccount/up/3431)).
    3. Under 'Tournaments' or 'Leagues', click *Download Tournament/League Match History*.
    """)
    with gr.Row():
        # Input side: the results file and the trigger button.
        with gr.Column():
            input_file = gr.File(label='USATT Results File', file_types=['file'])
            btn = gr.Button("Analyze")

        # Output side: summary counts, plots and the longest-game table.
        with gr.Group():
            with gr.Row():
                with gr.Column():
                    num_comps_box = gr.Textbox(lines=1, label="Number of competitions (tournaments/leagues) played")
                with gr.Column():
                    num_matches_box = gr.Textbox(lines=1, label="Number of matches played")
            rating_over_time_plot = gr.Plot(show_label=False)
            matches_per_comp_plot = gr.Plot(show_label=False)
            with gr.Row():
                with gr.Column():
                    opponent_names_plot = gr.Plot(label="Opponent names")
                with gr.Column():
                    comp_names_plot = gr.Plot(label="Competition names")

            match_longest_game_gdf = gr.Dataframe(label="Match with longest game", max_rows=1)
            opponent_rating_dist_plot = gr.Plot(show_label=False)
            opponent_rating_dist_over_time_plot = gr.Plot(show_label=False)

    inputs = [input_file]
    # Must stay in the same order as the tuple returned by
    # ``usatt_rating_analyzer``; the on-screen order is set by the layout
    # above, not by this list.
    outputs = [
        num_comps_box,
        num_matches_box,
        matches_per_comp_plot,
        opponent_names_plot,
        comp_names_plot,
        rating_over_time_plot,
        match_longest_game_gdf,
        opponent_rating_dist_plot,
        opponent_rating_dist_over_time_plot,
    ]

    # Run the full analysis when the button is clicked.
    btn.click(usatt_rating_analyzer, inputs=inputs, outputs=outputs)

# Runs at import time: there is no ``__main__`` guard around the launch.
demo.launch()