Spaces:

lschlessinger
/

usatt-rating-analyzer

Running

App Files Files Community

lschlessinger committed on Mar 27, 2024

Commit

75c0152

1 Parent(s): 7f24c35

formatting

Browse files

Files changed (3) hide show

app.py +30 -28
match_parser.py +20 -18
util.py +2 -0

app.py CHANGED Viewed

@@ -27,33 +27,34 @@ def usatt_rating_analyzer(file_obj):
     worst_recent_losses = mp.make_df_columns_readable(mp.get_worst_recent_losses(df, is_tournament), is_tournament)
     highest_rated_opponent = mp.make_df_columns_readable(mp.get_highest_rated_opponent(df), is_tournament)
     rating_over_time_fig = mp.get_rating_over_time_fig(df, is_tournament)
-    match_with_longest_game = mp.make_df_columns_readable(mp.get_match_with_longest_game(df, is_tournament), is_tournament)
     longest_match = mp.make_df_columns_readable(mp.get_longest_match(df, is_tournament), is_tournament)
     opponent_rating_distr_fig = mp.get_opponent_rating_distr_fig(df)
     opponent_rating_dist_over_time_fig = mp.get_opponent_rating_dist_over_time_fig(df, is_tournament)
-    return (#player_name,
-            current_rating,
-            peak_rating,
-            n_competitions_played,
-            n_matches_played,
-            first_comp_year,
-            n_active_years,
-            rating_over_time_fig,
-            opponent_rating_distr_fig,
-            opponent_rating_dist_over_time_fig,
-            best_wins,
-            biggest_upsets,
-            worst_recent_losses,
-            best_competitions,
-            most_frequent_opponents,
-            highest_rated_opponent,
-            match_with_longest_game,
-            longest_match,
-            opponent_name_word_cloud_fig,
-            competition_name_word_cloud_fig,
-            matches_per_competition_fig,
-            )
 with gr.Blocks() as demo:
@@ -100,7 +101,8 @@ with gr.Blocks() as demo:
             with gr.Column():
                 first_competition_box = gr.Textbox(lines=1, label="First competition")
             with gr.Column():
-                num_active_years_box = gr.Textbox(lines=1, label="Number of active years (participated in at least 1 competition)")
         with gr.Row():
             with gr.Column():
@@ -123,8 +125,9 @@ with gr.Blocks() as demo:
             with gr.Column():
                 best_wins_gdf = gr.Dataframe(label="Best wins (matches won sorted by opponent post-competition rating)",
                                              height=500)
-                biggest_upsets_gdf = gr.Dataframe(label="Biggest upsets (matches won sorted by rating - opponent post-competition rating)",
-                                                  height=500)
                 worst_recent_losses_gdf = gr.Dataframe(label="Worst recent losses (matches lost sorted by opponent "
                                                              "post-competition rating from the 5 most recent "
                                                              "competitions)", height=500)
@@ -154,7 +157,6 @@ with gr.Blocks() as demo:
             with gr.Column():
                 matches_per_comp_plot = gr.Plot(show_label=False)
     inputs = [input_file]
     outputs = [
         # player_name_box,
@@ -183,4 +185,4 @@ with gr.Blocks() as demo:
     btn.click(usatt_rating_analyzer, inputs=inputs, outputs=outputs)
 if __name__ == "__main__":
-    demo.launch()

     worst_recent_losses = mp.make_df_columns_readable(mp.get_worst_recent_losses(df, is_tournament), is_tournament)
     highest_rated_opponent = mp.make_df_columns_readable(mp.get_highest_rated_opponent(df), is_tournament)
     rating_over_time_fig = mp.get_rating_over_time_fig(df, is_tournament)
+    match_with_longest_game = mp.make_df_columns_readable(mp.get_match_with_longest_game(df, is_tournament),
+                                                          is_tournament)
     longest_match = mp.make_df_columns_readable(mp.get_longest_match(df, is_tournament), is_tournament)
     opponent_rating_distr_fig = mp.get_opponent_rating_distr_fig(df)
     opponent_rating_dist_over_time_fig = mp.get_opponent_rating_dist_over_time_fig(df, is_tournament)
+    return (  # player_name,
+        current_rating,
+        peak_rating,
+        n_competitions_played,
+        n_matches_played,
+        first_comp_year,
+        n_active_years,
+        rating_over_time_fig,
+        opponent_rating_distr_fig,
+        opponent_rating_dist_over_time_fig,
+        best_wins,
+        biggest_upsets,
+        worst_recent_losses,
+        best_competitions,
+        most_frequent_opponents,
+        highest_rated_opponent,
+        match_with_longest_game,
+        longest_match,
+        opponent_name_word_cloud_fig,
+        competition_name_word_cloud_fig,
+        matches_per_competition_fig,
+    )
 with gr.Blocks() as demo:
             with gr.Column():
                 first_competition_box = gr.Textbox(lines=1, label="First competition")
             with gr.Column():
+                num_active_years_box = gr.Textbox(lines=1,
+                                                  label="Number of active years (participated in at least 1 competition)")
         with gr.Row():
             with gr.Column():
             with gr.Column():
                 best_wins_gdf = gr.Dataframe(label="Best wins (matches won sorted by opponent post-competition rating)",
                                              height=500)
+                biggest_upsets_gdf = gr.Dataframe(
+                    label="Biggest upsets (matches won sorted by rating - opponent post-competition rating)",
+                    height=500)
                 worst_recent_losses_gdf = gr.Dataframe(label="Worst recent losses (matches lost sorted by opponent "
                                                              "post-competition rating from the 5 most recent "
                                                              "competitions)", height=500)
             with gr.Column():
                 matches_per_comp_plot = gr.Plot(show_label=False)
     inputs = [input_file]
     outputs = [
         # player_name_box,
     btn.click(usatt_rating_analyzer, inputs=inputs, outputs=outputs)
 if __name__ == "__main__":
+    demo.launch()

match_parser.py CHANGED Viewed

@@ -55,7 +55,6 @@ def _fix_dtypes(df: pd.DataFrame, is_tournament: bool) -> pd.DataFrame:
     return df
 def make_df_columns_readable(df: Optional[pd.DataFrame], is_tournament: bool) -> Optional[pd.DataFrame]:
     """Make a data frame's columns human-readable."""
     if df is None:
@@ -80,7 +79,8 @@ def make_df_columns_readable(df: Optional[pd.DataFrame], is_tournament: bool) ->
                 else:
                     return tournament_start_date if missing_end_date else tournament_end_date
-            df["date"] = df.apply(lambda row: create_date(row['tournament_start_date'], row['tournament_end_date']), axis=1)
             df = df.drop(columns=["tournament_start_date", "tournament_end_date"])
             # Move date to the front.
@@ -96,6 +96,7 @@ def make_df_columns_readable(df: Optional[pd.DataFrame], is_tournament: bool) ->
     df = df.rename(columns=lambda c: snake_case_to_human_readable(c))
     return df
 def _check_match_type(match_type: str) -> str:
     allowed_match_types = {"tournament", "league"}
     if match_type not in allowed_match_types:
@@ -121,6 +122,7 @@ def get_player_name(file_stem: str) -> str:
     profile_id = int(file_stem.split(" ")[0].replace("_", "").split("matches")[-1])
     return fetch_player_name(profile_id)
 def get_num_competitions_played(df: pd.DataFrame, is_tournament: bool) -> int:
     """Return the number of distinct competition dates in the match data frame.

     The date column is ``tournament_end_date`` when ``is_tournament`` is true and
     ``event_date`` otherwise; the count is whatever ``Series.nunique()`` reports
     for that column.
     """
     if is_tournament:
         date_column = "tournament_end_date"
     else:
         date_column = "event_date"
     distinct_dates = df[date_column].nunique()
     return distinct_dates
@@ -180,7 +182,7 @@ def get_rating_over_time_fig(df: pd.DataFrame, is_tournament: bool, span: int =
                              y=df["rating"],
                              name='Rating',
                              mode='lines+markers',
-                             line=dict( width=0.9),
                              marker=dict(size=4))),
     # EMA trace
@@ -202,8 +204,6 @@ def get_rating_over_time_fig(df: pd.DataFrame, is_tournament: bool, span: int =
     return fig
 def get_match_with_longest_game(df: pd.DataFrame, is_tournament: bool) -> Optional[pd.DataFrame]:
     if not is_tournament:
         return None
@@ -226,7 +226,8 @@ def get_win_loss_record_str(group_df) -> str:
 def get_most_frequent_opponents(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
     df_with_opponents = df.loc[df.opponent != "-, -"]
-    most_common_opponents_df = df_with_opponents.groupby('opponent', observed=False).agg({"result": [get_win_loss_record_str, "size"]})
     most_common_opponents_df.columns = most_common_opponents_df.columns.get_level_values(1)
     most_common_opponents_df.rename({"get_win_loss_record_str": "Win/loss record", "size": "Number of matches"}, axis=1,
                                     inplace=True)
@@ -252,8 +253,9 @@ def get_worst_recent_losses(df: pd.DataFrame,
                             top_n_comps: int = 5) -> pd.DataFrame:
     """Get the top-k most recent worst losses from the top-n most recent competitions."""
     x_key_name = "tournament_end_date" if is_tournament else "event_date"
-    most_recent_competition_dates =df.groupby(x_key_name).first().reset_index().nlargest(top_n_comps,
-                                                                                         columns=x_key_name)[x_key_name]
     df_recent = df.loc[df[x_key_name].isin(most_recent_competition_dates)]
     return df_recent.loc[df_recent.result == 'Lost'].sort_values("opponent_rating", ascending=True).head(top_k_losses)
@@ -327,15 +329,16 @@ def get_opponent_rating_dist_over_time_fig(df: pd.DataFrame, is_tournament: bool
 def get_total_match_points(score_str: str) -> int:
-  single_game_scores = int_csv_to_list(score_str)
-  total_points = 0
-  for single_game_score in single_game_scores:
-    abs_gscore = abs(single_game_score)
-    if abs_gscore < 10:
-      total_points += abs_gscore + 11
-    else:
-      total_points += 2 * abs_gscore + 2
-  return total_points
 def get_longest_match(df: pd.DataFrame, is_tournament: bool) -> Optional[pd.DataFrame]:
     """Get the longest match, where longest is defined as the most number of points played."""
@@ -357,4 +360,3 @@ def load_match_df(file_path: Path) -> Tuple[pd.DataFrame, bool]:
     logging.info(f"Loaded match CSV {file_path}.")
     return df, is_tournament

     return df
 def make_df_columns_readable(df: Optional[pd.DataFrame], is_tournament: bool) -> Optional[pd.DataFrame]:
     """Make a data frame's columns human-readable."""
     if df is None:
                 else:
                     return tournament_start_date if missing_end_date else tournament_end_date
+            df["date"] = df.apply(lambda row: create_date(row['tournament_start_date'], row['tournament_end_date']),
+                                  axis=1)
             df = df.drop(columns=["tournament_start_date", "tournament_end_date"])
             # Move date to the front.
     df = df.rename(columns=lambda c: snake_case_to_human_readable(c))
     return df
 def _check_match_type(match_type: str) -> str:
     allowed_match_types = {"tournament", "league"}
     if match_type not in allowed_match_types:
     profile_id = int(file_stem.split(" ")[0].replace("_", "").split("matches")[-1])
     return fetch_player_name(profile_id)
 def get_num_competitions_played(df: pd.DataFrame, is_tournament: bool) -> int:
     """Return the number of distinct competition dates in the match data frame.

     The date column is ``tournament_end_date`` when ``is_tournament`` is true and
     ``event_date`` otherwise; the count is whatever ``Series.nunique()`` reports
     for that column.
     """
     if is_tournament:
         date_column = "tournament_end_date"
     else:
         date_column = "event_date"
     distinct_dates = df[date_column].nunique()
     return distinct_dates
                              y=df["rating"],
                              name='Rating',
                              mode='lines+markers',
+                             line=dict(width=0.9),
                              marker=dict(size=4))),
     # EMA trace
     return fig
 def get_match_with_longest_game(df: pd.DataFrame, is_tournament: bool) -> Optional[pd.DataFrame]:
     if not is_tournament:
         return None
 def get_most_frequent_opponents(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
     df_with_opponents = df.loc[df.opponent != "-, -"]
+    most_common_opponents_df = df_with_opponents.groupby('opponent', observed=False).agg(
+        {"result": [get_win_loss_record_str, "size"]})
     most_common_opponents_df.columns = most_common_opponents_df.columns.get_level_values(1)
     most_common_opponents_df.rename({"get_win_loss_record_str": "Win/loss record", "size": "Number of matches"}, axis=1,
                                     inplace=True)
                             top_n_comps: int = 5) -> pd.DataFrame:
     """Get the top-k most recent worst losses from the top-n most recent competitions."""
     x_key_name = "tournament_end_date" if is_tournament else "event_date"
+    most_recent_competition_dates = df.groupby(x_key_name).first().reset_index().nlargest(top_n_comps,
+                                                                                          columns=x_key_name)[
+        x_key_name]
     df_recent = df.loc[df[x_key_name].isin(most_recent_competition_dates)]
     return df_recent.loc[df_recent.result == 'Lost'].sort_values("opponent_rating", ascending=True).head(top_k_losses)
 def get_total_match_points(score_str: str) -> int:
+    single_game_scores = int_csv_to_list(score_str)
+    total_points = 0
+    for single_game_score in single_game_scores:
+        abs_gscore = abs(single_game_score)
+        if abs_gscore < 10:
+            total_points += abs_gscore + 11
+        else:
+            total_points += 2 * abs_gscore + 2
+    return total_points
 def get_longest_match(df: pd.DataFrame, is_tournament: bool) -> Optional[pd.DataFrame]:
     """Get the longest match, where longest is defined as the most number of points played."""
     logging.info(f"Loaded match CSV {file_path}.")
     return df, is_tournament

util.py CHANGED Viewed

@@ -4,10 +4,12 @@ from typing import List
 def snake_case_to_human_readable(s: str) -> str:
     """Turn a snake_case identifier into a space-separated phrase.

     The string is first passed through ``str.capitalize`` (first character
     upper-cased, the remainder lower-cased); every underscore is then replaced
     by a single space.
     """
     capitalized = s.capitalize()
     return capitalized.replace("_", " ")
 def int_csv_to_list(int_csv_str: str) -> List[int]:
     """Convert a CSV of ints to a list of ints.

     The string is split on commas and each field is stripped of surrounding
     whitespace. Fields that are empty after stripping (for example from a
     trailing comma, or a trailing ", ") are skipped.

     Raises:
         ValueError: if a non-empty field is not a valid integer literal.
     """
     # Strip before testing for emptiness so that whitespace-only fields are
     # dropped instead of reaching int("") and raising.
     stripped_fields = (field.strip() for field in int_csv_str.split(','))
     return [int(field) for field in stripped_fields if field]
 def get_max_abs_int(int_csv_str: str) -> int:
     """Get the max absolute value int from an int CSV."""
     abs_ints = [abs(i) for i in int_csv_to_list(int_csv_str)]

 def snake_case_to_human_readable(s: str) -> str:
     """Turn a snake_case identifier into a space-separated phrase.

     The string is first passed through ``str.capitalize`` (first character
     upper-cased, the remainder lower-cased); every underscore is then replaced
     by a single space.
     """
     capitalized = s.capitalize()
     return capitalized.replace("_", " ")
 def int_csv_to_list(int_csv_str: str) -> List[int]:
     """Convert a CSV of ints to a list of ints.

     The string is split on commas and each field is stripped of surrounding
     whitespace. Fields that are empty after stripping (for example from a
     trailing comma, or a trailing ", ") are skipped.

     Raises:
         ValueError: if a non-empty field is not a valid integer literal.
     """
     # Strip before testing for emptiness so that whitespace-only fields are
     # dropped instead of reaching int("") and raising.
     stripped_fields = (field.strip() for field in int_csv_str.split(','))
     return [int(field) for field in stripped_fields if field]
 def get_max_abs_int(int_csv_str: str) -> int:
     """Get the max absolute value int from an int CSV."""
     abs_ints = [abs(i) for i in int_csv_to_list(int_csv_str)]