Spaces:

lschlessinger
/

usatt-rating-analyzer

Running

App Files Files Community

lschlessinger committed on Feb 2, 2023

Commit

bfe3be9

1 Parent(s): ac4124a

feat: add best tournaments

Browse files

also removes player name feature

Files changed (2) hide show

app.py +12 -6
match_parser.py +37 -0

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ def usatt_rating_analyzer(file_obj):
     df, is_tournament = mp.load_match_df(Path(file_obj.name))
     # Create outputs.
-    player_name = mp.get_player_name(Path(file_obj.orig_name).stem)
     current_rating = mp.get_current_rating(df)
     peak_rating = mp.get_max_rating(df)
     n_competitions_played = mp.get_num_competitions_played(df, is_tournament)
@@ -18,6 +18,7 @@ def usatt_rating_analyzer(file_obj):
     matches_per_competition_fig = mp.get_matches_per_competition_fig(df, is_tournament)
     opponent_name_word_cloud_fig = mp.get_opponent_name_word_cloud_fig(df)
     competition_name_word_cloud_fig = mp.get_competition_name_word_cloud_fig(df, is_tournament)
     most_frequent_opponents = mp.make_df_columns_readable(mp.get_most_frequent_opponents(df), is_tournament)
     best_wins = mp.make_df_columns_readable(mp.get_best_wins(df), is_tournament)
     biggest_upsets = mp.make_df_columns_readable(mp.get_biggest_upsets(df), is_tournament)
@@ -27,7 +28,7 @@ def usatt_rating_analyzer(file_obj):
     opponent_rating_distr_fig = mp.get_opponent_rating_distr_fig(df)
     opponent_rating_dist_over_time_fig = mp.get_opponent_rating_dist_over_time_fig(df, is_tournament)
-    return (player_name,
             current_rating,
             peak_rating,
             n_competitions_played,
@@ -37,6 +38,7 @@ def usatt_rating_analyzer(file_obj):
             opponent_rating_dist_over_time_fig,
             best_wins,
             biggest_upsets,
             most_frequent_opponents,
             highest_rated_opponent,
             match_with_longest_game,
@@ -75,9 +77,9 @@ with gr.Blocks() as demo:
     """)
     with gr.Group():
-        with gr.Row():
-            with gr.Column():
-                player_name_box = gr.Textbox(lines=1, label="Player name")
         with gr.Row():
             with gr.Column():
                 current_rating_box = gr.Textbox(lines=1, label="Current rating")
@@ -121,6 +123,9 @@ with gr.Blocks() as demo:
         with gr.Row():
             with gr.Column():
                 most_frequent_opponents_gdf = gr.Dataframe(label="Most frequent opponents", max_rows=5)
                 highest_rated_opponent_gdf = gr.Dataframe(label="Best opponent", max_rows=1)
                 match_longest_game_gdf = gr.Dataframe(label="Match with longest game", max_rows=1)
@@ -136,7 +141,7 @@ with gr.Blocks() as demo:
     inputs = [input_file]
     outputs = [
-        player_name_box,
         current_rating_box,
         peak_rating_box,
         num_comps_box,
@@ -146,6 +151,7 @@ with gr.Blocks() as demo:
         opponent_rating_dist_over_time_plot,
         best_wins_gdf,
         biggest_upsets_gdf,
         most_frequent_opponents_gdf,
         highest_rated_opponent_gdf,
         match_longest_game_gdf,

     df, is_tournament = mp.load_match_df(Path(file_obj.name))
     # Create outputs.
+    # player_name = mp.get_player_name(Path(file_obj.orig_name).stem)
     current_rating = mp.get_current_rating(df)
     peak_rating = mp.get_max_rating(df)
     n_competitions_played = mp.get_num_competitions_played(df, is_tournament)
     matches_per_competition_fig = mp.get_matches_per_competition_fig(df, is_tournament)
     opponent_name_word_cloud_fig = mp.get_opponent_name_word_cloud_fig(df)
     competition_name_word_cloud_fig = mp.get_competition_name_word_cloud_fig(df, is_tournament)
+    best_competitions = mp.make_df_columns_readable(mp.get_best_competitions(df, is_tournament), is_tournament)
     most_frequent_opponents = mp.make_df_columns_readable(mp.get_most_frequent_opponents(df), is_tournament)
     best_wins = mp.make_df_columns_readable(mp.get_best_wins(df), is_tournament)
     biggest_upsets = mp.make_df_columns_readable(mp.get_biggest_upsets(df), is_tournament)
     opponent_rating_distr_fig = mp.get_opponent_rating_distr_fig(df)
     opponent_rating_dist_over_time_fig = mp.get_opponent_rating_dist_over_time_fig(df, is_tournament)
+    return (#player_name,
             current_rating,
             peak_rating,
             n_competitions_played,
             opponent_rating_dist_over_time_fig,
             best_wins,
             biggest_upsets,
+            best_competitions,
             most_frequent_opponents,
             highest_rated_opponent,
             match_with_longest_game,
     """)
     with gr.Group():
+        # with gr.Row():
+        #     with gr.Column():
+        #         player_name_box = gr.Textbox(lines=1, label="Player name")
         with gr.Row():
             with gr.Column():
                 current_rating_box = gr.Textbox(lines=1, label="Current rating")
         with gr.Row():
             with gr.Column():
+                best_competitions_gdf = gr.Dataframe(
+                    label="Best competitions (those having the largest increase in rating)",
+                    max_rows=5)
                 most_frequent_opponents_gdf = gr.Dataframe(label="Most frequent opponents", max_rows=5)
                 highest_rated_opponent_gdf = gr.Dataframe(label="Best opponent", max_rows=1)
                 match_longest_game_gdf = gr.Dataframe(label="Match with longest game", max_rows=1)
     inputs = [input_file]
     outputs = [
+        # player_name_box,
         current_rating_box,
         peak_rating_box,
         num_comps_box,
         opponent_rating_dist_over_time_plot,
         best_wins_gdf,
         biggest_upsets_gdf,
+        best_competitions_gdf,
         most_frequent_opponents_gdf,
         highest_rated_opponent_gdf,
         match_longest_game_gdf,

match_parser.py CHANGED Viewed

@@ -219,6 +219,43 @@ def get_biggest_upsets(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
     return df.loc[df.result == 'Won'].sort_values("rating_difference", ascending=False).head(top_n)
 def get_highest_rated_opponent(df: pd.DataFrame) -> pd.DataFrame:
     """Return a one-row frame with the match against the highest-rated opponent.

     Args:
         df: Match table containing an ``opponent_rating`` column.

     Returns:
         A single-row DataFrame whose columns are those of ``df``.
     """
     # ``idxmax`` yields an index *label*, so the row has to be fetched with the
     # label-based ``loc``; positional ``iloc`` only agrees with it when the
     # index happens to be the default 0..n-1 range.
     return df.loc[df.opponent_rating.idxmax()].to_frame().transpose()

     return df.loc[df.result == 'Won'].sort_values("rating_difference", ascending=False).head(top_n)
def get_best_competitions(df: pd.DataFrame, is_tournament: bool, top_n: int = 5) -> pd.DataFrame:
    """Return the competitions with the largest rating increase.

    Each competition is identified by its date column (``tournament_end_date``
    for tournaments, ``event_date`` for leagues). Its pre-competition rating is
    the rating recorded for the previous competition in sorted date-key order.

    Args:
        df: Match table with a ``rating`` column plus the tournament or league
            columns selected by ``is_tournament``.
        is_tournament: Whether ``df`` holds tournament rows (otherwise league rows).
        top_n: Maximum number of competitions to return.

    Returns:
        One row per competition, sorted by ``rating_increase`` descending, with
        the competition columns followed by ``rating_increase``,
        ``pre-competition_rating`` and ``post-competition_rating``.
    """
    x_key_name = "tournament_end_date" if is_tournament else "event_date"
    if is_tournament:
        cols = ['tournament_start_date', 'tournament_end_date', 'tournament']
    else:
        cols = ["event_date", "league_name"]
    cols += ['rating_increase', 'pre-competition_rating', 'rating']
    renamed = {"rating": "post-competition_rating"}

    # Nothing to rank: return an empty frame with the expected columns instead
    # of failing on ``df.iloc[-1]`` below.
    if df.empty:
        return pd.DataFrame(columns=cols).rename(columns=renamed)

    # There is no rating before the first competition, so it is (knowingly
    # incorrectly) given its own rating as the pre-competition rating, making
    # its increase zero rather than undefined.
    # NOTE(review): this takes the rating from the last row of ``df``, which
    # presumably belongs to the earliest competition -- confirm row ordering.
    fill_value = df.iloc[-1].rating

    # groupby sorts the date keys, so shifting by one pairs every competition
    # with the rating of the one before it.
    pre_comp_ratings = df.groupby(x_key_name)['rating'].first().shift(periods=1, fill_value=fill_value)

    # Work on a copy so the caller's frame is not modified.
    df = df.copy()
    df['pre-competition_rating'] = df[x_key_name].map(pre_comp_ratings)
    df['rating_increase'] = df['rating'] - df['pre-competition_rating']

    per_competition = df.groupby(x_key_name).first()
    best_competition_dates = per_competition['rating_increase'].nlargest(top_n).index
    # Filter on the selected key column (not a hard-coded tournament column) so
    # that league data works as well.
    best = per_competition.loc[per_competition.index.isin(best_competition_dates)]
    best = best.sort_values(by='rating_increase', ascending=False).reset_index()
    return best[cols].rename(columns=renamed)
 def get_highest_rated_opponent(df: pd.DataFrame) -> pd.DataFrame:
     """Return a one-row frame with the match against the highest-rated opponent.

     Args:
         df: Match table containing an ``opponent_rating`` column.

     Returns:
         A single-row DataFrame whose columns are those of ``df``.
     """
     # ``idxmax`` yields an index *label*, so the row has to be fetched with the
     # label-based ``loc``; positional ``iloc`` only agrees with it when the
     # index happens to be the default 0..n-1 range.
     return df.loc[df.opponent_rating.idxmax()].to_frame().transpose()