Spaces:

lschlessinger
/

usatt-rating-analyzer

Running

App Files Community

lschlessinger committed on Mar 27, 2024

Commit

7f24c35

1 Parent(s): 62ef16f

add ema and fix bugs

Browse files

Files changed (2) hide show

app.py +8 -8
match_parser.py +23 -5

app.py CHANGED Viewed

@@ -122,12 +122,12 @@ with gr.Blocks() as demo:
         with gr.Row():
             with gr.Column():
                 best_wins_gdf = gr.Dataframe(label="Best wins (matches won sorted by opponent post-competition rating)",
-                                             max_rows=5)
                 biggest_upsets_gdf = gr.Dataframe(label="Biggest upsets (matches won sorted by rating - opponent post-competition rating)",
-                                                  max_rows=5)
                 worst_recent_losses_gdf = gr.Dataframe(label="Worst recent losses (matches lost sorted by opponent "
                                                              "post-competition rating from the 5 most recent "
-                                                             "competitions)", max_rows=5)
         gr.Markdown("""<br />
@@ -140,11 +140,11 @@ with gr.Blocks() as demo:
             with gr.Column():
                 best_competitions_gdf = gr.Dataframe(
                     label="Best competitions (those having the largest increase in rating)",
-                    max_rows=5)
-                most_frequent_opponents_gdf = gr.Dataframe(label="Most frequent opponents", max_rows=5)
-                highest_rated_opponent_gdf = gr.Dataframe(label="Highest rated opponent", max_rows=1)
-                match_longest_game_gdf = gr.Dataframe(label="Match with longest game", max_rows=1)
-                longest_match_gdf = gr.Dataframe(label="Longest match (highest number of points played)", max_rows=1)
         with gr.Row():
             with gr.Column():

         with gr.Row():
             with gr.Column():
                 best_wins_gdf = gr.Dataframe(label="Best wins (matches won sorted by opponent post-competition rating)",
+                                             height=500)
                 biggest_upsets_gdf = gr.Dataframe(label="Biggest upsets (matches won sorted by rating - opponent post-competition rating)",
+                                                  height=500)
                 worst_recent_losses_gdf = gr.Dataframe(label="Worst recent losses (matches lost sorted by opponent "
                                                              "post-competition rating from the 5 most recent "
+                                                             "competitions)", height=500)
         gr.Markdown("""<br />
             with gr.Column():
                 best_competitions_gdf = gr.Dataframe(
                     label="Best competitions (those having the largest increase in rating)",
+                    height=500)
+                most_frequent_opponents_gdf = gr.Dataframe(label="Most frequent opponents", height=500)
+                highest_rated_opponent_gdf = gr.Dataframe(label="Highest rated opponent", height=100)
+                match_longest_game_gdf = gr.Dataframe(label="Match with longest game", height=100)
+                longest_match_gdf = gr.Dataframe(label="Longest match (highest number of points played)", height=100)
         with gr.Row():
             with gr.Column():

match_parser.py CHANGED Viewed

@@ -64,6 +64,8 @@ def make_df_columns_readable(df: Optional[pd.DataFrame], is_tournament: bool) ->
     nat_to_none = lambda x: None if x == "NaT" else x
     if is_tournament:
         if "tournament_start_date" in df.columns and "tournament_end_date" in df.columns:
             df['tournament_start_date'] = df['tournament_start_date'].dt.date.astype(str).apply(nat_to_none)
             df['tournament_end_date'] = df['tournament_end_date'].dt.date.astype(str).apply(nat_to_none)
@@ -87,6 +89,7 @@ def make_df_columns_readable(df: Optional[pd.DataFrame], is_tournament: bool) ->
             df = df.loc[:, columns]
     else:
         if "event_date" in df.columns:
             df['event_date'] = df['event_date'].dt.date.astype(str).apply(nat_to_none)
         df = df.rename(columns={"league_name": "league"})
@@ -144,7 +147,7 @@ def get_max_rating(df: pd.DataFrame) -> int:
 def get_matches_per_competition_fig(df: pd.DataFrame, is_tournament: bool):
     fig = plt.figure()
     plt.title('Matches per competition')
-    sns.histplot(df.groupby('tournament' if is_tournament else "event_date").size())
     plt.xlabel('Number of matches in competition')
     return fig
@@ -166,19 +169,33 @@ def get_opponent_name_word_cloud_fig(df: pd.DataFrame):
     return fig
-def get_rating_over_time_fig(df: pd.DataFrame, is_tournament: bool):
     fig = go.Figure()
-    fig.add_trace(go.Scatter(x=df["tournament_end_date" if is_tournament else "event_date"],
                              y=df["rating"],
                              mode='lines+markers',
                              line=dict( width=0.9),
                              marker=dict(size=4))),
     fig.update_layout(
         title='Rating over time',
         xaxis_title='Competition date',
         yaxis_title='Rating',
-        showlegend=False,
         template="plotly_white",
     )
@@ -209,7 +226,7 @@ def get_win_loss_record_str(group_df) -> str:
 def get_most_frequent_opponents(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
     df_with_opponents = df.loc[df.opponent != "-, -"]
-    most_common_opponents_df = df_with_opponents.groupby('opponent').agg({"result": [get_win_loss_record_str, "size"]})
     most_common_opponents_df.columns = most_common_opponents_df.columns.get_level_values(1)
     most_common_opponents_df.rename({"get_win_loss_record_str": "Win/loss record", "size": "Number of matches"}, axis=1,
                                     inplace=True)
@@ -260,6 +277,7 @@ def get_best_competitions(df: pd.DataFrame, is_tournament: bool, top_n: int = 5)
     df = grouped.apply(lambda x: assign_pre_comp_rating(x))
     df['rating_increase'] = df['rating'] - df['pre-competition_rating']
     best_competition_dates = df.groupby(x_key_name)["rating_increase"].first().nlargest(top_n).index
     tournament_df = df.loc[df[x_key_name].isin(best_competition_dates)].groupby(

     nat_to_none = lambda x: None if x == "NaT" else x
     if is_tournament:
         if "tournament_start_date" in df.columns and "tournament_end_date" in df.columns:
+            df['tournament_start_date'] = pd.to_datetime(df['tournament_start_date'])
+            df['tournament_end_date'] = pd.to_datetime(df['tournament_end_date'])
             df['tournament_start_date'] = df['tournament_start_date'].dt.date.astype(str).apply(nat_to_none)
             df['tournament_end_date'] = df['tournament_end_date'].dt.date.astype(str).apply(nat_to_none)
             df = df.loc[:, columns]
     else:
         if "event_date" in df.columns:
+            df['event_date'] = pd.to_datetime(df['event_date'])
             df['event_date'] = df['event_date'].dt.date.astype(str).apply(nat_to_none)
         df = df.rename(columns={"league_name": "league"})
 def get_matches_per_competition_fig(df: pd.DataFrame, is_tournament: bool):
     fig = plt.figure()
     plt.title('Matches per competition')
+    sns.histplot(df.groupby('tournament' if is_tournament else "event_date", observed=False).size())
     plt.xlabel('Number of matches in competition')
     return fig
     return fig
+def get_rating_over_time_fig(df: pd.DataFrame, is_tournament: bool, span: int = 60):
+    df['ema'] = df['rating'].ewm(span=span, adjust=False).mean()
     fig = go.Figure()
+    # Raw rating over time trace
+    x_key_name = "tournament_end_date" if is_tournament else "event_date"
+    fig.add_trace(go.Scatter(x=df[x_key_name],
                              y=df["rating"],
+                             name='Rating',
                              mode='lines+markers',
                              line=dict( width=0.9),
                              marker=dict(size=4))),
+    # EMA trace
+    fig.add_trace(go.Scatter(x=df[x_key_name],
+                             y=df["ema"],
+                             mode='lines',
+                             name='Rating EMA',
+                             visible='legendonly',
+                             line=dict(width=1.5, dash='dot')))
     fig.update_layout(
         title='Rating over time',
         xaxis_title='Competition date',
         yaxis_title='Rating',
+        showlegend=True,
         template="plotly_white",
     )
 def get_most_frequent_opponents(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
     df_with_opponents = df.loc[df.opponent != "-, -"]
+    most_common_opponents_df = df_with_opponents.groupby('opponent', observed=False).agg({"result": [get_win_loss_record_str, "size"]})
     most_common_opponents_df.columns = most_common_opponents_df.columns.get_level_values(1)
     most_common_opponents_df.rename({"get_win_loss_record_str": "Win/loss record", "size": "Number of matches"}, axis=1,
                                     inplace=True)
     df = grouped.apply(lambda x: assign_pre_comp_rating(x))
     df['rating_increase'] = df['rating'] - df['pre-competition_rating']
+    df.reset_index(drop=True, inplace=True)
     best_competition_dates = df.groupby(x_key_name)["rating_increase"].first().nlargest(top_n).index
     tournament_df = df.loc[df[x_key_name].isin(best_competition_dates)].groupby(