lschlessinger committed on
Commit
bfe3be9
·
1 Parent(s): ac4124a

feat: add best tournaments

Browse files

also removes player name feature

Files changed (2) hide show
  1. app.py +12 -6
  2. match_parser.py +37 -0
app.py CHANGED
@@ -10,7 +10,7 @@ def usatt_rating_analyzer(file_obj):
10
  df, is_tournament = mp.load_match_df(Path(file_obj.name))
11
 
12
  # Create outputs.
13
- player_name = mp.get_player_name(Path(file_obj.orig_name).stem)
14
  current_rating = mp.get_current_rating(df)
15
  peak_rating = mp.get_max_rating(df)
16
  n_competitions_played = mp.get_num_competitions_played(df, is_tournament)
@@ -18,6 +18,7 @@ def usatt_rating_analyzer(file_obj):
18
  matches_per_competition_fig = mp.get_matches_per_competition_fig(df, is_tournament)
19
  opponent_name_word_cloud_fig = mp.get_opponent_name_word_cloud_fig(df)
20
  competition_name_word_cloud_fig = mp.get_competition_name_word_cloud_fig(df, is_tournament)
 
21
  most_frequent_opponents = mp.make_df_columns_readable(mp.get_most_frequent_opponents(df), is_tournament)
22
  best_wins = mp.make_df_columns_readable(mp.get_best_wins(df), is_tournament)
23
  biggest_upsets = mp.make_df_columns_readable(mp.get_biggest_upsets(df), is_tournament)
@@ -27,7 +28,7 @@ def usatt_rating_analyzer(file_obj):
27
  opponent_rating_distr_fig = mp.get_opponent_rating_distr_fig(df)
28
  opponent_rating_dist_over_time_fig = mp.get_opponent_rating_dist_over_time_fig(df, is_tournament)
29
 
30
- return (player_name,
31
  current_rating,
32
  peak_rating,
33
  n_competitions_played,
@@ -37,6 +38,7 @@ def usatt_rating_analyzer(file_obj):
37
  opponent_rating_dist_over_time_fig,
38
  best_wins,
39
  biggest_upsets,
 
40
  most_frequent_opponents,
41
  highest_rated_opponent,
42
  match_with_longest_game,
@@ -75,9 +77,9 @@ with gr.Blocks() as demo:
75
  """)
76
 
77
  with gr.Group():
78
- with gr.Row():
79
- with gr.Column():
80
- player_name_box = gr.Textbox(lines=1, label="Player name")
81
  with gr.Row():
82
  with gr.Column():
83
  current_rating_box = gr.Textbox(lines=1, label="Current rating")
@@ -121,6 +123,9 @@ with gr.Blocks() as demo:
121
 
122
  with gr.Row():
123
  with gr.Column():
 
 
 
124
  most_frequent_opponents_gdf = gr.Dataframe(label="Most frequent opponents", max_rows=5)
125
  highest_rated_opponent_gdf = gr.Dataframe(label="Best opponent", max_rows=1)
126
  match_longest_game_gdf = gr.Dataframe(label="Match with longest game", max_rows=1)
@@ -136,7 +141,7 @@ with gr.Blocks() as demo:
136
 
137
  inputs = [input_file]
138
  outputs = [
139
- player_name_box,
140
  current_rating_box,
141
  peak_rating_box,
142
  num_comps_box,
@@ -146,6 +151,7 @@ with gr.Blocks() as demo:
146
  opponent_rating_dist_over_time_plot,
147
  best_wins_gdf,
148
  biggest_upsets_gdf,
 
149
  most_frequent_opponents_gdf,
150
  highest_rated_opponent_gdf,
151
  match_longest_game_gdf,
 
10
  df, is_tournament = mp.load_match_df(Path(file_obj.name))
11
 
12
  # Create outputs.
13
+ # player_name = mp.get_player_name(Path(file_obj.orig_name).stem)
14
  current_rating = mp.get_current_rating(df)
15
  peak_rating = mp.get_max_rating(df)
16
  n_competitions_played = mp.get_num_competitions_played(df, is_tournament)
 
18
  matches_per_competition_fig = mp.get_matches_per_competition_fig(df, is_tournament)
19
  opponent_name_word_cloud_fig = mp.get_opponent_name_word_cloud_fig(df)
20
  competition_name_word_cloud_fig = mp.get_competition_name_word_cloud_fig(df, is_tournament)
21
+ best_competitions = mp.make_df_columns_readable(mp.get_best_competitions(df, is_tournament), is_tournament)
22
  most_frequent_opponents = mp.make_df_columns_readable(mp.get_most_frequent_opponents(df), is_tournament)
23
  best_wins = mp.make_df_columns_readable(mp.get_best_wins(df), is_tournament)
24
  biggest_upsets = mp.make_df_columns_readable(mp.get_biggest_upsets(df), is_tournament)
 
28
  opponent_rating_distr_fig = mp.get_opponent_rating_distr_fig(df)
29
  opponent_rating_dist_over_time_fig = mp.get_opponent_rating_dist_over_time_fig(df, is_tournament)
30
 
31
+ return (#player_name,
32
  current_rating,
33
  peak_rating,
34
  n_competitions_played,
 
38
  opponent_rating_dist_over_time_fig,
39
  best_wins,
40
  biggest_upsets,
41
+ best_competitions,
42
  most_frequent_opponents,
43
  highest_rated_opponent,
44
  match_with_longest_game,
 
77
  """)
78
 
79
  with gr.Group():
80
+ # with gr.Row():
81
+ # with gr.Column():
82
+ # player_name_box = gr.Textbox(lines=1, label="Player name")
83
  with gr.Row():
84
  with gr.Column():
85
  current_rating_box = gr.Textbox(lines=1, label="Current rating")
 
123
 
124
  with gr.Row():
125
  with gr.Column():
126
+ best_competitions_gdf = gr.Dataframe(
127
+ label="Best competitions (those having the largest increase in rating)",
128
+ max_rows=5)
129
  most_frequent_opponents_gdf = gr.Dataframe(label="Most frequent opponents", max_rows=5)
130
  highest_rated_opponent_gdf = gr.Dataframe(label="Best opponent", max_rows=1)
131
  match_longest_game_gdf = gr.Dataframe(label="Match with longest game", max_rows=1)
 
141
 
142
  inputs = [input_file]
143
  outputs = [
144
+ # player_name_box,
145
  current_rating_box,
146
  peak_rating_box,
147
  num_comps_box,
 
151
  opponent_rating_dist_over_time_plot,
152
  best_wins_gdf,
153
  biggest_upsets_gdf,
154
+ best_competitions_gdf,
155
  most_frequent_opponents_gdf,
156
  highest_rated_opponent_gdf,
157
  match_longest_game_gdf,
match_parser.py CHANGED
@@ -219,6 +219,43 @@ def get_biggest_upsets(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
219
  return df.loc[df.result == 'Won'].sort_values("rating_difference", ascending=False).head(top_n)
220
 
221
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  def get_highest_rated_opponent(df: pd.DataFrame) -> pd.DataFrame:
223
  return df.iloc[df.opponent_rating.idxmax()].to_frame().transpose()
224
 
 
219
  return df.loc[df.result == 'Won'].sort_values("rating_difference", ascending=False).head(top_n)
220
 
221
 
222
def get_best_competitions(df: pd.DataFrame, is_tournament: bool, top_n: int = 5) -> pd.DataFrame:
    """Return the competitions with the largest rating increase.

    Rows of ``df`` are grouped by competition date (``tournament_end_date``
    for tournaments, ``event_date`` for leagues). For each competition the
    first non-null value of every column is kept, and the rating of the
    chronologically previous competition is used as its pre-competition
    rating.

    Args:
        df: Match data. Must contain a ``rating`` column plus, for
            tournaments, ``tournament_start_date``, ``tournament_end_date``
            and ``tournament``; for leagues, ``event_date`` and
            ``league_name``.
        is_tournament: Whether ``df`` holds tournament (True) or league
            (False) data.
        top_n: Maximum number of competitions to return.

    Returns:
        A new DataFrame with at most ``top_n`` rows, sorted by
        ``rating_increase`` in descending order. Columns are the
        competition-identifying columns followed by ``rating_increase``,
        ``pre-competition_rating`` and ``post-competition_rating``. The
        input frame is not modified. An empty input yields an empty frame
        with the same columns.
    """
    # Use the same key for grouping and selection so league data (keyed on
    # "event_date") works as well as tournament data.
    key_name = "tournament_end_date" if is_tournament else "event_date"

    if is_tournament:
        cols = ['tournament_start_date', 'tournament_end_date', 'tournament']
    else:
        cols = ["event_date", "league_name"]
    cols += ['rating_increase', 'pre-competition_rating', 'rating']
    renames = {"rating": "post-competition_rating"}

    if df.empty:
        # Nothing to rank; keep the output schema stable for the caller.
        return pd.DataFrame(columns=cols).rename(columns=renames)

    # One row per competition, ordered by the (sorted) group key.
    per_competition = df.groupby(key_name).first()

    # There is no rating before the earliest competition, so its
    # pre-competition rating is filled with the rating in the last row of
    # ``df``. This is not a true pre-competition value; it only keeps the
    # top-k rating differences meaningful.
    # NOTE(review): assumes ``df`` is ordered newest-first, so that the last
    # row belongs to the earliest competition - confirm against the loader.
    fill_value = df.iloc[-1].rating
    per_competition['pre-competition_rating'] = per_competition['rating'].shift(
        periods=1, fill_value=fill_value)
    per_competition['rating_increase'] = (
        per_competition['rating'] - per_competition['pre-competition_rating'])

    # A stable sort keeps tied competitions in chronological order.
    best = per_competition.sort_values(
        by='rating_increase', ascending=False, kind='mergesort').head(top_n).reset_index()

    return best[cols].rename(columns=renames)
257
+
258
+
259
  def get_highest_rated_opponent(df: pd.DataFrame) -> pd.DataFrame:
260
  return df.iloc[df.opponent_rating.idxmax()].to_frame().transpose()
261