lschlessinger committed on
Commit
75c0152
·
1 Parent(s): 7f24c35

formatting

Browse files
Files changed (3) hide show
  1. app.py +30 -28
  2. match_parser.py +20 -18
  3. util.py +2 -0
app.py CHANGED
@@ -27,33 +27,34 @@ def usatt_rating_analyzer(file_obj):
27
  worst_recent_losses = mp.make_df_columns_readable(mp.get_worst_recent_losses(df, is_tournament), is_tournament)
28
  highest_rated_opponent = mp.make_df_columns_readable(mp.get_highest_rated_opponent(df), is_tournament)
29
  rating_over_time_fig = mp.get_rating_over_time_fig(df, is_tournament)
30
- match_with_longest_game = mp.make_df_columns_readable(mp.get_match_with_longest_game(df, is_tournament), is_tournament)
 
31
  longest_match = mp.make_df_columns_readable(mp.get_longest_match(df, is_tournament), is_tournament)
32
  opponent_rating_distr_fig = mp.get_opponent_rating_distr_fig(df)
33
  opponent_rating_dist_over_time_fig = mp.get_opponent_rating_dist_over_time_fig(df, is_tournament)
34
 
35
- return (#player_name,
36
- current_rating,
37
- peak_rating,
38
- n_competitions_played,
39
- n_matches_played,
40
- first_comp_year,
41
- n_active_years,
42
- rating_over_time_fig,
43
- opponent_rating_distr_fig,
44
- opponent_rating_dist_over_time_fig,
45
- best_wins,
46
- biggest_upsets,
47
- worst_recent_losses,
48
- best_competitions,
49
- most_frequent_opponents,
50
- highest_rated_opponent,
51
- match_with_longest_game,
52
- longest_match,
53
- opponent_name_word_cloud_fig,
54
- competition_name_word_cloud_fig,
55
- matches_per_competition_fig,
56
- )
57
 
58
 
59
  with gr.Blocks() as demo:
@@ -100,7 +101,8 @@ with gr.Blocks() as demo:
100
  with gr.Column():
101
  first_competition_box = gr.Textbox(lines=1, label="First competition")
102
  with gr.Column():
103
- num_active_years_box = gr.Textbox(lines=1, label="Number of active years (participated in at least 1 competition)")
 
104
 
105
  with gr.Row():
106
  with gr.Column():
@@ -123,8 +125,9 @@ with gr.Blocks() as demo:
123
  with gr.Column():
124
  best_wins_gdf = gr.Dataframe(label="Best wins (matches won sorted by opponent post-competition rating)",
125
  height=500)
126
- biggest_upsets_gdf = gr.Dataframe(label="Biggest upsets (matches won sorted by rating - opponent post-competition rating)",
127
- height=500)
 
128
  worst_recent_losses_gdf = gr.Dataframe(label="Worst recent losses (matches lost sorted by opponent "
129
  "post-competition rating from the 5 most recent "
130
  "competitions)", height=500)
@@ -154,7 +157,6 @@ with gr.Blocks() as demo:
154
  with gr.Column():
155
  matches_per_comp_plot = gr.Plot(show_label=False)
156
 
157
-
158
  inputs = [input_file]
159
  outputs = [
160
  # player_name_box,
@@ -183,4 +185,4 @@ with gr.Blocks() as demo:
183
  btn.click(usatt_rating_analyzer, inputs=inputs, outputs=outputs)
184
 
185
  if __name__ == "__main__":
186
- demo.launch()
 
27
  worst_recent_losses = mp.make_df_columns_readable(mp.get_worst_recent_losses(df, is_tournament), is_tournament)
28
  highest_rated_opponent = mp.make_df_columns_readable(mp.get_highest_rated_opponent(df), is_tournament)
29
  rating_over_time_fig = mp.get_rating_over_time_fig(df, is_tournament)
30
+ match_with_longest_game = mp.make_df_columns_readable(mp.get_match_with_longest_game(df, is_tournament),
31
+ is_tournament)
32
  longest_match = mp.make_df_columns_readable(mp.get_longest_match(df, is_tournament), is_tournament)
33
  opponent_rating_distr_fig = mp.get_opponent_rating_distr_fig(df)
34
  opponent_rating_dist_over_time_fig = mp.get_opponent_rating_dist_over_time_fig(df, is_tournament)
35
 
36
+ return ( # player_name,
37
+ current_rating,
38
+ peak_rating,
39
+ n_competitions_played,
40
+ n_matches_played,
41
+ first_comp_year,
42
+ n_active_years,
43
+ rating_over_time_fig,
44
+ opponent_rating_distr_fig,
45
+ opponent_rating_dist_over_time_fig,
46
+ best_wins,
47
+ biggest_upsets,
48
+ worst_recent_losses,
49
+ best_competitions,
50
+ most_frequent_opponents,
51
+ highest_rated_opponent,
52
+ match_with_longest_game,
53
+ longest_match,
54
+ opponent_name_word_cloud_fig,
55
+ competition_name_word_cloud_fig,
56
+ matches_per_competition_fig,
57
+ )
58
 
59
 
60
  with gr.Blocks() as demo:
 
101
  with gr.Column():
102
  first_competition_box = gr.Textbox(lines=1, label="First competition")
103
  with gr.Column():
104
+ num_active_years_box = gr.Textbox(lines=1,
105
+ label="Number of active years (participated in at least 1 competition)")
106
 
107
  with gr.Row():
108
  with gr.Column():
 
125
  with gr.Column():
126
  best_wins_gdf = gr.Dataframe(label="Best wins (matches won sorted by opponent post-competition rating)",
127
  height=500)
128
+ biggest_upsets_gdf = gr.Dataframe(
129
+ label="Biggest upsets (matches won sorted by rating - opponent post-competition rating)",
130
+ height=500)
131
  worst_recent_losses_gdf = gr.Dataframe(label="Worst recent losses (matches lost sorted by opponent "
132
  "post-competition rating from the 5 most recent "
133
  "competitions)", height=500)
 
157
  with gr.Column():
158
  matches_per_comp_plot = gr.Plot(show_label=False)
159
 
 
160
  inputs = [input_file]
161
  outputs = [
162
  # player_name_box,
 
185
  btn.click(usatt_rating_analyzer, inputs=inputs, outputs=outputs)
186
 
187
  if __name__ == "__main__":
188
+ demo.launch()
match_parser.py CHANGED
@@ -55,7 +55,6 @@ def _fix_dtypes(df: pd.DataFrame, is_tournament: bool) -> pd.DataFrame:
55
  return df
56
 
57
 
58
-
59
  def make_df_columns_readable(df: Optional[pd.DataFrame], is_tournament: bool) -> Optional[pd.DataFrame]:
60
  """Make a data frame's columns human-readable."""
61
  if df is None:
@@ -80,7 +79,8 @@ def make_df_columns_readable(df: Optional[pd.DataFrame], is_tournament: bool) ->
80
  else:
81
  return tournament_start_date if missing_end_date else tournament_end_date
82
 
83
- df["date"] = df.apply(lambda row: create_date(row['tournament_start_date'], row['tournament_end_date']), axis=1)
 
84
  df = df.drop(columns=["tournament_start_date", "tournament_end_date"])
85
 
86
  # Move date to the front.
@@ -96,6 +96,7 @@ def make_df_columns_readable(df: Optional[pd.DataFrame], is_tournament: bool) ->
96
  df = df.rename(columns=lambda c: snake_case_to_human_readable(c))
97
  return df
98
 
 
99
  def _check_match_type(match_type: str) -> str:
100
  allowed_match_types = {"tournament", "league"}
101
  if match_type not in allowed_match_types:
@@ -121,6 +122,7 @@ def get_player_name(file_stem: str) -> str:
121
  profile_id = int(file_stem.split(" ")[0].replace("_", "").split("matches")[-1])
122
  return fetch_player_name(profile_id)
123
 
 
124
  def get_num_competitions_played(df: pd.DataFrame, is_tournament: bool) -> int:
125
  key_name = "tournament_end_date" if is_tournament else "event_date"
126
  return df[key_name].nunique()
@@ -180,7 +182,7 @@ def get_rating_over_time_fig(df: pd.DataFrame, is_tournament: bool, span: int =
180
  y=df["rating"],
181
  name='Rating',
182
  mode='lines+markers',
183
- line=dict( width=0.9),
184
  marker=dict(size=4))),
185
 
186
  # EMA trace
@@ -202,8 +204,6 @@ def get_rating_over_time_fig(df: pd.DataFrame, is_tournament: bool, span: int =
202
  return fig
203
 
204
 
205
-
206
-
207
  def get_match_with_longest_game(df: pd.DataFrame, is_tournament: bool) -> Optional[pd.DataFrame]:
208
  if not is_tournament:
209
  return None
@@ -226,7 +226,8 @@ def get_win_loss_record_str(group_df) -> str:
226
  def get_most_frequent_opponents(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
227
  df_with_opponents = df.loc[df.opponent != "-, -"]
228
 
229
- most_common_opponents_df = df_with_opponents.groupby('opponent', observed=False).agg({"result": [get_win_loss_record_str, "size"]})
 
230
  most_common_opponents_df.columns = most_common_opponents_df.columns.get_level_values(1)
231
  most_common_opponents_df.rename({"get_win_loss_record_str": "Win/loss record", "size": "Number of matches"}, axis=1,
232
  inplace=True)
@@ -252,8 +253,9 @@ def get_worst_recent_losses(df: pd.DataFrame,
252
  top_n_comps: int = 5) -> pd.DataFrame:
253
  """Get the top-k most recent worst losses from the top-n most recent competitions."""
254
  x_key_name = "tournament_end_date" if is_tournament else "event_date"
255
- most_recent_competition_dates =df.groupby(x_key_name).first().reset_index().nlargest(top_n_comps,
256
- columns=x_key_name)[x_key_name]
 
257
  df_recent = df.loc[df[x_key_name].isin(most_recent_competition_dates)]
258
  return df_recent.loc[df_recent.result == 'Lost'].sort_values("opponent_rating", ascending=True).head(top_k_losses)
259
 
@@ -327,15 +329,16 @@ def get_opponent_rating_dist_over_time_fig(df: pd.DataFrame, is_tournament: bool
327
 
328
 
329
  def get_total_match_points(score_str: str) -> int:
330
- single_game_scores = int_csv_to_list(score_str)
331
- total_points = 0
332
- for single_game_score in single_game_scores:
333
- abs_gscore = abs(single_game_score)
334
- if abs_gscore < 10:
335
- total_points += abs_gscore + 11
336
- else:
337
- total_points += 2 * abs_gscore + 2
338
- return total_points
 
339
 
340
  def get_longest_match(df: pd.DataFrame, is_tournament: bool) -> Optional[pd.DataFrame]:
341
  """Get the longest match, where longest is defined as the most number of points played."""
@@ -357,4 +360,3 @@ def load_match_df(file_path: Path) -> Tuple[pd.DataFrame, bool]:
357
  logging.info(f"Loaded match CSV {file_path}.")
358
 
359
  return df, is_tournament
360
-
 
55
  return df
56
 
57
 
 
58
  def make_df_columns_readable(df: Optional[pd.DataFrame], is_tournament: bool) -> Optional[pd.DataFrame]:
59
  """Make a data frame's columns human-readable."""
60
  if df is None:
 
79
  else:
80
  return tournament_start_date if missing_end_date else tournament_end_date
81
 
82
+ df["date"] = df.apply(lambda row: create_date(row['tournament_start_date'], row['tournament_end_date']),
83
+ axis=1)
84
  df = df.drop(columns=["tournament_start_date", "tournament_end_date"])
85
 
86
  # Move date to the front.
 
96
  df = df.rename(columns=lambda c: snake_case_to_human_readable(c))
97
  return df
98
 
99
+
100
  def _check_match_type(match_type: str) -> str:
101
  allowed_match_types = {"tournament", "league"}
102
  if match_type not in allowed_match_types:
 
122
  profile_id = int(file_stem.split(" ")[0].replace("_", "").split("matches")[-1])
123
  return fetch_player_name(profile_id)
124
 
125
+
126
  def get_num_competitions_played(df: pd.DataFrame, is_tournament: bool) -> int:
127
  key_name = "tournament_end_date" if is_tournament else "event_date"
128
  return df[key_name].nunique()
 
182
  y=df["rating"],
183
  name='Rating',
184
  mode='lines+markers',
185
+ line=dict(width=0.9),
186
  marker=dict(size=4))),
187
 
188
  # EMA trace
 
204
  return fig
205
 
206
 
 
 
207
  def get_match_with_longest_game(df: pd.DataFrame, is_tournament: bool) -> Optional[pd.DataFrame]:
208
  if not is_tournament:
209
  return None
 
226
  def get_most_frequent_opponents(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
227
  df_with_opponents = df.loc[df.opponent != "-, -"]
228
 
229
+ most_common_opponents_df = df_with_opponents.groupby('opponent', observed=False).agg(
230
+ {"result": [get_win_loss_record_str, "size"]})
231
  most_common_opponents_df.columns = most_common_opponents_df.columns.get_level_values(1)
232
  most_common_opponents_df.rename({"get_win_loss_record_str": "Win/loss record", "size": "Number of matches"}, axis=1,
233
  inplace=True)
 
253
  top_n_comps: int = 5) -> pd.DataFrame:
254
  """Get the top-k most recent worst losses from the top-n most recent competitions."""
255
  x_key_name = "tournament_end_date" if is_tournament else "event_date"
256
+ most_recent_competition_dates = df.groupby(x_key_name).first().reset_index().nlargest(top_n_comps,
257
+ columns=x_key_name)[
258
+ x_key_name]
259
  df_recent = df.loc[df[x_key_name].isin(most_recent_competition_dates)]
260
  return df_recent.loc[df_recent.result == 'Lost'].sort_values("opponent_rating", ascending=True).head(top_k_losses)
261
 
 
329
 
330
 
331
  def get_total_match_points(score_str: str) -> int:
332
+ single_game_scores = int_csv_to_list(score_str)
333
+ total_points = 0
334
+ for single_game_score in single_game_scores:
335
+ abs_gscore = abs(single_game_score)
336
+ if abs_gscore < 10:
337
+ total_points += abs_gscore + 11
338
+ else:
339
+ total_points += 2 * abs_gscore + 2
340
+ return total_points
341
+
342
 
343
  def get_longest_match(df: pd.DataFrame, is_tournament: bool) -> Optional[pd.DataFrame]:
344
  """Get the longest match, where longest is defined as the most number of points played."""
 
360
  logging.info(f"Loaded match CSV {file_path}.")
361
 
362
  return df, is_tournament
 
util.py CHANGED
@@ -4,10 +4,12 @@ from typing import List
4
  def snake_case_to_human_readable(s: str) -> str:
5
  return " ".join(s.capitalize().split("_"))
6
 
 
7
  def int_csv_to_list(int_csv_str: str) -> List[int]:
8
  """Convert a CSV of ints to a list of ints."""
9
  return [int(i.strip()) for i in int_csv_str.split(',') if i]
10
 
 
11
  def get_max_abs_int(int_csv_str: str) -> int:
12
  """Get the max absolute value int from an int CSV."""
13
  abs_ints = [abs(i) for i in int_csv_to_list(int_csv_str)]
 
4
  def snake_case_to_human_readable(s: str) -> str:
5
  return " ".join(s.capitalize().split("_"))
6
 
7
+
8
  def int_csv_to_list(int_csv_str: str) -> List[int]:
9
  """Convert a CSV of ints to a list of ints."""
10
  return [int(i.strip()) for i in int_csv_str.split(',') if i]
11
 
12
+
13
  def get_max_abs_int(int_csv_str: str) -> int:
14
  """Get the max absolute value int from an int CSV."""
15
  abs_ints = [abs(i) for i in int_csv_to_list(int_csv_str)]