Commit
·
75c0152
1
Parent(s):
7f24c35
formatting
Browse files- app.py +30 -28
- match_parser.py +20 -18
- util.py +2 -0
app.py
CHANGED
@@ -27,33 +27,34 @@ def usatt_rating_analyzer(file_obj):
|
|
27 |
worst_recent_losses = mp.make_df_columns_readable(mp.get_worst_recent_losses(df, is_tournament), is_tournament)
|
28 |
highest_rated_opponent = mp.make_df_columns_readable(mp.get_highest_rated_opponent(df), is_tournament)
|
29 |
rating_over_time_fig = mp.get_rating_over_time_fig(df, is_tournament)
|
30 |
-
match_with_longest_game = mp.make_df_columns_readable(mp.get_match_with_longest_game(df, is_tournament),
|
|
|
31 |
longest_match = mp.make_df_columns_readable(mp.get_longest_match(df, is_tournament), is_tournament)
|
32 |
opponent_rating_distr_fig = mp.get_opponent_rating_distr_fig(df)
|
33 |
opponent_rating_dist_over_time_fig = mp.get_opponent_rating_dist_over_time_fig(df, is_tournament)
|
34 |
|
35 |
-
return (#player_name,
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
|
58 |
|
59 |
with gr.Blocks() as demo:
|
@@ -100,7 +101,8 @@ with gr.Blocks() as demo:
|
|
100 |
with gr.Column():
|
101 |
first_competition_box = gr.Textbox(lines=1, label="First competition")
|
102 |
with gr.Column():
|
103 |
-
num_active_years_box = gr.Textbox(lines=1,
|
|
|
104 |
|
105 |
with gr.Row():
|
106 |
with gr.Column():
|
@@ -123,8 +125,9 @@ with gr.Blocks() as demo:
|
|
123 |
with gr.Column():
|
124 |
best_wins_gdf = gr.Dataframe(label="Best wins (matches won sorted by opponent post-competition rating)",
|
125 |
height=500)
|
126 |
-
biggest_upsets_gdf = gr.Dataframe(
|
127 |
-
|
|
|
128 |
worst_recent_losses_gdf = gr.Dataframe(label="Worst recent losses (matches lost sorted by opponent "
|
129 |
"post-competition rating from the 5 most recent "
|
130 |
"competitions)", height=500)
|
@@ -154,7 +157,6 @@ with gr.Blocks() as demo:
|
|
154 |
with gr.Column():
|
155 |
matches_per_comp_plot = gr.Plot(show_label=False)
|
156 |
|
157 |
-
|
158 |
inputs = [input_file]
|
159 |
outputs = [
|
160 |
# player_name_box,
|
@@ -183,4 +185,4 @@ with gr.Blocks() as demo:
|
|
183 |
btn.click(usatt_rating_analyzer, inputs=inputs, outputs=outputs)
|
184 |
|
185 |
if __name__ == "__main__":
|
186 |
-
demo.launch()
|
|
|
27 |
worst_recent_losses = mp.make_df_columns_readable(mp.get_worst_recent_losses(df, is_tournament), is_tournament)
|
28 |
highest_rated_opponent = mp.make_df_columns_readable(mp.get_highest_rated_opponent(df), is_tournament)
|
29 |
rating_over_time_fig = mp.get_rating_over_time_fig(df, is_tournament)
|
30 |
+
match_with_longest_game = mp.make_df_columns_readable(mp.get_match_with_longest_game(df, is_tournament),
|
31 |
+
is_tournament)
|
32 |
longest_match = mp.make_df_columns_readable(mp.get_longest_match(df, is_tournament), is_tournament)
|
33 |
opponent_rating_distr_fig = mp.get_opponent_rating_distr_fig(df)
|
34 |
opponent_rating_dist_over_time_fig = mp.get_opponent_rating_dist_over_time_fig(df, is_tournament)
|
35 |
|
36 |
+
return ( # player_name,
|
37 |
+
current_rating,
|
38 |
+
peak_rating,
|
39 |
+
n_competitions_played,
|
40 |
+
n_matches_played,
|
41 |
+
first_comp_year,
|
42 |
+
n_active_years,
|
43 |
+
rating_over_time_fig,
|
44 |
+
opponent_rating_distr_fig,
|
45 |
+
opponent_rating_dist_over_time_fig,
|
46 |
+
best_wins,
|
47 |
+
biggest_upsets,
|
48 |
+
worst_recent_losses,
|
49 |
+
best_competitions,
|
50 |
+
most_frequent_opponents,
|
51 |
+
highest_rated_opponent,
|
52 |
+
match_with_longest_game,
|
53 |
+
longest_match,
|
54 |
+
opponent_name_word_cloud_fig,
|
55 |
+
competition_name_word_cloud_fig,
|
56 |
+
matches_per_competition_fig,
|
57 |
+
)
|
58 |
|
59 |
|
60 |
with gr.Blocks() as demo:
|
|
|
101 |
with gr.Column():
|
102 |
first_competition_box = gr.Textbox(lines=1, label="First competition")
|
103 |
with gr.Column():
|
104 |
+
num_active_years_box = gr.Textbox(lines=1,
|
105 |
+
label="Number of active years (participated in at least 1 competition)")
|
106 |
|
107 |
with gr.Row():
|
108 |
with gr.Column():
|
|
|
125 |
with gr.Column():
|
126 |
best_wins_gdf = gr.Dataframe(label="Best wins (matches won sorted by opponent post-competition rating)",
|
127 |
height=500)
|
128 |
+
biggest_upsets_gdf = gr.Dataframe(
|
129 |
+
label="Biggest upsets (matches won sorted by rating - opponent post-competition rating)",
|
130 |
+
height=500)
|
131 |
worst_recent_losses_gdf = gr.Dataframe(label="Worst recent losses (matches lost sorted by opponent "
|
132 |
"post-competition rating from the 5 most recent "
|
133 |
"competitions)", height=500)
|
|
|
157 |
with gr.Column():
|
158 |
matches_per_comp_plot = gr.Plot(show_label=False)
|
159 |
|
|
|
160 |
inputs = [input_file]
|
161 |
outputs = [
|
162 |
# player_name_box,
|
|
|
185 |
btn.click(usatt_rating_analyzer, inputs=inputs, outputs=outputs)
|
186 |
|
187 |
if __name__ == "__main__":
|
188 |
+
demo.launch()
|
match_parser.py
CHANGED
@@ -55,7 +55,6 @@ def _fix_dtypes(df: pd.DataFrame, is_tournament: bool) -> pd.DataFrame:
|
|
55 |
return df
|
56 |
|
57 |
|
58 |
-
|
59 |
def make_df_columns_readable(df: Optional[pd.DataFrame], is_tournament: bool) -> Optional[pd.DataFrame]:
|
60 |
"""Make a data frame's columns human-readable."""
|
61 |
if df is None:
|
@@ -80,7 +79,8 @@ def make_df_columns_readable(df: Optional[pd.DataFrame], is_tournament: bool) ->
|
|
80 |
else:
|
81 |
return tournament_start_date if missing_end_date else tournament_end_date
|
82 |
|
83 |
-
df["date"] = df.apply(lambda row: create_date(row['tournament_start_date'], row['tournament_end_date']),
|
|
|
84 |
df = df.drop(columns=["tournament_start_date", "tournament_end_date"])
|
85 |
|
86 |
# Move date to the front.
|
@@ -96,6 +96,7 @@ def make_df_columns_readable(df: Optional[pd.DataFrame], is_tournament: bool) ->
|
|
96 |
df = df.rename(columns=lambda c: snake_case_to_human_readable(c))
|
97 |
return df
|
98 |
|
|
|
99 |
def _check_match_type(match_type: str) -> str:
|
100 |
allowed_match_types = {"tournament", "league"}
|
101 |
if match_type not in allowed_match_types:
|
@@ -121,6 +122,7 @@ def get_player_name(file_stem: str) -> str:
|
|
121 |
profile_id = int(file_stem.split(" ")[0].replace("_", "").split("matches")[-1])
|
122 |
return fetch_player_name(profile_id)
|
123 |
|
|
|
124 |
def get_num_competitions_played(df: pd.DataFrame, is_tournament: bool) -> int:
|
125 |
key_name = "tournament_end_date" if is_tournament else "event_date"
|
126 |
return df[key_name].nunique()
|
@@ -180,7 +182,7 @@ def get_rating_over_time_fig(df: pd.DataFrame, is_tournament: bool, span: int =
|
|
180 |
y=df["rating"],
|
181 |
name='Rating',
|
182 |
mode='lines+markers',
|
183 |
-
line=dict(
|
184 |
marker=dict(size=4))),
|
185 |
|
186 |
# EMA trace
|
@@ -202,8 +204,6 @@ def get_rating_over_time_fig(df: pd.DataFrame, is_tournament: bool, span: int =
|
|
202 |
return fig
|
203 |
|
204 |
|
205 |
-
|
206 |
-
|
207 |
def get_match_with_longest_game(df: pd.DataFrame, is_tournament: bool) -> Optional[pd.DataFrame]:
|
208 |
if not is_tournament:
|
209 |
return None
|
@@ -226,7 +226,8 @@ def get_win_loss_record_str(group_df) -> str:
|
|
226 |
def get_most_frequent_opponents(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
|
227 |
df_with_opponents = df.loc[df.opponent != "-, -"]
|
228 |
|
229 |
-
most_common_opponents_df = df_with_opponents.groupby('opponent', observed=False).agg(
|
|
|
230 |
most_common_opponents_df.columns = most_common_opponents_df.columns.get_level_values(1)
|
231 |
most_common_opponents_df.rename({"get_win_loss_record_str": "Win/loss record", "size": "Number of matches"}, axis=1,
|
232 |
inplace=True)
|
@@ -252,8 +253,9 @@ def get_worst_recent_losses(df: pd.DataFrame,
|
|
252 |
top_n_comps: int = 5) -> pd.DataFrame:
|
253 |
"""Get the top-k most recent worst losses from the top-n most recent competitions."""
|
254 |
x_key_name = "tournament_end_date" if is_tournament else "event_date"
|
255 |
-
most_recent_competition_dates =df.groupby(x_key_name).first().reset_index().nlargest(top_n_comps,
|
256 |
-
|
|
|
257 |
df_recent = df.loc[df[x_key_name].isin(most_recent_competition_dates)]
|
258 |
return df_recent.loc[df_recent.result == 'Lost'].sort_values("opponent_rating", ascending=True).head(top_k_losses)
|
259 |
|
@@ -327,15 +329,16 @@ def get_opponent_rating_dist_over_time_fig(df: pd.DataFrame, is_tournament: bool
|
|
327 |
|
328 |
|
329 |
def get_total_match_points(score_str: str) -> int:
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
|
|
339 |
|
340 |
def get_longest_match(df: pd.DataFrame, is_tournament: bool) -> Optional[pd.DataFrame]:
|
341 |
"""Get the longest match, where longest is defined as the most number of points played."""
|
@@ -357,4 +360,3 @@ def load_match_df(file_path: Path) -> Tuple[pd.DataFrame, bool]:
|
|
357 |
logging.info(f"Loaded match CSV {file_path}.")
|
358 |
|
359 |
return df, is_tournament
|
360 |
-
|
|
|
55 |
return df
|
56 |
|
57 |
|
|
|
58 |
def make_df_columns_readable(df: Optional[pd.DataFrame], is_tournament: bool) -> Optional[pd.DataFrame]:
|
59 |
"""Make a data frame's columns human-readable."""
|
60 |
if df is None:
|
|
|
79 |
else:
|
80 |
return tournament_start_date if missing_end_date else tournament_end_date
|
81 |
|
82 |
+
df["date"] = df.apply(lambda row: create_date(row['tournament_start_date'], row['tournament_end_date']),
|
83 |
+
axis=1)
|
84 |
df = df.drop(columns=["tournament_start_date", "tournament_end_date"])
|
85 |
|
86 |
# Move date to the front.
|
|
|
96 |
df = df.rename(columns=lambda c: snake_case_to_human_readable(c))
|
97 |
return df
|
98 |
|
99 |
+
|
100 |
def _check_match_type(match_type: str) -> str:
|
101 |
allowed_match_types = {"tournament", "league"}
|
102 |
if match_type not in allowed_match_types:
|
|
|
122 |
profile_id = int(file_stem.split(" ")[0].replace("_", "").split("matches")[-1])
|
123 |
return fetch_player_name(profile_id)
|
124 |
|
125 |
+
|
126 |
def get_num_competitions_played(df: pd.DataFrame, is_tournament: bool) -> int:
|
127 |
key_name = "tournament_end_date" if is_tournament else "event_date"
|
128 |
return df[key_name].nunique()
|
|
|
182 |
y=df["rating"],
|
183 |
name='Rating',
|
184 |
mode='lines+markers',
|
185 |
+
line=dict(width=0.9),
|
186 |
marker=dict(size=4))),
|
187 |
|
188 |
# EMA trace
|
|
|
204 |
return fig
|
205 |
|
206 |
|
|
|
|
|
207 |
def get_match_with_longest_game(df: pd.DataFrame, is_tournament: bool) -> Optional[pd.DataFrame]:
|
208 |
if not is_tournament:
|
209 |
return None
|
|
|
226 |
def get_most_frequent_opponents(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
|
227 |
df_with_opponents = df.loc[df.opponent != "-, -"]
|
228 |
|
229 |
+
most_common_opponents_df = df_with_opponents.groupby('opponent', observed=False).agg(
|
230 |
+
{"result": [get_win_loss_record_str, "size"]})
|
231 |
most_common_opponents_df.columns = most_common_opponents_df.columns.get_level_values(1)
|
232 |
most_common_opponents_df.rename({"get_win_loss_record_str": "Win/loss record", "size": "Number of matches"}, axis=1,
|
233 |
inplace=True)
|
|
|
253 |
top_n_comps: int = 5) -> pd.DataFrame:
|
254 |
"""Get the top-k most recent worst losses from the top-n most recent competitions."""
|
255 |
x_key_name = "tournament_end_date" if is_tournament else "event_date"
|
256 |
+
most_recent_competition_dates = df.groupby(x_key_name).first().reset_index().nlargest(top_n_comps,
|
257 |
+
columns=x_key_name)[
|
258 |
+
x_key_name]
|
259 |
df_recent = df.loc[df[x_key_name].isin(most_recent_competition_dates)]
|
260 |
return df_recent.loc[df_recent.result == 'Lost'].sort_values("opponent_rating", ascending=True).head(top_k_losses)
|
261 |
|
|
|
329 |
|
330 |
|
331 |
def get_total_match_points(score_str: str) -> int:
|
332 |
+
single_game_scores = int_csv_to_list(score_str)
|
333 |
+
total_points = 0
|
334 |
+
for single_game_score in single_game_scores:
|
335 |
+
abs_gscore = abs(single_game_score)
|
336 |
+
if abs_gscore < 10:
|
337 |
+
total_points += abs_gscore + 11
|
338 |
+
else:
|
339 |
+
total_points += 2 * abs_gscore + 2
|
340 |
+
return total_points
|
341 |
+
|
342 |
|
343 |
def get_longest_match(df: pd.DataFrame, is_tournament: bool) -> Optional[pd.DataFrame]:
|
344 |
"""Get the longest match, where longest is defined as the most number of points played."""
|
|
|
360 |
logging.info(f"Loaded match CSV {file_path}.")
|
361 |
|
362 |
return df, is_tournament
|
|
util.py
CHANGED
@@ -4,10 +4,12 @@ from typing import List
|
|
4 |
def snake_case_to_human_readable(s: str) -> str:
|
5 |
return " ".join(s.capitalize().split("_"))
|
6 |
|
|
|
7 |
def int_csv_to_list(int_csv_str: str) -> List[int]:
|
8 |
"""Convert a CSV of ints to a list of ints."""
|
9 |
return [int(i.strip()) for i in int_csv_str.split(',') if i]
|
10 |
|
|
|
11 |
def get_max_abs_int(int_csv_str: str) -> int:
|
12 |
"""Get the max absolute value int from an int CSV."""
|
13 |
abs_ints = [abs(i) for i in int_csv_to_list(int_csv_str)]
|
|
|
4 |
def snake_case_to_human_readable(s: str) -> str:
|
5 |
return " ".join(s.capitalize().split("_"))
|
6 |
|
7 |
+
|
8 |
def int_csv_to_list(int_csv_str: str) -> List[int]:
|
9 |
"""Convert a CSV of ints to a list of ints."""
|
10 |
return [int(i.strip()) for i in int_csv_str.split(',') if i]
|
11 |
|
12 |
+
|
13 |
def get_max_abs_int(int_csv_str: str) -> int:
|
14 |
"""Get the max absolute value int from an int CSV."""
|
15 |
abs_ints = [abs(i) for i in int_csv_to_list(int_csv_str)]
|