Commit
·
bfe3be9
1
Parent(s):
ac4124a
feat: add best tournaments
Browse files
also removes player name feature
- app.py +12 -6
- match_parser.py +37 -0
app.py
CHANGED
@@ -10,7 +10,7 @@ def usatt_rating_analyzer(file_obj):
|
|
10 |
df, is_tournament = mp.load_match_df(Path(file_obj.name))
|
11 |
|
12 |
# Create outputs.
|
13 |
-
player_name = mp.get_player_name(Path(file_obj.orig_name).stem)
|
14 |
current_rating = mp.get_current_rating(df)
|
15 |
peak_rating = mp.get_max_rating(df)
|
16 |
n_competitions_played = mp.get_num_competitions_played(df, is_tournament)
|
@@ -18,6 +18,7 @@ def usatt_rating_analyzer(file_obj):
|
|
18 |
matches_per_competition_fig = mp.get_matches_per_competition_fig(df, is_tournament)
|
19 |
opponent_name_word_cloud_fig = mp.get_opponent_name_word_cloud_fig(df)
|
20 |
competition_name_word_cloud_fig = mp.get_competition_name_word_cloud_fig(df, is_tournament)
|
|
|
21 |
most_frequent_opponents = mp.make_df_columns_readable(mp.get_most_frequent_opponents(df), is_tournament)
|
22 |
best_wins = mp.make_df_columns_readable(mp.get_best_wins(df), is_tournament)
|
23 |
biggest_upsets = mp.make_df_columns_readable(mp.get_biggest_upsets(df), is_tournament)
|
@@ -27,7 +28,7 @@ def usatt_rating_analyzer(file_obj):
|
|
27 |
opponent_rating_distr_fig = mp.get_opponent_rating_distr_fig(df)
|
28 |
opponent_rating_dist_over_time_fig = mp.get_opponent_rating_dist_over_time_fig(df, is_tournament)
|
29 |
|
30 |
-
return (player_name,
|
31 |
current_rating,
|
32 |
peak_rating,
|
33 |
n_competitions_played,
|
@@ -37,6 +38,7 @@ def usatt_rating_analyzer(file_obj):
|
|
37 |
opponent_rating_dist_over_time_fig,
|
38 |
best_wins,
|
39 |
biggest_upsets,
|
|
|
40 |
most_frequent_opponents,
|
41 |
highest_rated_opponent,
|
42 |
match_with_longest_game,
|
@@ -75,9 +77,9 @@ with gr.Blocks() as demo:
|
|
75 |
""")
|
76 |
|
77 |
with gr.Group():
|
78 |
-
with gr.Row():
|
79 |
-
|
80 |
-
|
81 |
with gr.Row():
|
82 |
with gr.Column():
|
83 |
current_rating_box = gr.Textbox(lines=1, label="Current rating")
|
@@ -121,6 +123,9 @@ with gr.Blocks() as demo:
|
|
121 |
|
122 |
with gr.Row():
|
123 |
with gr.Column():
|
|
|
|
|
|
|
124 |
most_frequent_opponents_gdf = gr.Dataframe(label="Most frequent opponents", max_rows=5)
|
125 |
highest_rated_opponent_gdf = gr.Dataframe(label="Best opponent", max_rows=1)
|
126 |
match_longest_game_gdf = gr.Dataframe(label="Match with longest game", max_rows=1)
|
@@ -136,7 +141,7 @@ with gr.Blocks() as demo:
|
|
136 |
|
137 |
inputs = [input_file]
|
138 |
outputs = [
|
139 |
-
player_name_box,
|
140 |
current_rating_box,
|
141 |
peak_rating_box,
|
142 |
num_comps_box,
|
@@ -146,6 +151,7 @@ with gr.Blocks() as demo:
|
|
146 |
opponent_rating_dist_over_time_plot,
|
147 |
best_wins_gdf,
|
148 |
biggest_upsets_gdf,
|
|
|
149 |
most_frequent_opponents_gdf,
|
150 |
highest_rated_opponent_gdf,
|
151 |
match_longest_game_gdf,
|
|
|
10 |
df, is_tournament = mp.load_match_df(Path(file_obj.name))
|
11 |
|
12 |
# Create outputs.
|
13 |
+
# player_name = mp.get_player_name(Path(file_obj.orig_name).stem)
|
14 |
current_rating = mp.get_current_rating(df)
|
15 |
peak_rating = mp.get_max_rating(df)
|
16 |
n_competitions_played = mp.get_num_competitions_played(df, is_tournament)
|
|
|
18 |
matches_per_competition_fig = mp.get_matches_per_competition_fig(df, is_tournament)
|
19 |
opponent_name_word_cloud_fig = mp.get_opponent_name_word_cloud_fig(df)
|
20 |
competition_name_word_cloud_fig = mp.get_competition_name_word_cloud_fig(df, is_tournament)
|
21 |
+
best_competitions = mp.make_df_columns_readable(mp.get_best_competitions(df, is_tournament), is_tournament)
|
22 |
most_frequent_opponents = mp.make_df_columns_readable(mp.get_most_frequent_opponents(df), is_tournament)
|
23 |
best_wins = mp.make_df_columns_readable(mp.get_best_wins(df), is_tournament)
|
24 |
biggest_upsets = mp.make_df_columns_readable(mp.get_biggest_upsets(df), is_tournament)
|
|
|
28 |
opponent_rating_distr_fig = mp.get_opponent_rating_distr_fig(df)
|
29 |
opponent_rating_dist_over_time_fig = mp.get_opponent_rating_dist_over_time_fig(df, is_tournament)
|
30 |
|
31 |
+
return (#player_name,
|
32 |
current_rating,
|
33 |
peak_rating,
|
34 |
n_competitions_played,
|
|
|
38 |
opponent_rating_dist_over_time_fig,
|
39 |
best_wins,
|
40 |
biggest_upsets,
|
41 |
+
best_competitions,
|
42 |
most_frequent_opponents,
|
43 |
highest_rated_opponent,
|
44 |
match_with_longest_game,
|
|
|
77 |
""")
|
78 |
|
79 |
with gr.Group():
|
80 |
+
# with gr.Row():
|
81 |
+
# with gr.Column():
|
82 |
+
# player_name_box = gr.Textbox(lines=1, label="Player name")
|
83 |
with gr.Row():
|
84 |
with gr.Column():
|
85 |
current_rating_box = gr.Textbox(lines=1, label="Current rating")
|
|
|
123 |
|
124 |
with gr.Row():
|
125 |
with gr.Column():
|
126 |
+
best_competitions_gdf = gr.Dataframe(
|
127 |
+
label="Best competitions (those having the largest increase in rating)",
|
128 |
+
max_rows=5)
|
129 |
most_frequent_opponents_gdf = gr.Dataframe(label="Most frequent opponents", max_rows=5)
|
130 |
highest_rated_opponent_gdf = gr.Dataframe(label="Best opponent", max_rows=1)
|
131 |
match_longest_game_gdf = gr.Dataframe(label="Match with longest game", max_rows=1)
|
|
|
141 |
|
142 |
inputs = [input_file]
|
143 |
outputs = [
|
144 |
+
# player_name_box,
|
145 |
current_rating_box,
|
146 |
peak_rating_box,
|
147 |
num_comps_box,
|
|
|
151 |
opponent_rating_dist_over_time_plot,
|
152 |
best_wins_gdf,
|
153 |
biggest_upsets_gdf,
|
154 |
+
best_competitions_gdf,
|
155 |
most_frequent_opponents_gdf,
|
156 |
highest_rated_opponent_gdf,
|
157 |
match_longest_game_gdf,
|
match_parser.py
CHANGED
@@ -219,6 +219,43 @@ def get_biggest_upsets(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
|
|
219 |
return df.loc[df.result == 'Won'].sort_values("rating_difference", ascending=False).head(top_n)
|
220 |
|
221 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
222 |
def get_highest_rated_opponent(df: pd.DataFrame) -> pd.DataFrame:
    """Return the match row with the highest ``opponent_rating`` as a one-row frame.

    Args:
        df: Match rows containing an ``opponent_rating`` column.

    Returns:
        A single-row DataFrame holding the row whose ``opponent_rating`` is the
        maximum (the first such row if several tie).
    """
    # idxmax() yields an index *label*, so it must be looked up with .loc;
    # .iloc would treat it as a position and only works for a default
    # 0..n-1 index.
    best_label = df.opponent_rating.idxmax()
    return df.loc[best_label].to_frame().transpose()
|
224 |
|
|
|
219 |
return df.loc[df.result == 'Won'].sort_values("rating_difference", ascending=False).head(top_n)
|
220 |
|
221 |
|
222 |
+
def get_best_competitions(df: pd.DataFrame, is_tournament: bool, top_n: int = 5) -> pd.DataFrame:
    """Return the ``top_n`` competitions with the largest rating increase.

    Matches are grouped by competition date (``tournament_end_date`` for
    tournaments, ``event_date`` for leagues). A competition's post-competition
    rating is the first non-null ``rating`` in its group; its pre-competition
    rating is the post-competition rating of the preceding competition date.

    Args:
        df: Match rows. Must contain ``rating`` plus the date and descriptive
            columns for the competition type (``tournament_start_date``,
            ``tournament_end_date``, ``tournament`` or ``event_date``,
            ``league_name``).
        is_tournament: True for tournament data, False for league data.
        top_n: Maximum number of competitions to return.

    Returns:
        One row per competition, sorted by ``rating_increase`` descending, with
        the descriptive columns followed by ``rating_increase``,
        ``pre-competition_rating`` and ``post-competition_rating``.
    """
    date_col = "tournament_end_date" if is_tournament else "event_date"
    if is_tournament:
        id_cols = ['tournament_start_date', 'tournament_end_date', 'tournament']
    else:
        id_cols = ["event_date", "league_name"]
    out_cols = id_cols + ['rating_increase', 'pre-competition_rating', 'rating']
    renames = {"rating": "post-competition_rating"}

    # Nothing to rank: return an empty frame with the usual output columns
    # instead of failing on df.iloc[-1] below.
    if df.empty:
        return pd.DataFrame(columns=out_cols).rename(columns=renames)

    # Post-competition rating per competition date; groupby sorts the dates
    # ascending, so shifting by one pairs each competition with its predecessor.
    rating_by_comp = df.groupby(date_col)['rating'].first()

    # The earliest competition has no predecessor. We (knowingly incorrectly)
    # use the rating on the last row of df as its pre-competition rating so
    # that the top-k rating differences still make sense.
    # NOTE(review): assumes df is ordered newest-first, so the last row belongs
    # to the earliest competition -- confirm against load_match_df.
    fill_value = df.iloc[-1].rating
    pre_comp_rating_by_comp = rating_by_comp.shift(periods=1, fill_value=fill_value)

    # Attach the pre-competition rating to every match row. A plain map keeps
    # the rows and index of df untouched and leaves the caller's frame unmodified.
    df = df.copy()
    df['pre-competition_rating'] = df[date_col].map(pre_comp_rating_by_comp)
    df['rating_increase'] = df['rating'] - df['pre-competition_rating']

    best_competition_dates = df.groupby(date_col)['rating_increase'].first().nlargest(top_n).index

    # Filter on the same date column used for grouping, so league data (which
    # is keyed by event_date) is handled as well as tournament data.
    best_df = (
        df.loc[df[date_col].isin(best_competition_dates)]
        .groupby(date_col)
        .first()
        .sort_values(by='rating_increase', ascending=False)
        .reset_index()
    )

    return best_df[out_cols].rename(columns=renames)
|
257 |
+
|
258 |
+
|
259 |
def get_highest_rated_opponent(df: pd.DataFrame) -> pd.DataFrame:
    """Return the match row with the highest ``opponent_rating`` as a one-row frame.

    Args:
        df: Match rows containing an ``opponent_rating`` column.

    Returns:
        A single-row DataFrame holding the row whose ``opponent_rating`` is the
        maximum (the first such row if several tie).
    """
    # idxmax() yields an index *label*, so it must be looked up with .loc;
    # .iloc would treat it as a position and only works for a default
    # 0..n-1 index.
    best_label = df.opponent_rating.idxmax()
    return df.loc[best_label].to_frame().transpose()
|
261 |
|