Commit
·
bfe3be9
1
Parent(s):
ac4124a
feat: add best tournaments
Browse files
also removes player name feature
- app.py +12 -6
- match_parser.py +37 -0
app.py
CHANGED
|
@@ -10,7 +10,7 @@ def usatt_rating_analyzer(file_obj):
|
|
| 10 |
df, is_tournament = mp.load_match_df(Path(file_obj.name))
|
| 11 |
|
| 12 |
# Create outputs.
|
| 13 |
-
player_name = mp.get_player_name(Path(file_obj.orig_name).stem)
|
| 14 |
current_rating = mp.get_current_rating(df)
|
| 15 |
peak_rating = mp.get_max_rating(df)
|
| 16 |
n_competitions_played = mp.get_num_competitions_played(df, is_tournament)
|
|
@@ -18,6 +18,7 @@ def usatt_rating_analyzer(file_obj):
|
|
| 18 |
matches_per_competition_fig = mp.get_matches_per_competition_fig(df, is_tournament)
|
| 19 |
opponent_name_word_cloud_fig = mp.get_opponent_name_word_cloud_fig(df)
|
| 20 |
competition_name_word_cloud_fig = mp.get_competition_name_word_cloud_fig(df, is_tournament)
|
|
|
|
| 21 |
most_frequent_opponents = mp.make_df_columns_readable(mp.get_most_frequent_opponents(df), is_tournament)
|
| 22 |
best_wins = mp.make_df_columns_readable(mp.get_best_wins(df), is_tournament)
|
| 23 |
biggest_upsets = mp.make_df_columns_readable(mp.get_biggest_upsets(df), is_tournament)
|
|
@@ -27,7 +28,7 @@ def usatt_rating_analyzer(file_obj):
|
|
| 27 |
opponent_rating_distr_fig = mp.get_opponent_rating_distr_fig(df)
|
| 28 |
opponent_rating_dist_over_time_fig = mp.get_opponent_rating_dist_over_time_fig(df, is_tournament)
|
| 29 |
|
| 30 |
-
return (player_name,
|
| 31 |
current_rating,
|
| 32 |
peak_rating,
|
| 33 |
n_competitions_played,
|
|
@@ -37,6 +38,7 @@ def usatt_rating_analyzer(file_obj):
|
|
| 37 |
opponent_rating_dist_over_time_fig,
|
| 38 |
best_wins,
|
| 39 |
biggest_upsets,
|
|
|
|
| 40 |
most_frequent_opponents,
|
| 41 |
highest_rated_opponent,
|
| 42 |
match_with_longest_game,
|
|
@@ -75,9 +77,9 @@ with gr.Blocks() as demo:
|
|
| 75 |
""")
|
| 76 |
|
| 77 |
with gr.Group():
|
| 78 |
-
with gr.Row():
|
| 79 |
-
|
| 80 |
-
|
| 81 |
with gr.Row():
|
| 82 |
with gr.Column():
|
| 83 |
current_rating_box = gr.Textbox(lines=1, label="Current rating")
|
|
@@ -121,6 +123,9 @@ with gr.Blocks() as demo:
|
|
| 121 |
|
| 122 |
with gr.Row():
|
| 123 |
with gr.Column():
|
|
|
|
|
|
|
|
|
|
| 124 |
most_frequent_opponents_gdf = gr.Dataframe(label="Most frequent opponents", max_rows=5)
|
| 125 |
highest_rated_opponent_gdf = gr.Dataframe(label="Best opponent", max_rows=1)
|
| 126 |
match_longest_game_gdf = gr.Dataframe(label="Match with longest game", max_rows=1)
|
|
@@ -136,7 +141,7 @@ with gr.Blocks() as demo:
|
|
| 136 |
|
| 137 |
inputs = [input_file]
|
| 138 |
outputs = [
|
| 139 |
-
player_name_box,
|
| 140 |
current_rating_box,
|
| 141 |
peak_rating_box,
|
| 142 |
num_comps_box,
|
|
@@ -146,6 +151,7 @@ with gr.Blocks() as demo:
|
|
| 146 |
opponent_rating_dist_over_time_plot,
|
| 147 |
best_wins_gdf,
|
| 148 |
biggest_upsets_gdf,
|
|
|
|
| 149 |
most_frequent_opponents_gdf,
|
| 150 |
highest_rated_opponent_gdf,
|
| 151 |
match_longest_game_gdf,
|
|
|
|
| 10 |
df, is_tournament = mp.load_match_df(Path(file_obj.name))
|
| 11 |
|
| 12 |
# Create outputs.
|
| 13 |
+
# player_name = mp.get_player_name(Path(file_obj.orig_name).stem)
|
| 14 |
current_rating = mp.get_current_rating(df)
|
| 15 |
peak_rating = mp.get_max_rating(df)
|
| 16 |
n_competitions_played = mp.get_num_competitions_played(df, is_tournament)
|
|
|
|
| 18 |
matches_per_competition_fig = mp.get_matches_per_competition_fig(df, is_tournament)
|
| 19 |
opponent_name_word_cloud_fig = mp.get_opponent_name_word_cloud_fig(df)
|
| 20 |
competition_name_word_cloud_fig = mp.get_competition_name_word_cloud_fig(df, is_tournament)
|
| 21 |
+
best_competitions = mp.make_df_columns_readable(mp.get_best_competitions(df, is_tournament), is_tournament)
|
| 22 |
most_frequent_opponents = mp.make_df_columns_readable(mp.get_most_frequent_opponents(df), is_tournament)
|
| 23 |
best_wins = mp.make_df_columns_readable(mp.get_best_wins(df), is_tournament)
|
| 24 |
biggest_upsets = mp.make_df_columns_readable(mp.get_biggest_upsets(df), is_tournament)
|
|
|
|
| 28 |
opponent_rating_distr_fig = mp.get_opponent_rating_distr_fig(df)
|
| 29 |
opponent_rating_dist_over_time_fig = mp.get_opponent_rating_dist_over_time_fig(df, is_tournament)
|
| 30 |
|
| 31 |
+
return (#player_name,
|
| 32 |
current_rating,
|
| 33 |
peak_rating,
|
| 34 |
n_competitions_played,
|
|
|
|
| 38 |
opponent_rating_dist_over_time_fig,
|
| 39 |
best_wins,
|
| 40 |
biggest_upsets,
|
| 41 |
+
best_competitions,
|
| 42 |
most_frequent_opponents,
|
| 43 |
highest_rated_opponent,
|
| 44 |
match_with_longest_game,
|
|
|
|
| 77 |
""")
|
| 78 |
|
| 79 |
with gr.Group():
|
| 80 |
+
# with gr.Row():
|
| 81 |
+
# with gr.Column():
|
| 82 |
+
# player_name_box = gr.Textbox(lines=1, label="Player name")
|
| 83 |
with gr.Row():
|
| 84 |
with gr.Column():
|
| 85 |
current_rating_box = gr.Textbox(lines=1, label="Current rating")
|
|
|
|
| 123 |
|
| 124 |
with gr.Row():
|
| 125 |
with gr.Column():
|
| 126 |
+
best_competitions_gdf = gr.Dataframe(
|
| 127 |
+
label="Best competitions (those having the largest increase in rating)",
|
| 128 |
+
max_rows=5)
|
| 129 |
most_frequent_opponents_gdf = gr.Dataframe(label="Most frequent opponents", max_rows=5)
|
| 130 |
highest_rated_opponent_gdf = gr.Dataframe(label="Best opponent", max_rows=1)
|
| 131 |
match_longest_game_gdf = gr.Dataframe(label="Match with longest game", max_rows=1)
|
|
|
|
| 141 |
|
| 142 |
inputs = [input_file]
|
| 143 |
outputs = [
|
| 144 |
+
# player_name_box,
|
| 145 |
current_rating_box,
|
| 146 |
peak_rating_box,
|
| 147 |
num_comps_box,
|
|
|
|
| 151 |
opponent_rating_dist_over_time_plot,
|
| 152 |
best_wins_gdf,
|
| 153 |
biggest_upsets_gdf,
|
| 154 |
+
best_competitions_gdf,
|
| 155 |
most_frequent_opponents_gdf,
|
| 156 |
highest_rated_opponent_gdf,
|
| 157 |
match_longest_game_gdf,
|
match_parser.py
CHANGED
|
@@ -219,6 +219,43 @@ def get_biggest_upsets(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
|
|
| 219 |
return df.loc[df.result == 'Won'].sort_values("rating_difference", ascending=False).head(top_n)
|
| 220 |
|
| 221 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
def get_highest_rated_opponent(df: pd.DataFrame) -> pd.DataFrame:
    """Return the match against the highest-rated opponent as a one-row frame.

    Args:
        df: Match data containing an ``opponent_rating`` column.

    Returns:
        A single-row DataFrame holding the row of ``df`` whose
        ``opponent_rating`` is the largest; its index is that row's label.
    """
    # idxmax() yields an index *label*, so the row must be fetched with the
    # label-based .loc. The positional .iloc only happens to work when the
    # index is a default 0..n-1 RangeIndex.
    return df.loc[df.opponent_rating.idxmax()].to_frame().transpose()
|
| 224 |
|
|
|
|
| 219 |
return df.loc[df.result == 'Won'].sort_values("rating_difference", ascending=False).head(top_n)
|
| 220 |
|
| 221 |
|
| 222 |
+
def get_best_competitions(df: pd.DataFrame, is_tournament: bool, top_n: int = 5) -> pd.DataFrame:
    """Return the competitions with the largest rating increase.

    Competitions are identified by their date column (``tournament_end_date``
    for tournaments, ``event_date`` for leagues). The rating before a
    competition is taken to be the rating recorded for the competition that
    precedes it in date order.

    Args:
        df: Match data with a ``rating`` column plus the competition columns
            selected below for the given ``is_tournament`` value.
        is_tournament: True for tournament data, False for league data.
        top_n: Maximum number of competitions to return.

    Returns:
        One row per competition, sorted by ``rating_increase`` descending,
        with the competition columns followed by ``rating_increase``,
        ``pre-competition_rating`` and ``post-competition_rating``.
        An empty ``df`` gives an empty frame with those columns.
    """
    x_key_name = "tournament_end_date" if is_tournament else "event_date"

    if is_tournament:
        cols = ['tournament_start_date', 'tournament_end_date', 'tournament']
    else:
        cols = ["event_date", "league_name"]
    cols += ['rating_increase', 'pre-competition_rating', 'rating']
    renamed = {"rating": "post-competition_rating"}

    if df.empty:
        # Nothing to rank; df.iloc[-1] below would raise on an empty frame.
        return pd.DataFrame(columns=cols).rename(columns=renamed)

    # Rating recorded for each competition, ordered by competition date
    # (groupby sorts its keys).
    rating_by_competition = df.groupby(x_key_name)['rating'].first()

    # The earliest competition has no predecessor. Its pre-competition rating
    # is deliberately (and knowingly inaccurately) filled with the rating of
    # the last row of df so that the top-k rating differences make sense.
    # NOTE(review): presumably df is ordered newest-first, making the last row
    # the earliest match -- confirm against load_match_df.
    fill_value = df.iloc[-1].rating
    pre_comp_ratings_by_group = rating_by_competition.shift(periods=1, fill_value=fill_value)

    # Work on a copy so the caller's frame does not gain extra columns.
    df = df.copy()
    # Vectorised lookup of each row's competition in the shifted ratings;
    # avoids mutating groups inside groupby.apply, which is fragile across
    # pandas versions.
    df['pre-competition_rating'] = df[x_key_name].map(pre_comp_ratings_by_group)
    df['rating_increase'] = df['rating'] - df['pre-competition_rating']

    best_competition_dates = df.groupby(x_key_name)["rating_increase"].first().nlargest(top_n).index

    # Filter on the active key column (not a hard-coded tournament column) so
    # league data, which has no tournament_end_date, works as well.
    competition_df = (
        df.loc[df[x_key_name].isin(best_competition_dates)]
        .groupby(x_key_name)
        .first()
        .sort_values(by='rating_increase', ascending=False)
        .reset_index()
    )

    return competition_df[cols].rename(columns=renamed)
|
| 257 |
+
|
| 258 |
+
|
| 259 |
def get_highest_rated_opponent(df: pd.DataFrame) -> pd.DataFrame:
    """Return the match against the highest-rated opponent as a one-row frame.

    Args:
        df: Match data containing an ``opponent_rating`` column.

    Returns:
        A single-row DataFrame holding the row of ``df`` whose
        ``opponent_rating`` is the largest; its index is that row's label.
    """
    # idxmax() yields an index *label*, so the row must be fetched with the
    # label-based .loc. The positional .iloc only happens to work when the
    # index is a default 0..n-1 RangeIndex.
    return df.loc[df.opponent_rating.idxmax()].to_frame().transpose()
|
| 261 |
|