Commit
·
7f24c35
1
Parent(s):
62ef16f
add ema and fix bugs
Browse files- app.py +8 -8
- match_parser.py +23 -5
app.py
CHANGED
@@ -122,12 +122,12 @@ with gr.Blocks() as demo:
|
|
122 |
with gr.Row():
|
123 |
with gr.Column():
|
124 |
best_wins_gdf = gr.Dataframe(label="Best wins (matches won sorted by opponent post-competition rating)",
|
125 |
-
|
126 |
biggest_upsets_gdf = gr.Dataframe(label="Biggest upsets (matches won sorted by rating - opponent post-competition rating)",
|
127 |
-
|
128 |
worst_recent_losses_gdf = gr.Dataframe(label="Worst recent losses (matches lost sorted by opponent "
|
129 |
"post-competition rating from the 5 most recent "
|
130 |
-
"competitions)",
|
131 |
|
132 |
gr.Markdown("""<br />
|
133 |
|
@@ -140,11 +140,11 @@ with gr.Blocks() as demo:
|
|
140 |
with gr.Column():
|
141 |
best_competitions_gdf = gr.Dataframe(
|
142 |
label="Best competitions (those having the largest increase in rating)",
|
143 |
-
|
144 |
-
most_frequent_opponents_gdf = gr.Dataframe(label="Most frequent opponents",
|
145 |
-
highest_rated_opponent_gdf = gr.Dataframe(label="Highest rated opponent",
|
146 |
-
match_longest_game_gdf = gr.Dataframe(label="Match with longest game",
|
147 |
-
longest_match_gdf = gr.Dataframe(label="Longest match (highest number of points played)",
|
148 |
|
149 |
with gr.Row():
|
150 |
with gr.Column():
|
|
|
122 |
with gr.Row():
|
123 |
with gr.Column():
|
124 |
best_wins_gdf = gr.Dataframe(label="Best wins (matches won sorted by opponent post-competition rating)",
|
125 |
+
height=500)
|
126 |
biggest_upsets_gdf = gr.Dataframe(label="Biggest upsets (matches won sorted by rating - opponent post-competition rating)",
|
127 |
+
height=500)
|
128 |
worst_recent_losses_gdf = gr.Dataframe(label="Worst recent losses (matches lost sorted by opponent "
|
129 |
"post-competition rating from the 5 most recent "
|
130 |
+
"competitions)", height=500)
|
131 |
|
132 |
gr.Markdown("""<br />
|
133 |
|
|
|
140 |
with gr.Column():
|
141 |
best_competitions_gdf = gr.Dataframe(
|
142 |
label="Best competitions (those having the largest increase in rating)",
|
143 |
+
height=500)
|
144 |
+
most_frequent_opponents_gdf = gr.Dataframe(label="Most frequent opponents", height=500)
|
145 |
+
highest_rated_opponent_gdf = gr.Dataframe(label="Highest rated opponent", height=100)
|
146 |
+
match_longest_game_gdf = gr.Dataframe(label="Match with longest game", height=100)
|
147 |
+
longest_match_gdf = gr.Dataframe(label="Longest match (highest number of points played)", height=100)
|
148 |
|
149 |
with gr.Row():
|
150 |
with gr.Column():
|
match_parser.py
CHANGED
@@ -64,6 +64,8 @@ def make_df_columns_readable(df: Optional[pd.DataFrame], is_tournament: bool) ->
|
|
64 |
nat_to_none = lambda x: None if x == "NaT" else x
|
65 |
if is_tournament:
|
66 |
if "tournament_start_date" in df.columns and "tournament_end_date" in df.columns:
|
|
|
|
|
67 |
df['tournament_start_date'] = df['tournament_start_date'].dt.date.astype(str).apply(nat_to_none)
|
68 |
df['tournament_end_date'] = df['tournament_end_date'].dt.date.astype(str).apply(nat_to_none)
|
69 |
|
@@ -87,6 +89,7 @@ def make_df_columns_readable(df: Optional[pd.DataFrame], is_tournament: bool) ->
|
|
87 |
df = df.loc[:, columns]
|
88 |
else:
|
89 |
if "event_date" in df.columns:
|
|
|
90 |
df['event_date'] = df['event_date'].dt.date.astype(str).apply(nat_to_none)
|
91 |
df = df.rename(columns={"league_name": "league"})
|
92 |
|
@@ -144,7 +147,7 @@ def get_max_rating(df: pd.DataFrame) -> int:
|
|
144 |
def get_matches_per_competition_fig(df: pd.DataFrame, is_tournament: bool):
|
145 |
fig = plt.figure()
|
146 |
plt.title('Matches per competition')
|
147 |
-
sns.histplot(df.groupby('tournament' if is_tournament else "event_date").size())
|
148 |
plt.xlabel('Number of matches in competition')
|
149 |
return fig
|
150 |
|
@@ -166,19 +169,33 @@ def get_opponent_name_word_cloud_fig(df: pd.DataFrame):
|
|
166 |
return fig
|
167 |
|
168 |
|
169 |
-
def get_rating_over_time_fig(df: pd.DataFrame, is_tournament: bool):
|
|
|
|
|
170 |
fig = go.Figure()
|
171 |
-
|
|
|
|
|
|
|
172 |
y=df["rating"],
|
|
|
173 |
mode='lines+markers',
|
174 |
line=dict( width=0.9),
|
175 |
marker=dict(size=4))),
|
176 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
fig.update_layout(
|
178 |
title='Rating over time',
|
179 |
xaxis_title='Competition date',
|
180 |
yaxis_title='Rating',
|
181 |
-
showlegend=
|
182 |
template="plotly_white",
|
183 |
)
|
184 |
|
@@ -209,7 +226,7 @@ def get_win_loss_record_str(group_df) -> str:
|
|
209 |
def get_most_frequent_opponents(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
|
210 |
df_with_opponents = df.loc[df.opponent != "-, -"]
|
211 |
|
212 |
-
most_common_opponents_df = df_with_opponents.groupby('opponent').agg({"result": [get_win_loss_record_str, "size"]})
|
213 |
most_common_opponents_df.columns = most_common_opponents_df.columns.get_level_values(1)
|
214 |
most_common_opponents_df.rename({"get_win_loss_record_str": "Win/loss record", "size": "Number of matches"}, axis=1,
|
215 |
inplace=True)
|
@@ -260,6 +277,7 @@ def get_best_competitions(df: pd.DataFrame, is_tournament: bool, top_n: int = 5)
|
|
260 |
df = grouped.apply(lambda x: assign_pre_comp_rating(x))
|
261 |
|
262 |
df['rating_increase'] = df['rating'] - df['pre-competition_rating']
|
|
|
263 |
best_competition_dates = df.groupby(x_key_name)["rating_increase"].first().nlargest(top_n).index
|
264 |
|
265 |
tournament_df = df.loc[df[x_key_name].isin(best_competition_dates)].groupby(
|
|
|
64 |
nat_to_none = lambda x: None if x == "NaT" else x
|
65 |
if is_tournament:
|
66 |
if "tournament_start_date" in df.columns and "tournament_end_date" in df.columns:
|
67 |
+
df['tournament_start_date'] = pd.to_datetime(df['tournament_start_date'])
|
68 |
+
df['tournament_end_date'] = pd.to_datetime(df['tournament_end_date'])
|
69 |
df['tournament_start_date'] = df['tournament_start_date'].dt.date.astype(str).apply(nat_to_none)
|
70 |
df['tournament_end_date'] = df['tournament_end_date'].dt.date.astype(str).apply(nat_to_none)
|
71 |
|
|
|
89 |
df = df.loc[:, columns]
|
90 |
else:
|
91 |
if "event_date" in df.columns:
|
92 |
+
df['event_date'] = pd.to_datetime(df['event_date'])
|
93 |
df['event_date'] = df['event_date'].dt.date.astype(str).apply(nat_to_none)
|
94 |
df = df.rename(columns={"league_name": "league"})
|
95 |
|
|
|
147 |
def get_matches_per_competition_fig(df: pd.DataFrame, is_tournament: bool):
|
148 |
fig = plt.figure()
|
149 |
plt.title('Matches per competition')
|
150 |
+
sns.histplot(df.groupby('tournament' if is_tournament else "event_date", observed=False).size())
|
151 |
plt.xlabel('Number of matches in competition')
|
152 |
return fig
|
153 |
|
|
|
169 |
return fig
|
170 |
|
171 |
|
172 |
+
def get_rating_over_time_fig(df: pd.DataFrame, is_tournament: bool, span: int = 60):
|
173 |
+
df['ema'] = df['rating'].ewm(span=span, adjust=False).mean()
|
174 |
+
|
175 |
fig = go.Figure()
|
176 |
+
|
177 |
+
# Raw rating over time trace
|
178 |
+
x_key_name = "tournament_end_date" if is_tournament else "event_date"
|
179 |
+
fig.add_trace(go.Scatter(x=df[x_key_name],
|
180 |
y=df["rating"],
|
181 |
+
name='Rating',
|
182 |
mode='lines+markers',
|
183 |
line=dict( width=0.9),
|
184 |
marker=dict(size=4))),
|
185 |
|
186 |
+
# EMA trace
|
187 |
+
fig.add_trace(go.Scatter(x=df[x_key_name],
|
188 |
+
y=df["ema"],
|
189 |
+
mode='lines',
|
190 |
+
name='Rating EMA',
|
191 |
+
visible='legendonly',
|
192 |
+
line=dict(width=1.5, dash='dot')))
|
193 |
+
|
194 |
fig.update_layout(
|
195 |
title='Rating over time',
|
196 |
xaxis_title='Competition date',
|
197 |
yaxis_title='Rating',
|
198 |
+
showlegend=True,
|
199 |
template="plotly_white",
|
200 |
)
|
201 |
|
|
|
226 |
def get_most_frequent_opponents(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
|
227 |
df_with_opponents = df.loc[df.opponent != "-, -"]
|
228 |
|
229 |
+
most_common_opponents_df = df_with_opponents.groupby('opponent', observed=False).agg({"result": [get_win_loss_record_str, "size"]})
|
230 |
most_common_opponents_df.columns = most_common_opponents_df.columns.get_level_values(1)
|
231 |
most_common_opponents_df.rename({"get_win_loss_record_str": "Win/loss record", "size": "Number of matches"}, axis=1,
|
232 |
inplace=True)
|
|
|
277 |
df = grouped.apply(lambda x: assign_pre_comp_rating(x))
|
278 |
|
279 |
df['rating_increase'] = df['rating'] - df['pre-competition_rating']
|
280 |
+
df.reset_index(drop=True, inplace=True)
|
281 |
best_competition_dates = df.groupby(x_key_name)["rating_increase"].first().nlargest(top_n).index
|
282 |
|
283 |
tournament_df = df.loc[df[x_key_name].isin(best_competition_dates)].groupby(
|