lschlessinger committed on
Commit
7f24c35
·
1 Parent(s): 62ef16f

add ema and fix bugs

Browse files
Files changed (2) hide show
  1. app.py +8 -8
  2. match_parser.py +23 -5
app.py CHANGED
@@ -122,12 +122,12 @@ with gr.Blocks() as demo:
122
  with gr.Row():
123
  with gr.Column():
124
  best_wins_gdf = gr.Dataframe(label="Best wins (matches won sorted by opponent post-competition rating)",
125
- max_rows=5)
126
  biggest_upsets_gdf = gr.Dataframe(label="Biggest upsets (matches won sorted by rating - opponent post-competition rating)",
127
- max_rows=5)
128
  worst_recent_losses_gdf = gr.Dataframe(label="Worst recent losses (matches lost sorted by opponent "
129
  "post-competition rating from the 5 most recent "
130
- "competitions)", max_rows=5)
131
 
132
  gr.Markdown("""<br />
133
 
@@ -140,11 +140,11 @@ with gr.Blocks() as demo:
140
  with gr.Column():
141
  best_competitions_gdf = gr.Dataframe(
142
  label="Best competitions (those having the largest increase in rating)",
143
- max_rows=5)
144
- most_frequent_opponents_gdf = gr.Dataframe(label="Most frequent opponents", max_rows=5)
145
- highest_rated_opponent_gdf = gr.Dataframe(label="Highest rated opponent", max_rows=1)
146
- match_longest_game_gdf = gr.Dataframe(label="Match with longest game", max_rows=1)
147
- longest_match_gdf = gr.Dataframe(label="Longest match (highest number of points played)", max_rows=1)
148
 
149
  with gr.Row():
150
  with gr.Column():
 
122
  with gr.Row():
123
  with gr.Column():
124
  best_wins_gdf = gr.Dataframe(label="Best wins (matches won sorted by opponent post-competition rating)",
125
+ height=500)
126
  biggest_upsets_gdf = gr.Dataframe(label="Biggest upsets (matches won sorted by rating - opponent post-competition rating)",
127
+ height=500)
128
  worst_recent_losses_gdf = gr.Dataframe(label="Worst recent losses (matches lost sorted by opponent "
129
  "post-competition rating from the 5 most recent "
130
+ "competitions)", height=500)
131
 
132
  gr.Markdown("""<br />
133
 
 
140
  with gr.Column():
141
  best_competitions_gdf = gr.Dataframe(
142
  label="Best competitions (those having the largest increase in rating)",
143
+ height=500)
144
+ most_frequent_opponents_gdf = gr.Dataframe(label="Most frequent opponents", height=500)
145
+ highest_rated_opponent_gdf = gr.Dataframe(label="Highest rated opponent", height=100)
146
+ match_longest_game_gdf = gr.Dataframe(label="Match with longest game", height=100)
147
+ longest_match_gdf = gr.Dataframe(label="Longest match (highest number of points played)", height=100)
148
 
149
  with gr.Row():
150
  with gr.Column():
match_parser.py CHANGED
@@ -64,6 +64,8 @@ def make_df_columns_readable(df: Optional[pd.DataFrame], is_tournament: bool) ->
64
  nat_to_none = lambda x: None if x == "NaT" else x
65
  if is_tournament:
66
  if "tournament_start_date" in df.columns and "tournament_end_date" in df.columns:
 
 
67
  df['tournament_start_date'] = df['tournament_start_date'].dt.date.astype(str).apply(nat_to_none)
68
  df['tournament_end_date'] = df['tournament_end_date'].dt.date.astype(str).apply(nat_to_none)
69
 
@@ -87,6 +89,7 @@ def make_df_columns_readable(df: Optional[pd.DataFrame], is_tournament: bool) ->
87
  df = df.loc[:, columns]
88
  else:
89
  if "event_date" in df.columns:
 
90
  df['event_date'] = df['event_date'].dt.date.astype(str).apply(nat_to_none)
91
  df = df.rename(columns={"league_name": "league"})
92
 
@@ -144,7 +147,7 @@ def get_max_rating(df: pd.DataFrame) -> int:
144
  def get_matches_per_competition_fig(df: pd.DataFrame, is_tournament: bool):
145
  fig = plt.figure()
146
  plt.title('Matches per competition')
147
- sns.histplot(df.groupby('tournament' if is_tournament else "event_date").size())
148
  plt.xlabel('Number of matches in competition')
149
  return fig
150
 
@@ -166,19 +169,33 @@ def get_opponent_name_word_cloud_fig(df: pd.DataFrame):
166
  return fig
167
 
168
 
169
- def get_rating_over_time_fig(df: pd.DataFrame, is_tournament: bool):
 
 
170
  fig = go.Figure()
171
- fig.add_trace(go.Scatter(x=df["tournament_end_date" if is_tournament else "event_date"],
 
 
 
172
  y=df["rating"],
 
173
  mode='lines+markers',
174
  line=dict( width=0.9),
175
  marker=dict(size=4))),
176
 
 
 
 
 
 
 
 
 
177
  fig.update_layout(
178
  title='Rating over time',
179
  xaxis_title='Competition date',
180
  yaxis_title='Rating',
181
- showlegend=False,
182
  template="plotly_white",
183
  )
184
 
@@ -209,7 +226,7 @@ def get_win_loss_record_str(group_df) -> str:
209
  def get_most_frequent_opponents(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
210
  df_with_opponents = df.loc[df.opponent != "-, -"]
211
 
212
- most_common_opponents_df = df_with_opponents.groupby('opponent').agg({"result": [get_win_loss_record_str, "size"]})
213
  most_common_opponents_df.columns = most_common_opponents_df.columns.get_level_values(1)
214
  most_common_opponents_df.rename({"get_win_loss_record_str": "Win/loss record", "size": "Number of matches"}, axis=1,
215
  inplace=True)
@@ -260,6 +277,7 @@ def get_best_competitions(df: pd.DataFrame, is_tournament: bool, top_n: int = 5)
260
  df = grouped.apply(lambda x: assign_pre_comp_rating(x))
261
 
262
  df['rating_increase'] = df['rating'] - df['pre-competition_rating']
 
263
  best_competition_dates = df.groupby(x_key_name)["rating_increase"].first().nlargest(top_n).index
264
 
265
  tournament_df = df.loc[df[x_key_name].isin(best_competition_dates)].groupby(
 
64
  nat_to_none = lambda x: None if x == "NaT" else x
65
  if is_tournament:
66
  if "tournament_start_date" in df.columns and "tournament_end_date" in df.columns:
67
+ df['tournament_start_date'] = pd.to_datetime(df['tournament_start_date'])
68
+ df['tournament_end_date'] = pd.to_datetime(df['tournament_end_date'])
69
  df['tournament_start_date'] = df['tournament_start_date'].dt.date.astype(str).apply(nat_to_none)
70
  df['tournament_end_date'] = df['tournament_end_date'].dt.date.astype(str).apply(nat_to_none)
71
 
 
89
  df = df.loc[:, columns]
90
  else:
91
  if "event_date" in df.columns:
92
+ df['event_date'] = pd.to_datetime(df['event_date'])
93
  df['event_date'] = df['event_date'].dt.date.astype(str).apply(nat_to_none)
94
  df = df.rename(columns={"league_name": "league"})
95
 
 
147
  def get_matches_per_competition_fig(df: pd.DataFrame, is_tournament: bool):
148
  fig = plt.figure()
149
  plt.title('Matches per competition')
150
+ sns.histplot(df.groupby('tournament' if is_tournament else "event_date", observed=False).size())
151
  plt.xlabel('Number of matches in competition')
152
  return fig
153
 
 
169
  return fig
170
 
171
 
172
+ def get_rating_over_time_fig(df: pd.DataFrame, is_tournament: bool, span: int = 60):
173
+ df['ema'] = df['rating'].ewm(span=span, adjust=False).mean()
174
+
175
  fig = go.Figure()
176
+
177
+ # Raw rating over time trace
178
+ x_key_name = "tournament_end_date" if is_tournament else "event_date"
179
+ fig.add_trace(go.Scatter(x=df[x_key_name],
180
  y=df["rating"],
181
+ name='Rating',
182
  mode='lines+markers',
183
  line=dict( width=0.9),
184
  marker=dict(size=4))),
185
 
186
+ # EMA trace
187
+ fig.add_trace(go.Scatter(x=df[x_key_name],
188
+ y=df["ema"],
189
+ mode='lines',
190
+ name='Rating EMA',
191
+ visible='legendonly',
192
+ line=dict(width=1.5, dash='dot')))
193
+
194
  fig.update_layout(
195
  title='Rating over time',
196
  xaxis_title='Competition date',
197
  yaxis_title='Rating',
198
+ showlegend=True,
199
  template="plotly_white",
200
  )
201
 
 
226
  def get_most_frequent_opponents(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
227
  df_with_opponents = df.loc[df.opponent != "-, -"]
228
 
229
+ most_common_opponents_df = df_with_opponents.groupby('opponent', observed=False).agg({"result": [get_win_loss_record_str, "size"]})
230
  most_common_opponents_df.columns = most_common_opponents_df.columns.get_level_values(1)
231
  most_common_opponents_df.rename({"get_win_loss_record_str": "Win/loss record", "size": "Number of matches"}, axis=1,
232
  inplace=True)
 
277
  df = grouped.apply(lambda x: assign_pre_comp_rating(x))
278
 
279
  df['rating_increase'] = df['rating'] - df['pre-competition_rating']
280
+ df.reset_index(drop=True, inplace=True)
281
  best_competition_dates = df.groupby(x_key_name)["rating_increase"].first().nlargest(top_n).index
282
 
283
  tournament_df = df.loc[df[x_key_name].isin(best_competition_dates)].groupby(