mariagrandury committed on
Commit
77ab908
·
1 Parent(s): eacf514

format app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -150
app.py CHANGED
@@ -1,117 +1,58 @@
1
- import gradio as gr
2
- import argilla as rg
3
- import pandas as pd
4
  import os
5
  import time
6
  from collections import defaultdict
7
- from fastapi import FastAPI
8
  from functools import lru_cache
9
 
 
 
 
 
 
10
  client = rg.Argilla(
11
- api_url=os.getenv("ARGILLA_API_URL", ""),
12
- api_key=os.getenv("ARGILLA_API_KEY", "")
13
  )
14
 
15
  countries = {
16
- "Argentina": {
17
- "iso": "ARG",
18
- "emoji": "🇦🇷"
19
- },
20
- "Bolivia": {
21
- "iso": "BOL",
22
- "emoji": "🇧🇴"
23
- },
24
- "Chile": {
25
- "iso": "CHL",
26
- "emoji": "🇨🇱"
27
- },
28
- "Colombia": {
29
- "iso": "COL",
30
- "emoji": "🇨🇴"
31
- },
32
- "Costa Rica": {
33
- "iso": "CRI",
34
- "emoji": "🇨🇷"
35
- },
36
- "Cuba": {
37
- "iso": "CUB",
38
- "emoji": "🇨🇺"
39
- },
40
- "Ecuador": {
41
- "iso": "ECU",
42
- "emoji": "🇪🇨"
43
- },
44
- "El Salvador": {
45
- "iso": "SLV",
46
- "emoji": "🇸🇻"
47
- },
48
- "España": {
49
- "iso": "ESP",
50
- "emoji": "🇪🇸"
51
- },
52
- "Guatemala": {
53
- "iso": "GTM",
54
- "emoji": "🇬🇹"
55
- },
56
- "Honduras": {
57
- "iso": "HND",
58
- "emoji": "🇭🇳"
59
- },
60
- "México": {
61
- "iso": "MEX",
62
- "emoji": "🇲🇽"
63
- },
64
- "Nicaragua": {
65
- "iso": "NIC",
66
- "emoji": "🇳🇮"
67
- },
68
- "Panamá": {
69
- "iso": "PAN",
70
- "emoji": "🇵🇦"
71
- },
72
- "Paraguay": {
73
- "iso": "PRY",
74
- "emoji": "🇵🇾"
75
- },
76
- "Perú": {
77
- "iso": "PER",
78
- "emoji": "🇵🇪"
79
- },
80
- "Puerto Rico": {
81
- "iso": "PRI",
82
- "emoji": "🇵🇷"
83
- },
84
- "República Dominicana": {
85
- "iso": "DOM",
86
- "emoji": "🇩🇴"
87
- },
88
- "Uruguay": {
89
- "iso": "URY",
90
- "emoji": "🇺🇾"
91
- },
92
- "Venezuela": {
93
- "iso": "VEN",
94
- "emoji": "🇻🇪"
95
- }
96
  }
97
 
 
98
  def get_blend_es_data():
99
  data = []
100
-
101
  for country in countries.keys():
102
  iso = countries[country]["iso"]
103
  emoji = countries[country]["emoji"]
104
-
105
  dataset_name = f"{emoji} {country} - {iso} - Responder"
106
-
107
  try:
108
  print(f"Processing dataset: {dataset_name}")
109
  dataset = client.datasets(dataset_name)
110
  records = list(dataset.records(with_responses=True))
111
-
112
  dataset_contributions = defaultdict(int)
113
  user_mapping = {}
114
-
115
  for record in records:
116
  record_dict = record.to_dict()
117
  if "answer_1" in record_dict["responses"]:
@@ -119,7 +60,7 @@ def get_blend_es_data():
119
  if answer["user_id"]:
120
  user_id = answer["user_id"]
121
  dataset_contributions[user_id] += 1
122
-
123
  if user_id not in user_mapping:
124
  try:
125
  user = client.users(id=user_id)
@@ -127,44 +68,45 @@ def get_blend_es_data():
127
  except Exception as e:
128
  print(f"Error getting username for {user_id}: {e}")
129
  user_mapping[user_id] = f"User-{user_id[:8]}"
130
-
131
  for user_id, count in dataset_contributions.items():
132
  username = user_mapping.get(user_id, f"User-{user_id[:8]}")
133
- data.append({
134
- "source": "blend-es",
135
- "username": username,
136
- "count": count
137
- })
138
-
139
  except Exception as e:
140
  print(f"Error processing dataset {dataset_name}: {e}")
141
-
142
  return data
143
 
 
144
  def get_include_data():
145
  data = []
146
  try:
147
  if os.path.exists("include.csv"):
148
  include_df = pd.read_csv("include.csv")
149
- if "Nombre en Discord / username" in include_df.columns and "Número de preguntas / number of questions" in include_df.columns:
 
 
 
150
  discord_users = defaultdict(int)
151
  for _, row in include_df.iterrows():
152
  username = row["Nombre en Discord / username"][1:]
153
  questions = row["Número de preguntas / number of questions"]
154
  if pd.notna(username) and pd.notna(questions):
155
  discord_users[username.lower()] += int(questions)
156
-
157
  for username, count in discord_users.items():
158
- data.append({
159
- "source": "include",
160
- "username": username,
161
- "count": count
162
- })
163
  except Exception as e:
164
  print(f"Error loading include.csv: {e}")
165
-
166
  return data
167
 
 
168
  def get_mail_to_username_mapping():
169
  mail_to_discord = {}
170
  try:
@@ -178,13 +120,14 @@ def get_mail_to_username_mapping():
178
  mail_to_discord[mail.lower()] = discord.lower()
179
  except Exception as e:
180
  print(f"Error loading mail_to_username.csv: {e}")
181
-
182
  return mail_to_discord
183
 
 
184
  def get_estereotipos_data():
185
  data = []
186
  mail_to_discord = get_mail_to_username_mapping()
187
-
188
  try:
189
  if os.path.exists("token_id_counts.csv"):
190
  counts_df = pd.read_csv("token_id_counts.csv")
@@ -195,79 +138,85 @@ def get_estereotipos_data():
195
  count = row["count"]
196
  if pd.notna(mail) and pd.notna(count):
197
  mail_counts[mail.lower()] += int(count)
198
-
199
  for mail, count in mail_counts.items():
200
  username = mail_to_discord.get(mail.lower(), "")
201
  if not username:
202
- username = mail.split('@')[0] if '@' in mail else mail
203
-
204
- data.append({
205
- "source": "estereotipos",
206
- "username": username,
207
- "count": count
208
- })
209
  except Exception as e:
210
  print(f"Error loading estereotipos data: {e}")
211
-
212
  return data
213
 
 
214
  def get_arena_data():
215
  data = []
216
  mail_to_discord = get_mail_to_username_mapping()
217
-
218
  try:
219
  if os.path.exists("arena.json"):
220
  import json
 
221
  with open("arena.json", "r", encoding="utf-8") as f:
222
  arena_data = json.load(f)
223
-
224
  mail_counts = defaultdict(int)
225
-
226
  for country, conversations in arena_data.items():
227
  for conversation in conversations:
228
  if "username" in conversation:
229
  mail = conversation["username"]
230
  if mail:
231
  mail_counts[mail.lower()] += 1
232
-
233
  for mail, count in mail_counts.items():
234
  username = mail_to_discord.get(mail.lower(), "")
235
  if not username:
236
- username = mail.split('@')[0] if '@' in mail else mail
237
-
238
- data.append({
239
- "source": "arena",
240
- "username": username,
241
- "count": count
242
- })
243
  except Exception as e:
244
  print(f"Error loading arena data: {e}")
245
-
246
  return data
247
 
 
248
  @lru_cache(maxsize=32)
249
  def get_user_contributions_cached(cache_buster: int):
250
  return consolidate_all_data()
251
 
 
252
  def consolidate_all_data():
253
  all_data = []
254
  all_data.extend(get_blend_es_data())
255
  all_data.extend(get_include_data())
256
  all_data.extend(get_estereotipos_data())
257
  all_data.extend(get_arena_data())
258
-
259
- user_contributions = defaultdict(lambda: {"username": "", "blend_es": 0, "include": 0, "estereotipos": 0, "arena": 0})
260
-
 
 
 
 
 
 
 
 
261
  for item in all_data:
262
  source = item["source"]
263
  username = item["username"]
264
  count = item["count"]
265
-
266
  user_key = username.lower()
267
-
268
  if not user_contributions[user_key]["username"]:
269
  user_contributions[user_key]["username"] = username
270
-
271
  if source == "blend-es":
272
  user_contributions[user_key]["blend_es"] += count
273
  elif source == "include":
@@ -276,36 +225,40 @@ def consolidate_all_data():
276
  user_contributions[user_key]["estereotipos"] += count
277
  elif source == "arena":
278
  user_contributions[user_key]["arena"] += count
279
-
280
  rows = []
281
  for _, data in user_contributions.items():
282
- total = data["blend_es"] + data["include"] + data["estereotipos"] + data["arena"]
 
 
283
  row = {
284
  "Username": data["username"],
285
  "Total": total,
286
  "Blend-es": data["blend_es"],
287
  "INCLUDE": data["include"],
288
  "Estereotipos": data["estereotipos"],
289
- "Arena": data["arena"]
290
  }
291
  rows.append(row)
292
-
293
  df = pd.DataFrame(rows)
294
-
295
  if not df.empty:
296
  df = df.sort_values("Total", ascending=False)
297
-
298
  return df
299
 
 
300
  app = FastAPI()
301
 
302
  last_update_time = 0
303
  cached_data = None
304
 
 
305
  def create_leaderboard_ui():
306
  global cached_data, last_update_time
307
  current_time = time.time()
308
-
309
  if cached_data is not None and current_time - last_update_time < 300:
310
  df = cached_data
311
  else:
@@ -313,15 +266,15 @@ def create_leaderboard_ui():
313
  df = get_user_contributions_cached(cache_buster)
314
  cached_data = df
315
  last_update_time = current_time
316
-
317
  if not df.empty:
318
  df = df.reset_index(drop=True)
319
  df.index = df.index + 1
320
  df = df.rename_axis("Rank")
321
  df = df.reset_index()
322
-
323
  df_html = df.to_html(classes="leaderboard-table", border=0, index=False)
324
-
325
  styled_html = f"""
326
  <div style="margin: 20px 0;">
327
  <p>Última Actualización: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(last_update_time))}</p>
@@ -390,18 +343,20 @@ def create_leaderboard_ui():
390
  """
391
  return styled_html
392
 
 
393
  def refresh_data():
394
  global cached_data, last_update_time
395
  cached_data = None
396
  last_update_time = 0
397
  return create_leaderboard_ui()
398
 
 
399
  with gr.Blocks(theme=gr.themes.Default()) as demo:
400
  with gr.Column(scale=1):
401
  gr.Markdown("""# 🏆 Hackaton Leaderboard""")
402
-
403
  leaderboard_html = gr.HTML(create_leaderboard_ui)
404
-
405
  refresh_btn = gr.Button("🔄 Actualizar Datos", variant="primary")
406
  refresh_btn.click(fn=refresh_data, outputs=leaderboard_html)
407
 
@@ -409,4 +364,5 @@ gr.mount_gradio_app(app, demo, path="/")
409
 
410
  if __name__ == "__main__":
411
  import uvicorn
412
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
 
 
 
1
  import os
2
  import time
3
  from collections import defaultdict
 
4
  from functools import lru_cache
5
 
6
+ import argilla as rg
7
+ import gradio as gr
8
+ import pandas as pd
9
+ from fastapi import FastAPI
10
+
11
# Shared Argilla client. Both settings come from the environment and fall
# back to an empty string when the variable is not set.
client = rg.Argilla(
    api_key=os.getenv("ARGILLA_API_KEY", ""),
    api_url=os.getenv("ARGILLA_API_URL", ""),
)
14
 
15
# Country display name -> {"iso": three-letter code, "emoji": flag}.
# get_blend_es_data() combines all three values into the dataset name
# f"{emoji} {country} - {iso} - Responder".
countries = {
    name: {"iso": iso, "emoji": emoji}
    for name, iso, emoji in (
        ("Argentina", "ARG", "🇦🇷"),
        ("Bolivia", "BOL", "🇧🇴"),
        ("Chile", "CHL", "🇨🇱"),
        ("Colombia", "COL", "🇨🇴"),
        ("Costa Rica", "CRI", "🇨🇷"),
        ("Cuba", "CUB", "🇨🇺"),
        ("Ecuador", "ECU", "🇪🇨"),
        ("El Salvador", "SLV", "🇸🇻"),
        ("España", "ESP", "🇪🇸"),
        ("Guatemala", "GTM", "🇬🇹"),
        ("Honduras", "HND", "🇭🇳"),
        ("México", "MEX", "🇲🇽"),
        ("Nicaragua", "NIC", "🇳🇮"),
        ("Panamá", "PAN", "🇵🇦"),
        ("Paraguay", "PRY", "🇵🇾"),
        ("Perú", "PER", "🇵🇪"),
        ("Puerto Rico", "PRI", "🇵🇷"),
        ("República Dominicana", "DOM", "🇩🇴"),
        ("Uruguay", "URY", "🇺🇾"),
        ("Venezuela", "VEN", "🇻🇪"),
    )
}
37
 
38
+
39
  def get_blend_es_data():
40
  data = []
41
+
42
  for country in countries.keys():
43
  iso = countries[country]["iso"]
44
  emoji = countries[country]["emoji"]
45
+
46
  dataset_name = f"{emoji} {country} - {iso} - Responder"
47
+
48
  try:
49
  print(f"Processing dataset: {dataset_name}")
50
  dataset = client.datasets(dataset_name)
51
  records = list(dataset.records(with_responses=True))
52
+
53
  dataset_contributions = defaultdict(int)
54
  user_mapping = {}
55
+
56
  for record in records:
57
  record_dict = record.to_dict()
58
  if "answer_1" in record_dict["responses"]:
 
60
  if answer["user_id"]:
61
  user_id = answer["user_id"]
62
  dataset_contributions[user_id] += 1
63
+
64
  if user_id not in user_mapping:
65
  try:
66
  user = client.users(id=user_id)
 
68
  except Exception as e:
69
  print(f"Error getting username for {user_id}: {e}")
70
  user_mapping[user_id] = f"User-{user_id[:8]}"
71
+
72
  for user_id, count in dataset_contributions.items():
73
  username = user_mapping.get(user_id, f"User-{user_id[:8]}")
74
+ data.append(
75
+ {"source": "blend-es", "username": username, "count": count}
76
+ )
77
+
 
 
78
  except Exception as e:
79
  print(f"Error processing dataset {dataset_name}: {e}")
80
+
81
  return data
82
 
83
+
84
def get_include_data():
    """Load per-user INCLUDE question counts from ``include.csv``.

    Reads ``include.csv`` from the current working directory. The file must
    contain the columns "Nombre en Discord / username" and
    "Número de preguntas / number of questions"; otherwise nothing is loaded.
    Counts are summed per lower-cased username.

    Rows with a blank (or non-string) username, a blank question count, or a
    question count that cannot be converted to ``int`` are skipped one by
    one, so a single malformed row no longer discards the whole file.

    Returns:
        A list of ``{"source": "include", "username": str, "count": int}``
        dicts, one per distinct username; empty when the file or the
        expected columns are missing, or when loading fails.
    """
    username_col = "Nombre en Discord / username"
    questions_col = "Número de preguntas / number of questions"

    data = []
    try:
        if os.path.exists("include.csv"):
            include_df = pd.read_csv("include.csv")
            if (
                username_col in include_df.columns
                and questions_col in include_df.columns
            ):
                discord_users = defaultdict(int)
                for raw_username, questions in zip(
                    include_df[username_col], include_df[questions_col]
                ):
                    # pandas represents a blank cell as NaN (a float), which
                    # cannot be sliced, so filter before touching the value.
                    if not isinstance(raw_username, str) or pd.isna(questions):
                        continue

                    try:
                        question_count = int(questions)
                    except (TypeError, ValueError):
                        print(
                            f"Skipping include.csv row with invalid count: {questions!r}"
                        )
                        continue

                    # The first character is dropped unconditionally, as in
                    # the original code -- presumably a leading "@";
                    # TODO confirm against the form data.
                    username = raw_username[1:]
                    discord_users[username.lower()] += question_count

                for username, count in discord_users.items():
                    data.append(
                        {"source": "include", "username": username, "count": count}
                    )
    except Exception as e:
        # Best-effort source: report the problem and return what we have.
        print(f"Error loading include.csv: {e}")

    return data
108
 
109
+
110
  def get_mail_to_username_mapping():
111
  mail_to_discord = {}
112
  try:
 
120
  mail_to_discord[mail.lower()] = discord.lower()
121
  except Exception as e:
122
  print(f"Error loading mail_to_username.csv: {e}")
123
+
124
  return mail_to_discord
125
 
126
+
127
  def get_estereotipos_data():
128
  data = []
129
  mail_to_discord = get_mail_to_username_mapping()
130
+
131
  try:
132
  if os.path.exists("token_id_counts.csv"):
133
  counts_df = pd.read_csv("token_id_counts.csv")
 
138
  count = row["count"]
139
  if pd.notna(mail) and pd.notna(count):
140
  mail_counts[mail.lower()] += int(count)
141
+
142
  for mail, count in mail_counts.items():
143
  username = mail_to_discord.get(mail.lower(), "")
144
  if not username:
145
+ username = mail.split("@")[0] if "@" in mail else mail
146
+
147
+ data.append(
148
+ {"source": "estereotipos", "username": username, "count": count}
149
+ )
 
 
150
  except Exception as e:
151
  print(f"Error loading estereotipos data: {e}")
152
+
153
  return data
154
 
155
+
156
def get_arena_data():
    """Count arena conversations per user from ``arena.json``.

    ``arena.json`` is read as a mapping whose values are lists of
    conversations; every conversation with a truthy "username" entry adds
    one to that (lower-cased) identifier's tally. Each identifier is then
    translated through get_mail_to_username_mapping(); when no mapping
    exists, the part before the first "@" (or the whole identifier) is used.

    Returns:
        A list of ``{"source": "arena", "username": str, "count": int}``
        dicts. Empty when ``arena.json`` is absent; on any error the problem
        is printed and whatever was collected so far is returned.
    """
    entries = []
    email_to_handle = get_mail_to_username_mapping()

    try:
        if not os.path.exists("arena.json"):
            return entries

        import json

        with open("arena.json", "r", encoding="utf-8") as fh:
            conversations_by_country = json.load(fh)

        tally = defaultdict(int)
        for conversations in conversations_by_country.values():
            for conv in conversations:
                if "username" in conv and conv["username"]:
                    tally[conv["username"].lower()] += 1

        for email, total in tally.items():
            # Fall back to the local part of the address when unmapped.
            handle = email_to_handle.get(email.lower(), "") or email.partition("@")[0]
            entries.append({"source": "arena", "username": handle, "count": total})
    except Exception as e:
        print(f"Error loading arena data: {e}")

    return entries
186
 
187
+
188
@lru_cache(maxsize=32)
def get_user_contributions_cached(cache_buster: int):
    """Return the consolidated leaderboard, memoised per ``cache_buster``.

    ``cache_buster`` takes no part in the computation; it is only the
    ``lru_cache`` key, so a value not seen before triggers a fresh call to
    consolidate_all_data().
    """
    leaderboard = consolidate_all_data()
    return leaderboard
191
 
192
+
193
  def consolidate_all_data():
194
  all_data = []
195
  all_data.extend(get_blend_es_data())
196
  all_data.extend(get_include_data())
197
  all_data.extend(get_estereotipos_data())
198
  all_data.extend(get_arena_data())
199
+
200
+ user_contributions = defaultdict(
201
+ lambda: {
202
+ "username": "",
203
+ "blend_es": 0,
204
+ "include": 0,
205
+ "estereotipos": 0,
206
+ "arena": 0,
207
+ }
208
+ )
209
+
210
  for item in all_data:
211
  source = item["source"]
212
  username = item["username"]
213
  count = item["count"]
214
+
215
  user_key = username.lower()
216
+
217
  if not user_contributions[user_key]["username"]:
218
  user_contributions[user_key]["username"] = username
219
+
220
  if source == "blend-es":
221
  user_contributions[user_key]["blend_es"] += count
222
  elif source == "include":
 
225
  user_contributions[user_key]["estereotipos"] += count
226
  elif source == "arena":
227
  user_contributions[user_key]["arena"] += count
228
+
229
  rows = []
230
  for _, data in user_contributions.items():
231
+ total = (
232
+ data["blend_es"] + data["include"] + data["estereotipos"] + data["arena"]
233
+ )
234
  row = {
235
  "Username": data["username"],
236
  "Total": total,
237
  "Blend-es": data["blend_es"],
238
  "INCLUDE": data["include"],
239
  "Estereotipos": data["estereotipos"],
240
+ "Arena": data["arena"],
241
  }
242
  rows.append(row)
243
+
244
  df = pd.DataFrame(rows)
245
+
246
  if not df.empty:
247
  df = df.sort_values("Total", ascending=False)
248
+
249
  return df
250
 
251
+
252
# ASGI application object served by uvicorn in the __main__ guard.
app = FastAPI()

# Module-level cache shared by create_leaderboard_ui() and refresh_data():
# the last leaderboard that was built and the time.time() value at which it
# was stored.
cached_data = None
last_update_time = 0
256
 
257
+
258
  def create_leaderboard_ui():
259
  global cached_data, last_update_time
260
  current_time = time.time()
261
+
262
  if cached_data is not None and current_time - last_update_time < 300:
263
  df = cached_data
264
  else:
 
266
  df = get_user_contributions_cached(cache_buster)
267
  cached_data = df
268
  last_update_time = current_time
269
+
270
  if not df.empty:
271
  df = df.reset_index(drop=True)
272
  df.index = df.index + 1
273
  df = df.rename_axis("Rank")
274
  df = df.reset_index()
275
+
276
  df_html = df.to_html(classes="leaderboard-table", border=0, index=False)
277
+
278
  styled_html = f"""
279
  <div style="margin: 20px 0;">
280
  <p>Última Actualización: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(last_update_time))}</p>
 
343
  """
344
  return styled_html
345
 
346
+
347
def refresh_data():
    """Clear the module-level leaderboard cache and re-render the HTML.

    Returns:
        Whatever create_leaderboard_ui() returns after the reset.
    """
    global cached_data, last_update_time
    # With cached_data set to None, create_leaderboard_ui() cannot take its
    # "reuse cached_data" branch.
    cached_data, last_update_time = None, 0
    return create_leaderboard_ui()
352
 
353
+
354
  with gr.Blocks(theme=gr.themes.Default()) as demo:
355
  with gr.Column(scale=1):
356
  gr.Markdown("""# 🏆 Hackaton Leaderboard""")
357
+
358
  leaderboard_html = gr.HTML(create_leaderboard_ui)
359
+
360
  refresh_btn = gr.Button("🔄 Actualizar Datos", variant="primary")
361
  refresh_btn.click(fn=refresh_data, outputs=leaderboard_html)
362
 
 
364
 
365
if __name__ == "__main__":
    # Imported here so uvicorn is only required when the file is run as a
    # script.
    import uvicorn

    # Listen on all interfaces, port 7860.
    uvicorn.run(app, port=7860, host="0.0.0.0")