elmadany committed on
Commit
345905a
·
verified ·
1 Parent(s): 3d1f5bb

Delete src/helper.py

Browse files
Files changed (1) hide show
  1. src/helper.py +0 -312
src/helper.py DELETED
@@ -1,312 +0,0 @@
1
- import pandas as pd
2
- from statistics import mean
3
- import pandas as pd
4
- import json
5
- import numpy as np
6
- from statistics import mean
7
- import re
8
- from datasets import load_dataset
9
- import os
10
- from collections import defaultdict
11
- from src.envs import API, SAHARA_DATA, SAHARA_RESULTS
12
# Task identifier -> human-readable task name (rendered in table headers).
TASKS_LIST = {
    'xlni': 'Cross-Lingual Natural Language Inference',
    'lid': 'Language Identification',
    'news': 'News Classification',
    'sentiment': 'Sentiment Analysis',
    'topic': 'Topic Classification',
    'mt_eng2xx': 'Machine Translation - English to African',
    'mt_fra2xx': 'Machine Translation - French to African',
    'mt_xx2xx': 'Machine Translation - African to African',
    'paraphrase': 'Paraphrase',
    'summary': 'Summarization',
    'title': 'Title Generation',
    'mmlu': 'General Knowledge',
    'mgsm': 'Mathematical Word Problems',
    'belebele': 'Reading Comprehension',
    'squad_qa': 'Context-based Question Answering',
    'ner': 'Named Entity Recognition',
    'phrase': 'Phrase Chunking',
    'pos': 'Part-of-Speech Tagging',
}

# Leaderboard cluster name -> ordered list of task identifiers in that cluster.
CLUSTERS = {
    "Text Classification": ['xlni', 'lid', 'news', 'sentiment', 'topic'],
    "Text Generation": ['mt_eng2xx', 'mt_fra2xx', 'mt_xx2xx', 'paraphrase', 'summary', 'title'],
    "MCCR": ['mmlu', 'mgsm', 'belebele', 'squad_qa'],
    "Tokens": ['ner', 'phrase', 'pos'],
}

# Flat, ordered list of every task across all clusters.
ALL_TASKS = [task for member_tasks in CLUSTERS.values() for task in member_tasks]
47
-
48
# ===== Authenticate and Load Data From Private HF Repo =====

def load_private_leaderboard_df():
    """Fetch the leaderboard results file from the private HF dataset repo.

    Always re-downloads (``force_redownload``) so the leaderboard reflects
    the latest uploaded results. Returns the ``train`` split as a pandas
    DataFrame.
    """
    dataset = load_dataset(
        path=SAHARA_DATA,
        name=None,
        data_files=SAHARA_RESULTS,
        split="train",
        download_mode="force_redownload",
    )
    return dataset.to_pandas()
59
# Metric identifier -> display label (HTML allowed, e.g. <sup>).
metrics_list = {
    'bleu_1k': 'spBleu<sup>1K</sup>',
    'accuracy': 'Accuracy',
    'f1': 'Macro-F1',
    'exact_match': 'Exact Match',
    'rougeL': 'RougeL',
}

# ISO 639-3 code -> language display name. Some languages are reachable via
# two codes (e.g. 'swa'/'swh' both map to Swahili).
LANG_ISO2NAME = {
    'eng': 'English',
    'fra': 'French',
    # 'ara': 'Arabic',
    'amh': 'Amharic',
    'ewe': 'Ewe',
    'hau': 'Hausa',
    'ibo': 'Igbo',
    'kin': 'Kinyarwanda',
    'lin': 'Lingala',
    'lug': 'Ganda',
    'orm': 'Oromo',
    'sna': 'Shona',
    'sot': 'Southern Sotho',
    'swa': 'Swahili', 'swh': 'Swahili',
    'twi': 'Twi',
    'wol': 'Wolof',
    'xho': 'Xhosa',
    'yor': 'Yoruba',
    'zul': 'Zulu',
    'afr': 'Afrikaans',
    'run': 'Rundi',
    'tir': 'Tigrinya',
    'som': 'Somali',
    'pcm': 'Nigerian Pidgin',
    'teo': 'Teso',
    'nyn': 'Nyankore/Nyankole',
    'lgg': 'Lugbara',
    'bem': 'Bemba/Chibemba',
    'tsn': 'Tswana',
    'bbj': 'Ghomálá',
    'mos': 'Moore',
    'bam': 'Bambara',
    'fon': 'Fon',
    'ach': 'Acholi',
    'nso': 'Sepedi',
    'tso': 'Tsonga',
    'fuv': 'Fulfude Nigeria',
    'gaz': 'Oromo, West Central',
    'kea': 'Kabuverdianu',
    'nya': 'Nyanja',
    'ssw': 'Swati',
    'luo': 'Dholuo/Luo',
    'ven': 'Venda',
    'kir': "Kirundi",
}
112
-
113
# ===== Build Language Name→ISOs map =====
def build_langname_to_isos(iso2name):
    """Invert an ISO→name mapping into name→{ISO codes}.

    Names shared by several ISO codes (e.g. Swahili) collect all of them
    into one set.
    """
    inverted = defaultdict(set)
    for code, language in iso2name.items():
        inverted[language].add(code)
    return inverted
119
-
120
# Language display name -> set of ISO codes (e.g. 'Swahili' -> {'swa', 'swh'}).
LANGNAME2ISOS = build_langname_to_isos(LANG_ISO2NAME)
# Language picker entries: African languages only (English/French excluded).
LANG_NAME_LIST = sorted(name for name in LANGNAME2ISOS.keys() if name not in ['eng', 'fra', 'English', 'French'])
123
-
124
def get_task_metric_map(df):
    """Map each value in the ``task`` column to its ``metric`` value.

    When a task appears on several rows, the last row wins (same as the
    original row-by-row overwrite).
    """
    return dict(zip(df["task"], df["metric"]))
129
-
130
def cluster_average(row, tasks):
    """Mean of the task scores in ``row`` that parse as floats.

    Missing keys and non-numeric values (e.g. the '---' placeholder) are
    skipped. Returns NaN when no task yields a numeric score.
    """
    scores = []
    for task in tasks:
        try:
            scores.append(float(row[task]))
        except Exception:
            # Placeholder string or absent column — ignore this task.
            pass
    return np.mean(scores) if scores else np.nan
139
-
140
def add_medals_to_models(df, score_col="overall score"):
    """Sort rows by ``score_col`` (descending, stable) and prefix the models
    holding the top three *distinct* scores with medal emoji.

    '---' scores become NaN and sort last with no medal. Ties share a medal.
    Returns the sorted frame; the helper columns are dropped before return.
    """
    tmp_col = "__score_float"
    df[tmp_col] = df[score_col].apply(lambda v: float(v) if v != "---" else np.nan)
    df = df.sort_values(by=tmp_col, ascending=False, kind="mergesort").reset_index(drop=True)

    def rank_symbols(scores):
        # Top three unique numeric scores get 🏆 / 🥈 / 🥉; everyone else "".
        ordered = sorted({s for s in scores if not pd.isna(s)}, reverse=True)
        medal_for = dict(zip(ordered[:3], ["🏆", "🥈", "🥉"]))
        return [medal_for.get(s, "") for s in scores]

    df['rank_symbol'] = rank_symbols(df[tmp_col].tolist())
    df['model'] = df['rank_symbol'] + ' ' + df['model']
    return df.drop(columns=['rank_symbol', tmp_col])
153
-
154
def format_cluster_table(df, cluster_tasks, metric_map):
    """Build the display table for one task cluster.

    Ensures every cluster task has a column ('---' placeholder when absent),
    formats numeric scores to 2 decimals, computes a per-model "Cluster
    Score" (mean of the numeric tasks), renames task columns to
    "<task name><br>Metric: <metric label>" headers, and medals the top
    models via add_medals_to_models.
    """
    for task in cluster_tasks:
        if task not in df.columns:
            df[task] = '---'
    df = df[["model"] + cluster_tasks]
    numeric_types = (int, float, np.integer, np.floating)
    for task in cluster_tasks:
        df[task] = df[task].apply(lambda v: f"{v:.2f}" if isinstance(v, numeric_types) else v)
    # cluster_average re-parses the formatted strings, so this still works.
    df["Cluster Score"] = df[cluster_tasks].apply(
        lambda row: cluster_average(row, cluster_tasks), axis=1
    )
    df["Cluster Score"] = df["Cluster Score"].apply(lambda v: f"{v:.2f}" if pd.notna(v) else "---")
    df = df[["model", "Cluster Score"] + cluster_tasks]
    headers = {t: f"{TASKS_LIST[t]}<br>Metric: {metrics_list[metric_map.get(t, '')]}" for t in cluster_tasks}
    df = df.rename(columns=headers)
    return add_medals_to_models(df, score_col="Cluster Score")
172
-
173
def format_main_overall_table(df, metric_map):
    """Build the main leaderboard: one column per cluster plus a
    "Sahara Score" (mean over the clusters a model has data for).

    ``metric_map`` is currently unused but kept for signature parity with
    format_cluster_table.
    """
    table = df.copy()
    for cluster_name, member_tasks in CLUSTERS.items():
        table[cluster_name] = table[member_tasks].apply(
            lambda row: cluster_average(row, member_tasks), axis=1
        )
    cluster_cols = list(CLUSTERS.keys())
    # Overall score averages only the clusters that produced a number.
    table["Overall Score"] = table[cluster_cols].apply(
        lambda row: np.nanmean([v for v in row if pd.notna(v)]), axis=1
    )
    for col in cluster_cols + ["Overall Score"]:
        table[col] = table[col].apply(lambda v: f"{v:.2f}" if pd.notna(v) else "---")
    table = table[["model", "Overall Score"] + cluster_cols]
    table = add_medals_to_models(table, score_col="Overall Score")
    table.rename(columns={'Overall Score': 'Sahara Score'}, inplace=True)
    return table
187
-
188
def load_leaderboards():
    """Load results and build all leaderboard views.

    Returns a 4-tuple: (cluster_tabs, main_overall_tab, df, metric_map) —
    per-cluster display tables, the main overall table, the raw results
    DataFrame, and the task→metric mapping.
    """
    df = load_private_leaderboard_df()
    metric_map = get_task_metric_map(df)
    main_df = df[df['leaderboard'] == 'main'].copy()
    if main_df.empty:
        cluster_tabs = {c: pd.DataFrame([{"Info": "No data"}]) for c in CLUSTERS}
        main_overall_tab = pd.DataFrame([{"Info": "No data"}])
        # BUG FIX: this branch used to return a 6-tuple
        # (cluster_tabs, main_overall_tab, [], {}, df, metric_map) while the
        # success path returns 4 values — the module-level caller unpacks
        # exactly 4, so empty data raised ValueError. Return the same shape.
        return cluster_tabs, main_overall_tab, df, metric_map
    # Rows -> one column per task, one row per model.
    main_tasks_df = main_df.pivot_table(index='model', columns='task', values='score').reset_index()
    cluster_tabs = {}
    for cname, tasks in CLUSTERS.items():
        cluster_tabs[cname] = format_cluster_table(main_tasks_df, tasks, metric_map)
    # Guarantee every known task column exists before the overall table.
    for t in ALL_TASKS:
        if t not in main_tasks_df.columns:
            main_tasks_df[t] = np.nan
    main_overall_tab = format_main_overall_table(main_tasks_df, metric_map)
    return cluster_tabs, main_overall_tab, df, metric_map
206
-
207
def df_to_html(df, col_minwidth=90, col_maxwidth=140, model_col_width=400):
    """Render ``df`` as an HTML table (no index, HTML in cells left unescaped).

    Columns whose name contains "task" are dropped first. The width
    parameters are currently unused — they fed a now-disabled inline CSS
    template and are kept for backward compatibility.
    """
    task_like = [col for col in df.columns if "task" in col]
    df = df.drop(columns=task_like, errors="ignore")
    df.columns.name = None
    # escape=False keeps the <br>/<sup> markup in headers and cells intact.
    return df.to_html(index=False, escape=False)
260
-
261
# Materialize every leaderboard view once at import time; `all_df` and
# `metric_map` are reused below by get_lang_table().
cluster_tabs, main_overall_tab, all_df, metric_map = load_leaderboards()
265
def get_lang_table(lang_name):
    """Build the per-language leaderboard table for ``lang_name``.

    Selects every leaderboard whose hyphen-separated ID contains one of the
    language's ISO codes, pivots scores to one column per task, prepends a
    "Language Score" average, sorts descending, and medals the top models.
    Returns an Info-only frame when the language has no data.
    """
    iso_codes = LANGNAME2ISOS.get(lang_name, [])
    if not iso_codes:
        return pd.DataFrame([{"Info": "No data for this language"}])
    # Find all leaderboards containing any ISO in this language group
    # (the code must appear as a full hyphen-delimited segment, e.g.
    # 'eng-swa' matches 'swa' but 'swahili' would not).
    pattern = re.compile(r"(^|-)(" + "|".join(re.escape(iso) for iso in iso_codes) + r")(-|$)")
    matched_langs = [lb for lb in all_df['leaderboard'].unique() if lb not in ['main'] and pattern.search(lb)]
    lang_df = all_df[all_df['leaderboard'].isin(matched_langs)].copy()
    if lang_df.empty:
        return pd.DataFrame([{"Info": "No data for this language"}])
    def make_task_col(row):
        # Column header: task name, translation pair (if any), and metric.
        lb = row['leaderboard']
        task = row['task']
        metric = row['metric']
        if '-' in lb:
            # Hyphenated leaderboard IDs are treated as src-tgt language
            # pairs (translation tasks).
            pair_lang = lb.split('-')
            pair = lb.replace('-', '_')
            # return f"{TASKS_LIST[task]}({task}) {LANG_ISO2NAME[pair_lang[0]]} to {LANG_ISO2NAME[pair_lang[1]]} ({pair})\n{metric}"
            return f"{TASKS_LIST[task]} <br> {LANG_ISO2NAME[pair_lang[0]]} to {LANG_ISO2NAME[pair_lang[1]]} <br> Metric: {metrics_list[metric]}"
        else:
            return f"{TASKS_LIST[task]} <br> Metric: {metrics_list[metric]}"
    lang_df['task_col'] = lang_df.apply(make_task_col, axis=1)
    # One row per model, one column per rendered task header.
    table = lang_df.pivot_table(index='model', columns='task_col', values='score').reset_index()
    score_cols = [col for col in table.columns if col != 'model']
    for col in score_cols:
        table[col] = table[col].apply(lambda x: f"{x:.2f}" if isinstance(x, (int, float, np.integer, np.floating)) else x)
    def avg_score(row):
        # Mean of the scores that parse as floats (mirrors cluster_average).
        vals = []
        for col in score_cols:
            try:
                v = float(row[col])
                vals.append(v)
            except Exception:
                continue
        return np.mean(vals) if vals else np.nan
    table.insert(1, 'Language Score', table.apply(avg_score, axis=1).apply(lambda x: f"{x:.2f}" if pd.notna(x) else "---"))
    # Sort on the numeric form of Language Score; NaN ('---') rows sort last.
    table['__overall_score_float'] = table['Language Score'].apply(lambda x: float(x) if x != "---" else np.nan)
    table = table.sort_values(by='__overall_score_float', ascending=False, kind="mergesort").reset_index(drop=True)
    def get_rank_symbols(scores):
        # NOTE(review): duplicates the medal logic in add_medals_to_models —
        # candidate for consolidation.
        unique_scores = sorted(set([s for s in scores if not pd.isna(s)]), reverse=True)
        symbols = ["🏆", "🥈", "🥉"]
        score_to_symbol = {s: symbols[i] for i, s in enumerate(unique_scores[:3])}
        return [score_to_symbol.get(s, "") for s in scores]
    table['rank_symbol'] = get_rank_symbols(table['__overall_score_float'].tolist())
    table['model'] = table['rank_symbol'] + ' ' + table['model']
    table = table.drop(columns=['rank_symbol', '__overall_score_float'])
    return table