Spaces:
Running
Running
Update src/helper.py
Browse files - src/helper.py +14 -4
src/helper.py
CHANGED
@@ -5,7 +5,7 @@ import json
|
|
5 |
import numpy as np
|
6 |
from statistics import mean
|
7 |
import re
|
8 |
-
from datasets import load_dataset
|
9 |
import os
|
10 |
from collections import defaultdict
|
11 |
from src.envs import API, SAHARA_DATA, SAHARA_RESULTS
|
@@ -53,13 +53,23 @@ TASK_TO_CLUSTER_MAP = {
|
|
53 |
# ===== Authenticate and Load Data From Private HF Repo =====
|
54 |
|
55 |
def load_private_leaderboard_df():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
ds = load_dataset(
|
57 |
path=SAHARA_DATA,
|
58 |
name=None,
|
59 |
-
data_files=
|
60 |
split="train",
|
61 |
download_mode="force_redownload"
|
62 |
)
|
|
|
63 |
return ds.to_pandas()
|
64 |
metrics_list={
|
65 |
'bleu_1k':'spBleu<sup>1K</sup>',
|
@@ -155,9 +165,9 @@ def compare_models(model_1_name, model_2_name):
|
|
155 |
if pd.isna(d):
|
156 |
return "---"
|
157 |
if d > 0.001: # Model 1 is better
|
158 |
-
return f"<span style='color:green; font-weight:bold;'>+{d:.2f}</span>"
|
159 |
elif d < -0.001: # Model 2 is better
|
160 |
-
return f"<span style='color:red; font-weight:bold;'>{d:.2f}</span>"
|
161 |
else:
|
162 |
return f"{d:.2f}"
|
163 |
|
|
|
5 |
import numpy as np
|
6 |
from statistics import mean
|
7 |
import re
|
8 |
+
from datasets import load_dataset, concatenate_datasets
|
9 |
import os
|
10 |
from collections import defaultdict
|
11 |
from src.envs import API, SAHARA_DATA, SAHARA_RESULTS
|
|
|
53 |
# ===== Authenticate and Load Data From Private HF Repo =====
|
54 |
|
55 |
def load_private_leaderboard_df():
|
56 |
+
|
57 |
+
all_repo_files = API.list_repo_files(repo_id=SAHARA_DATA, repo_type="dataset")
|
58 |
+
folder_path = "data/users/"
|
59 |
+
jsonl_files_in_folder = [
|
60 |
+
f for f in all_repo_files
|
61 |
+
if f.startswith(folder_path) and f.endswith(".jsonl")
|
62 |
+
]
|
63 |
+
jsonl_files_in_folder.append(SAHARA_RESULTS)
|
64 |
+
print("++++++",jsonl_files_in_folder)
|
65 |
ds = load_dataset(
|
66 |
path=SAHARA_DATA,
|
67 |
name=None,
|
68 |
+
data_files=jsonl_files_in_folder,
|
69 |
split="train",
|
70 |
download_mode="force_redownload"
|
71 |
)
|
72 |
+
print(">>>>>>>", ds)
|
73 |
return ds.to_pandas()
|
74 |
metrics_list={
|
75 |
'bleu_1k':'spBleu<sup>1K</sup>',
|
|
|
165 |
if pd.isna(d):
|
166 |
return "---"
|
167 |
if d > 0.001: # Model 1 is better
|
168 |
+
return f"<span style='color:green !important; font-weight:bold !important;'>+{d:.2f}</span>"
|
169 |
elif d < -0.001: # Model 2 is better
|
170 |
+
return f"<span style='color:red !important; font-weight:bold !important;'>{d:.2f}</span>"
|
171 |
else:
|
172 |
return f"{d:.2f}"
|
173 |
|