Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
shigeki Ishida
committed on
Commit
·
67542c9
1
Parent(s):
b4dce55
Add parquet file support
Browse files
- app.py +5 -1
- src/leaderboard/read_evals.py +63 -33
- src/populate.py +0 -1
app.py
CHANGED
@@ -89,7 +89,11 @@ except Exception:
|
|
89 |
FAILED_EVAL_QUEUE_DF,
|
90 |
) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
|
91 |
|
92 |
-
ORIGINAL_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
|
|
|
|
|
|
|
|
|
93 |
MAX_MODEL_SIZE = ORIGINAL_DF["#Params (B)"].max()
|
94 |
|
95 |
|
|
|
89 |
FAILED_EVAL_QUEUE_DF,
|
90 |
) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
|
91 |
|
92 |
+
# ORIGINAL_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
|
93 |
+
# Get dataframes
|
94 |
+
results_path = "eval-results/leaderboard.parquet"
|
95 |
+
|
96 |
+
ORIGINAL_DF = get_leaderboard_df(results_path, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
|
97 |
MAX_MODEL_SIZE = ORIGINAL_DF["#Params (B)"].max()
|
98 |
|
99 |
|
src/leaderboard/read_evals.py
CHANGED
@@ -5,6 +5,7 @@ from dataclasses import dataclass
|
|
5 |
from decimal import Decimal
|
6 |
|
7 |
import dateutil
|
|
|
8 |
|
9 |
from src.display.formatting import make_clickable_model
|
10 |
from src.display.utils import AutoEvalColumn, Backend, ModelType, Tasks, Version, WeightType
|
@@ -37,9 +38,12 @@ class EvalResult:
|
|
37 |
|
38 |
@classmethod
|
39 |
def init_from_json_file(self, json_filepath):
|
40 |
-
"""Inits the result from the specific model result file"""
|
41 |
-
|
42 |
-
data =
|
|
|
|
|
|
|
43 |
|
44 |
config = data.get("config")
|
45 |
metainfo = config.get("metainfo", {})
|
@@ -183,35 +187,63 @@ def get_request_file_for_model(requests_path, model_name, precision):
|
|
183 |
|
184 |
|
185 |
def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
|
186 |
-
"""From the path of the results folder root, extract all needed info for results"""
|
187 |
-
model_result_filepaths = []
|
188 |
-
|
189 |
-
for root, _, files in os.walk(results_path):
|
190 |
-
# We should only have json files in model results
|
191 |
-
if len(files) == 0 or any([not f.endswith(".json") for f in files]):
|
192 |
-
continue
|
193 |
-
|
194 |
-
# Sort the files by date
|
195 |
-
try:
|
196 |
-
files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
|
197 |
-
except dateutil.parser._parser.ParserError:
|
198 |
-
files = [files[-1]]
|
199 |
-
|
200 |
-
for file in files:
|
201 |
-
model_result_filepaths.append(os.path.join(root, file))
|
202 |
-
|
203 |
eval_results = {}
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
|
216 |
results = []
|
217 |
for v in eval_results.values():
|
@@ -220,7 +252,5 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
|
|
220 |
results.append(v)
|
221 |
except KeyError: # not all eval values present
|
222 |
continue
|
223 |
-
# print(f"Processing file: {model_result_filepath}")
|
224 |
-
# print(f"Eval result: {eval_result.to_dict()}")
|
225 |
|
226 |
return results
|
|
|
5 |
from decimal import Decimal
|
6 |
|
7 |
import dateutil
|
8 |
+
import pandas as pd
|
9 |
|
10 |
from src.display.formatting import make_clickable_model
|
11 |
from src.display.utils import AutoEvalColumn, Backend, ModelType, Tasks, Version, WeightType
|
|
|
38 |
|
39 |
@classmethod
|
40 |
def init_from_json_file(self, json_filepath):
|
41 |
+
"""Inits the result from the specific model result file or dict"""
|
42 |
+
if isinstance(json_filepath, dict):
|
43 |
+
data = json_filepath
|
44 |
+
else:
|
45 |
+
with open(json_filepath) as fp:
|
46 |
+
data = json.load(fp)
|
47 |
|
48 |
config = data.get("config")
|
49 |
metainfo = config.get("metainfo", {})
|
|
|
187 |
|
188 |
|
189 |
def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
|
190 |
+
"""From the path of the results folder root or parquet file, extract all needed info for results"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
eval_results = {}
|
192 |
+
|
193 |
+
if results_path.endswith(".parquet"):
|
194 |
+
df = pd.read_parquet(results_path)
|
195 |
+
for _, row in df.iterrows():
|
196 |
+
data = {
|
197 |
+
"scores": {
|
198 |
+
col.replace("scores.", ""): str(row[col]) for col in df.columns if col.startswith("scores.")
|
199 |
+
},
|
200 |
+
"config": {
|
201 |
+
"model_name": row.get("config.model.pretrained_model_name_or_path"),
|
202 |
+
"model": {
|
203 |
+
"dtype": row.get("config.model.dtype"),
|
204 |
+
"revision": row.get("config.model.revision"),
|
205 |
+
"_target_": row.get("config.model._target_"),
|
206 |
+
},
|
207 |
+
"metainfo": {
|
208 |
+
"num_few_shots": row.get("config.metainfo.num_few_shots"),
|
209 |
+
"version": row.get("config.metainfo.version"),
|
210 |
+
},
|
211 |
+
"pipeline_kwargs": {"add_special_tokens": row.get("config.pipeline_kwargs.add_special_tokens")},
|
212 |
+
},
|
213 |
+
}
|
214 |
+
eval_result = EvalResult.init_from_json_file(data)
|
215 |
+
eval_result.update_with_request_file(requests_path)
|
216 |
+
|
217 |
+
eval_name = eval_result.eval_name
|
218 |
+
if eval_name in eval_results:
|
219 |
+
eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
|
220 |
+
else:
|
221 |
+
eval_results[eval_name] = eval_result
|
222 |
+
else:
|
223 |
+
# JSON
|
224 |
+
model_result_filepaths = []
|
225 |
+
for root, _, files in os.walk(results_path):
|
226 |
+
if len(files) == 0 or any([not f.endswith(".json") for f in files]):
|
227 |
+
continue
|
228 |
+
|
229 |
+
try:
|
230 |
+
files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
|
231 |
+
except dateutil.parser._parser.ParserError:
|
232 |
+
files = [files[-1]]
|
233 |
+
|
234 |
+
for file in files:
|
235 |
+
model_result_filepaths.append(os.path.join(root, file))
|
236 |
+
|
237 |
+
eval_results = {}
|
238 |
+
for model_result_filepath in model_result_filepaths:
|
239 |
+
eval_result = EvalResult.init_from_json_file(model_result_filepath)
|
240 |
+
eval_result.update_with_request_file(requests_path)
|
241 |
+
|
242 |
+
eval_name = eval_result.eval_name
|
243 |
+
if eval_name in eval_results.keys():
|
244 |
+
eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
|
245 |
+
else:
|
246 |
+
eval_results[eval_name] = eval_result
|
247 |
|
248 |
results = []
|
249 |
for v in eval_results.values():
|
|
|
252 |
results.append(v)
|
253 |
except KeyError: # not all eval values present
|
254 |
continue
|
|
|
|
|
255 |
|
256 |
return results
|
src/populate.py
CHANGED
@@ -14,7 +14,6 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
14 |
all_data_json = [v.to_dict() for v in raw_data]
|
15 |
|
16 |
df = pd.DataFrame.from_records(all_data_json)
|
17 |
-
|
18 |
# Add a row ID column
|
19 |
df[AutoEvalColumn.row_id.name] = range(len(df))
|
20 |
|
|
|
14 |
all_data_json = [v.to_dict() for v in raw_data]
|
15 |
|
16 |
df = pd.DataFrame.from_records(all_data_json)
|
|
|
17 |
# Add a row ID column
|
18 |
df[AutoEvalColumn.row_id.name] = range(len(df))
|
19 |
|