Commit
·
24c8d001
Parent(s):
1e9c5dd
revert to correct usage of ModelDetails (without api)
Browse files
- app.py +1 -1
- src/display/utils.py +3 -4
- src/leaderboard/read_evals.py +2 -4
app.py
CHANGED
@@ -289,7 +289,7 @@ with demo:
|
|
289 |
with gr.Row():
|
290 |
with gr.Column():
|
291 |
model_api = gr.Dropdown(
|
292 |
-
choices=[a.value.
|
293 |
label="Model API",
|
294 |
multiselect=False,
|
295 |
value="hf",
|
|
|
289 |
with gr.Row():
|
290 |
with gr.Column():
|
291 |
model_api = gr.Dropdown(
|
292 |
+
choices=[a.value.name for a in ModelAPI],
|
293 |
label="Model API",
|
294 |
multiselect=False,
|
295 |
value="hf",
|
src/display/utils.py
CHANGED
@@ -59,13 +59,12 @@ class ModelDetails:
|
|
59 |
name: str
|
60 |
display_name: str = ""
|
61 |
symbol: str = "" # emoji
|
62 |
-
api: str = "hf"
|
63 |
|
64 |
|
65 |
class ModelAPI(Enum):
|
66 |
-
hf = ModelDetails(name="
|
67 |
-
openai = ModelDetails(name="
|
68 |
-
anthropic = ModelDetails(name="
|
69 |
|
70 |
|
71 |
class ModelType(Enum):
|
|
|
59 |
name: str
|
60 |
display_name: str = ""
|
61 |
symbol: str = "" # emoji
|
|
|
62 |
|
63 |
|
64 |
class ModelAPI(Enum):
|
65 |
+
hf = ModelDetails(name="hf")
|
66 |
+
openai = ModelDetails(name="openai-chat-completions")
|
67 |
+
anthropic = ModelDetails(name="anthropic-chat-completions")
|
68 |
|
69 |
|
70 |
class ModelType(Enum):
|
src/leaderboard/read_evals.py
CHANGED
@@ -109,6 +109,7 @@ class EvalResult:
|
|
109 |
|
110 |
def to_dict(self):
|
111 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
|
|
112 |
average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
113 |
data_dict = {
|
114 |
"eval_name": self.eval_name, # not a column, just a save name,
|
@@ -125,6 +126,7 @@ class EvalResult:
|
|
125 |
AutoEvalColumn.params.name: self.num_params,
|
126 |
AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
127 |
}
|
|
|
128 |
|
129 |
for task in Tasks:
|
130 |
data_dict[task.value.col_name] = self.results[task.value.benchmark]
|
@@ -157,14 +159,11 @@ def get_request_file_for_model(requests_path, model_name, precision):
|
|
157 |
def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
|
158 |
"""From the path of the results folder root, extract all needed info for results"""
|
159 |
model_result_filepaths = []
|
160 |
-
print("Files in dir:")
|
161 |
-
print(os.listdir("."))
|
162 |
|
163 |
for root, _, files in os.walk(results_path):
|
164 |
# We should only have json files in model results
|
165 |
if len(files) == 0 or any([not f.endswith(".json") for f in files]):
|
166 |
continue
|
167 |
-
print(f"root: {root}, files: {[file for file in files]}")
|
168 |
|
169 |
# Sort the files by date
|
170 |
try:
|
@@ -175,7 +174,6 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
|
|
175 |
for file in files:
|
176 |
model_result_filepaths.append(os.path.join(root, file))
|
177 |
|
178 |
-
print(f"Model results: {model_result_filepaths}")
|
179 |
|
180 |
eval_results = {}
|
181 |
for model_result_filepath in model_result_filepaths:
|
|
|
109 |
|
110 |
def to_dict(self):
|
111 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
112 |
+
print([v for v in self.results.values() if v is not None])
|
113 |
average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
114 |
data_dict = {
|
115 |
"eval_name": self.eval_name, # not a column, just a save name,
|
|
|
126 |
AutoEvalColumn.params.name: self.num_params,
|
127 |
AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
128 |
}
|
129 |
+
print(data_dict)
|
130 |
|
131 |
for task in Tasks:
|
132 |
data_dict[task.value.col_name] = self.results[task.value.benchmark]
|
|
|
159 |
def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
|
160 |
"""From the path of the results folder root, extract all needed info for results"""
|
161 |
model_result_filepaths = []
|
|
|
|
|
162 |
|
163 |
for root, _, files in os.walk(results_path):
|
164 |
# We should only have json files in model results
|
165 |
if len(files) == 0 or any([not f.endswith(".json") for f in files]):
|
166 |
continue
|
|
|
167 |
|
168 |
# Sort the files by date
|
169 |
try:
|
|
|
174 |
for file in files:
|
175 |
model_result_filepaths.append(os.path.join(root, file))
|
176 |
|
|
|
177 |
|
178 |
eval_results = {}
|
179 |
for model_result_filepath in model_result_filepaths:
|