Spaces:
Running
Running
David Pomerenke
committed on
Commit
·
092c06a
1
Parent(s):
5fa433f
Block gemini-2.5-pro-exp-03-25
Browse files
- evals/main.py +1 -1
- evals/models.py +6 -4
- languages.json +5 -5
- models.json +66 -0
- results.json +0 -0
evals/main.py
CHANGED
|
@@ -10,7 +10,7 @@ from tasks import tasks
|
|
| 10 |
# ===== config =====
|
| 11 |
|
| 12 |
n_sentences = 10
|
| 13 |
-
n_languages =
|
| 14 |
n_models = 25
|
| 15 |
|
| 16 |
# ===== run evaluation and aggregate results =====
|
|
|
|
| 10 |
# ===== config =====
|
| 11 |
|
| 12 |
n_sentences = 10
|
| 13 |
+
n_languages = 20
|
| 14 |
n_models = 25
|
| 15 |
|
| 16 |
# ===== run evaluation and aggregate results =====
|
evals/models.py
CHANGED
|
@@ -44,6 +44,10 @@ models = [
|
|
| 44 |
"amazon/nova-micro-v1", # 0.09$
|
| 45 |
]
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
transcription_models = [
|
| 48 |
"elevenlabs/scribe_v1",
|
| 49 |
"openai/whisper-large-v3",
|
|
@@ -58,7 +62,6 @@ cache = Memory(location=".cache", verbose=0).cache
|
|
| 58 |
def get_models(date: date):
|
| 59 |
return get("https://openrouter.ai/api/frontend/models").json()["data"]
|
| 60 |
|
| 61 |
-
|
| 62 |
def get_slug(permaslug):
|
| 63 |
models = get_models(date.today())
|
| 64 |
slugs = [m["slug"] for m in models if m["permaslug"] == permaslug]
|
|
@@ -88,12 +91,11 @@ def get_current_popular_models(date: date):
|
|
| 88 |
data = sorted(data, key=lambda x: x["total_prompt_tokens"], reverse=True)
|
| 89 |
return [get_slug(model["model_permaslug"]) for model in data]
|
| 90 |
|
| 91 |
-
|
| 92 |
models += [
|
| 93 |
-
m for m in get_historical_popular_models(date.today()) if m and m not in models
|
| 94 |
][:5]
|
| 95 |
models += [
|
| 96 |
-
m for m in get_current_popular_models(date.today()) if m and m not in models
|
| 97 |
][:5]
|
| 98 |
|
| 99 |
|
|
|
|
| 44 |
"amazon/nova-micro-v1", # 0.09$
|
| 45 |
]
|
| 46 |
|
| 47 |
+
blocklist = [
|
| 48 |
+
"google/gemini-2.5-pro-exp-03-25" # rate limit too low
|
| 49 |
+
]
|
| 50 |
+
|
| 51 |
transcription_models = [
|
| 52 |
"elevenlabs/scribe_v1",
|
| 53 |
"openai/whisper-large-v3",
|
|
|
|
| 62 |
def get_models(date: date):
|
| 63 |
return get("https://openrouter.ai/api/frontend/models").json()["data"]
|
| 64 |
|
|
|
|
| 65 |
def get_slug(permaslug):
|
| 66 |
models = get_models(date.today())
|
| 67 |
slugs = [m["slug"] for m in models if m["permaslug"] == permaslug]
|
|
|
|
| 91 |
data = sorted(data, key=lambda x: x["total_prompt_tokens"], reverse=True)
|
| 92 |
return [get_slug(model["model_permaslug"]) for model in data]
|
| 93 |
|
|
|
|
| 94 |
models += [
|
| 95 |
+
m for m in get_historical_popular_models(date.today()) if m and m not in models and m not in blocklist
|
| 96 |
][:5]
|
| 97 |
models += [
|
| 98 |
+
m for m in get_current_popular_models(date.today()) if m and m not in models and m not in blocklist
|
| 99 |
][:5]
|
| 100 |
|
| 101 |
|
languages.json
CHANGED
|
@@ -1027,7 +1027,7 @@
|
|
| 1027 |
"family":"Uralic",
|
| 1028 |
"flores_path":"hun_Latn",
|
| 1029 |
"fleurs_tag":"hu_hu",
|
| 1030 |
-
"commonvoice_hours":
|
| 1031 |
"commonvoice_locale":"hu",
|
| 1032 |
"in_benchmark":true
|
| 1033 |
},
|
|
@@ -2359,7 +2359,7 @@
|
|
| 2359 |
"family":"Atlantic-Congo",
|
| 2360 |
"flores_path":null,
|
| 2361 |
"fleurs_tag":null,
|
| 2362 |
-
"commonvoice_hours":4.
|
| 2363 |
"commonvoice_locale":"ibb",
|
| 2364 |
"in_benchmark":false
|
| 2365 |
},
|
|
@@ -5347,7 +5347,7 @@
|
|
| 5347 |
"family":"Atlantic-Congo",
|
| 5348 |
"flores_path":null,
|
| 5349 |
"fleurs_tag":null,
|
| 5350 |
-
"commonvoice_hours":2.
|
| 5351 |
"commonvoice_locale":"mua",
|
| 5352 |
"in_benchmark":false
|
| 5353 |
},
|
|
@@ -6199,7 +6199,7 @@
|
|
| 6199 |
"family":"Atlantic-Congo",
|
| 6200 |
"flores_path":null,
|
| 6201 |
"fleurs_tag":null,
|
| 6202 |
-
"commonvoice_hours":
|
| 6203 |
"commonvoice_locale":"jgo",
|
| 6204 |
"in_benchmark":false
|
| 6205 |
},
|
|
@@ -6367,7 +6367,7 @@
|
|
| 6367 |
"family":"Indo-European",
|
| 6368 |
"flores_path":null,
|
| 6369 |
"fleurs_tag":null,
|
| 6370 |
-
"commonvoice_hours":0.
|
| 6371 |
"commonvoice_locale":"btv",
|
| 6372 |
"in_benchmark":false
|
| 6373 |
},
|
|
|
|
| 1027 |
"family":"Uralic",
|
| 1028 |
"flores_path":"hun_Latn",
|
| 1029 |
"fleurs_tag":"hu_hu",
|
| 1030 |
+
"commonvoice_hours":93.0,
|
| 1031 |
"commonvoice_locale":"hu",
|
| 1032 |
"in_benchmark":true
|
| 1033 |
},
|
|
|
|
| 2359 |
"family":"Atlantic-Congo",
|
| 2360 |
"flores_path":null,
|
| 2361 |
"fleurs_tag":null,
|
| 2362 |
+
"commonvoice_hours":4.3,
|
| 2363 |
"commonvoice_locale":"ibb",
|
| 2364 |
"in_benchmark":false
|
| 2365 |
},
|
|
|
|
| 5347 |
"family":"Atlantic-Congo",
|
| 5348 |
"flores_path":null,
|
| 5349 |
"fleurs_tag":null,
|
| 5350 |
+
"commonvoice_hours":2.6,
|
| 5351 |
"commonvoice_locale":"mua",
|
| 5352 |
"in_benchmark":false
|
| 5353 |
},
|
|
|
|
| 6199 |
"family":"Atlantic-Congo",
|
| 6200 |
"flores_path":null,
|
| 6201 |
"fleurs_tag":null,
|
| 6202 |
+
"commonvoice_hours":4.9,
|
| 6203 |
"commonvoice_locale":"jgo",
|
| 6204 |
"in_benchmark":false
|
| 6205 |
},
|
|
|
|
| 6367 |
"family":"Indo-European",
|
| 6368 |
"flores_path":null,
|
| 6369 |
"fleurs_tag":null,
|
| 6370 |
+
"commonvoice_hours":0.8,
|
| 6371 |
"commonvoice_locale":"btv",
|
| 6372 |
"in_benchmark":false
|
| 6373 |
},
|
models.json
CHANGED
|
@@ -218,5 +218,71 @@
|
|
| 218 |
"type":"Commercial",
|
| 219 |
"license":null,
|
| 220 |
"creation_date":1733356800000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
}
|
| 222 |
]
|
|
|
|
| 218 |
"type":"Commercial",
|
| 219 |
"license":null,
|
| 220 |
"creation_date":1733356800000
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"id":"google\/gemini-2.0-flash-001",
|
| 224 |
+
"name":"Gemini 2.0 Flash",
|
| 225 |
+
"provider_name":"Google",
|
| 226 |
+
"cost":0.4,
|
| 227 |
+
"hf_id":null,
|
| 228 |
+
"size":null,
|
| 229 |
+
"type":"Commercial",
|
| 230 |
+
"license":null,
|
| 231 |
+
"creation_date":1738713600000
|
| 232 |
+
},
|
| 233 |
+
{
|
| 234 |
+
"id":"google\/gemini-flash-1.5",
|
| 235 |
+
"name":"Gemini 1.5 Flash ",
|
| 236 |
+
"provider_name":"Google",
|
| 237 |
+
"cost":0.3,
|
| 238 |
+
"hf_id":null,
|
| 239 |
+
"size":null,
|
| 240 |
+
"type":"Commercial",
|
| 241 |
+
"license":null,
|
| 242 |
+
"creation_date":1715644800000
|
| 243 |
+
},
|
| 244 |
+
{
|
| 245 |
+
"id":"google\/gemini-flash-1.5-8b",
|
| 246 |
+
"name":"Gemini 1.5 Flash 8B",
|
| 247 |
+
"provider_name":"Google",
|
| 248 |
+
"cost":0.15,
|
| 249 |
+
"hf_id":null,
|
| 250 |
+
"size":null,
|
| 251 |
+
"type":"Commercial",
|
| 252 |
+
"license":null,
|
| 253 |
+
"creation_date":1727913600000
|
| 254 |
+
},
|
| 255 |
+
{
|
| 256 |
+
"id":"gryphe\/mythomax-l2-13b",
|
| 257 |
+
"name":"MythoMax 13B",
|
| 258 |
+
"provider_name":"MythoMax 13B",
|
| 259 |
+
"cost":0.07,
|
| 260 |
+
"hf_id":"Gryphe\/MythoMax-L2-13b",
|
| 261 |
+
"size":null,
|
| 262 |
+
"type":"Open",
|
| 263 |
+
"license":"Other",
|
| 264 |
+
"creation_date":1691625600000
|
| 265 |
+
},
|
| 266 |
+
{
|
| 267 |
+
"id":"microsoft\/wizardlm-2-8x22b",
|
| 268 |
+
"name":"WizardLM-2 8x22B",
|
| 269 |
+
"provider_name":"WizardLM-2 8x22B",
|
| 270 |
+
"cost":0.5,
|
| 271 |
+
"hf_id":null,
|
| 272 |
+
"size":null,
|
| 273 |
+
"type":"Commercial",
|
| 274 |
+
"license":null,
|
| 275 |
+
"creation_date":1713225600000
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"id":"x-ai\/grok-3-mini-beta",
|
| 279 |
+
"name":"Grok 3 Mini Beta",
|
| 280 |
+
"provider_name":"xAI",
|
| 281 |
+
"cost":0.5,
|
| 282 |
+
"hf_id":null,
|
| 283 |
+
"size":null,
|
| 284 |
+
"type":"Commercial",
|
| 285 |
+
"license":null,
|
| 286 |
+
"creation_date":1744156800000
|
| 287 |
}
|
| 288 |
]
|
results.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|