Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Add Llama 3 and Gemini 2.0 Flash models from Google
Browse files- external_models_results.json +91 -1
external_models_results.json
CHANGED
@@ -245,7 +245,7 @@
|
|
245 |
{
|
246 |
"model": "llama_405b_instruct",
|
247 |
"name": "meta-llama/Meta-Llama-3.1-405B-Instruct (Vertex AI)",
|
248 |
-
"link": "https://
|
249 |
"date": "2024-08-20",
|
250 |
"status": "full",
|
251 |
"main_language": "English",
|
@@ -286,5 +286,95 @@
|
|
286 |
},
|
287 |
"result_metrics_average": 0.8231799251828895,
|
288 |
"result_metrics_npm": 0.7241097388486535
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
289 |
}
|
290 |
]
|
|
|
245 |
{
|
246 |
"model": "llama_405b_instruct",
|
247 |
"name": "meta-llama/Meta-Llama-3.1-405B-Instruct (Vertex AI)",
|
248 |
+
"link": "https://huggingface.co/meta-llama/Meta-Llama-3.1-405B-Instruct",
|
249 |
"date": "2024-08-20",
|
250 |
"status": "full",
|
251 |
"main_language": "English",
|
|
|
286 |
},
|
287 |
"result_metrics_average": 0.8231799251828895,
|
288 |
"result_metrics_npm": 0.7241097388486535
|
289 |
+
},
|
290 |
+
{
|
291 |
+
"model": "llama3_3_70b",
|
292 |
+
"name": "meta-llama/Llama-3.3-70B-Instruct (Vertex AI)",
|
293 |
+
"link": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
|
294 |
+
"date": "2025-04-03",
|
295 |
+
"status": "full",
|
296 |
+
"main_language": "English",
|
297 |
+
"model_type": "chat",
|
298 |
+
"params": 70.6,
|
299 |
+
"result_metrics": {
|
300 |
+
"enem_challenge": 0.8320503848845346,
|
301 |
+
"bluex": 0.7593880389429764,
|
302 |
+
"oab_exams": 0.6733485193621868,
|
303 |
+
"assin2_sts": 0.7275578599896508,
|
304 |
+
"assin2_rte": 0.9407071010860484,
|
305 |
+
"faquad_nli": 0.8787563033858187,
|
306 |
+
"hatebr_offensive": 0.9024358249091997,
|
307 |
+
"portuguese_hate_speech": 0.7042216543825339,
|
308 |
+
"tweetsentbr": 0.7076749453899551
|
309 |
+
},
|
310 |
+
"result_metrics_average": 0.791793403592545,
|
311 |
+
"result_metrics_npm": 0.6924788466103498
|
312 |
+
},
|
313 |
+
{
|
314 |
+
"model": "llama3_2_90b",
|
315 |
+
"name": "meta-llama/Llama-3.2-90B-Vision-Instruct (Vertex AI)",
|
316 |
+
"link": "https://huggingface.co/meta-llama/Llama-3.2-90B-Vision-Instruct",
|
317 |
+
"date": "2025-04-03",
|
318 |
+
"status": "full",
|
319 |
+
"main_language": "English",
|
320 |
+
"model_type": "chat",
|
321 |
+
"params": 88.6,
|
322 |
+
"result_metrics": {
|
323 |
+
"enem_challenge": 0.821553533939818,
|
324 |
+
"bluex": 0.7482614742698191,
|
325 |
+
"oab_exams": 0.7061503416856492,
|
326 |
+
"assin2_sts": 0.7368518566379951,
|
327 |
+
"assin2_rte": 0.9216548775103446,
|
328 |
+
"faquad_nli": 0.8632015306122449,
|
329 |
+
"hatebr_offensive": 0.8965270877302478,
|
330 |
+
"portuguese_hate_speech": 0.7059127552081422,
|
331 |
+
"tweetsentbr": 0.7352076218951984
|
332 |
+
},
|
333 |
+
"result_metrics_average": 0.7928134532766066,
|
334 |
+
"result_metrics_npm": 0.6915070359785283
|
335 |
+
},
|
336 |
+
{
|
337 |
+
"model": "gemini-2.0-flash-001",
|
338 |
+
"name": "Gemini 2.0 Flash (001)",
|
339 |
+
"link": "https://cloud.google.com/vertex-ai",
|
340 |
+
"date": "2025-04-03",
|
341 |
+
"status": "full",
|
342 |
+
"main_language": "English",
|
343 |
+
"model_type": "proprietary",
|
344 |
+
"result_metrics": {
|
345 |
+
"enem_challenge": 0.8789363191042687,
|
346 |
+
"bluex": 0.803894297635605,
|
347 |
+
"oab_exams": 0.7767653758542141,
|
348 |
+
"assin2_sts": 0.8440142633742483,
|
349 |
+
"assin2_rte": 0.9305165510724053,
|
350 |
+
"faquad_nli": 0.7533651260745065,
|
351 |
+
"hatebr_offensive": 0.8890432813545366,
|
352 |
+
"portuguese_hate_speech": 0.7655392938544128,
|
353 |
+
"tweetsentbr": 0.7652542619451799
|
354 |
+
},
|
355 |
+
"result_metrics_average": 0.8230365300299308,
|
356 |
+
"result_metrics_npm": 0.7253778946033657
|
357 |
+
},
|
358 |
+
{
|
359 |
+
"model": "gemini-2.0-flash-lite-001",
|
360 |
+
"name": "Gemini 2.0 Flash Lite (001)",
|
361 |
+
"link": "https://cloud.google.com/vertex-ai",
|
362 |
+
"date": "2025-04-03",
|
363 |
+
"status": "full",
|
364 |
+
"main_language": "English",
|
365 |
+
"model_type": "proprietary",
|
366 |
+
"result_metrics": {
|
367 |
+
"enem_challenge": 0.8509447165850245,
|
368 |
+
"bluex": 0.7872044506258693,
|
369 |
+
"oab_exams": 0.7061503416856492,
|
370 |
+
"assin2_sts": 0.8492479991621328,
|
371 |
+
"assin2_rte": 0.9216548775103446,
|
372 |
+
"faquad_nli": 0.7652777777777777,
|
373 |
+
"hatebr_offensive": 0.8522499647780968,
|
374 |
+
"portuguese_hate_speech": 0.7501387383201693,
|
375 |
+
"tweetsentbr": 0.7675746509081982
|
376 |
+
},
|
377 |
+
"result_metrics_average": 0.8056048352614735,
|
378 |
+
"result_metrics_npm": 0.6986042497176748
|
379 |
}
|
380 |
]
|