eduagarcia committed
Commit b9392b7 · verified · 1 Parent(s): 6e87864

Add Llama 3 and Gemini 2.0 Flash models (Google Vertex AI)

Files changed (1): external_models_results.json (+91 -1)
external_models_results.json CHANGED
@@ -245,7 +245,7 @@
   {
     "model": "llama_405b_instruct",
     "name": "meta-llama/Meta-Llama-3.1-405B-Instruct (Vertex AI)",
-    "link": "https://cloud.google.com/vertex-ai",
+    "link": "https://huggingface.co/meta-llama/Meta-Llama-3.1-405B-Instruct",
     "date": "2024-08-20",
     "status": "full",
     "main_language": "English",
@@ -286,5 +286,95 @@
     },
     "result_metrics_average": 0.8231799251828895,
     "result_metrics_npm": 0.7241097388486535
+  },
+  {
+    "model": "llama3_3_70b",
+    "name": "meta-llama/Llama-3.3-70B-Instruct (Vertex AI)",
+    "link": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
+    "date": "2025-04-03",
+    "status": "full",
+    "main_language": "English",
+    "model_type": "chat",
+    "params": 70.6,
+    "result_metrics": {
+      "enem_challenge": 0.8320503848845346,
+      "bluex": 0.7593880389429764,
+      "oab_exams": 0.6733485193621868,
+      "assin2_sts": 0.7275578599896508,
+      "assin2_rte": 0.9407071010860484,
+      "faquad_nli": 0.8787563033858187,
+      "hatebr_offensive": 0.9024358249091997,
+      "portuguese_hate_speech": 0.7042216543825339,
+      "tweetsentbr": 0.7076749453899551
+    },
+    "result_metrics_average": 0.791793403592545,
+    "result_metrics_npm": 0.6924788466103498
+  },
+  {
+    "model": "llama3_2_90b",
+    "name": "meta-llama/Llama-3.2-90B-Vision-Instruct (Vertex AI)",
+    "link": "https://huggingface.co/meta-llama/Llama-3.2-90B-Vision-Instruct",
+    "date": "2025-04-03",
+    "status": "full",
+    "main_language": "English",
+    "model_type": "chat",
+    "params": 88.6,
+    "result_metrics": {
+      "enem_challenge": 0.821553533939818,
+      "bluex": 0.7482614742698191,
+      "oab_exams": 0.7061503416856492,
+      "assin2_sts": 0.7368518566379951,
+      "assin2_rte": 0.9216548775103446,
+      "faquad_nli": 0.8632015306122449,
+      "hatebr_offensive": 0.8965270877302478,
+      "portuguese_hate_speech": 0.7059127552081422,
+      "tweetsentbr": 0.7352076218951984
+    },
+    "result_metrics_average": 0.7928134532766066,
+    "result_metrics_npm": 0.6915070359785283
+  },
+  {
+    "model": "gemini-2.0-flash-001",
+    "name": "Gemini 2.0 Flash (001)",
+    "link": "https://cloud.google.com/vertex-ai",
+    "date": "2025-04-03",
+    "status": "full",
+    "main_language": "English",
+    "model_type": "proprietary",
+    "result_metrics": {
+      "enem_challenge": 0.8789363191042687,
+      "bluex": 0.803894297635605,
+      "oab_exams": 0.7767653758542141,
+      "assin2_sts": 0.8440142633742483,
+      "assin2_rte": 0.9305165510724053,
+      "faquad_nli": 0.7533651260745065,
+      "hatebr_offensive": 0.8890432813545366,
+      "portuguese_hate_speech": 0.7655392938544128,
+      "tweetsentbr": 0.7652542619451799
+    },
+    "result_metrics_average": 0.8230365300299308,
+    "result_metrics_npm": 0.7253778946033657
+  },
+  {
+    "model": "gemini-2.0-flash-lite-001",
+    "name": "Gemini 2.0 Flash Lite (001)",
+    "link": "https://cloud.google.com/vertex-ai",
+    "date": "2025-04-03",
+    "status": "full",
+    "main_language": "English",
+    "model_type": "proprietary",
+    "result_metrics": {
+      "enem_challenge": 0.8509447165850245,
+      "bluex": 0.7872044506258693,
+      "oab_exams": 0.7061503416856492,
+      "assin2_sts": 0.8492479991621328,
+      "assin2_rte": 0.9216548775103446,
+      "faquad_nli": 0.7652777777777777,
+      "hatebr_offensive": 0.8522499647780968,
+      "portuguese_hate_speech": 0.7501387383201693,
+      "tweetsentbr": 0.7675746509081982
+    },
+    "result_metrics_average": 0.8056048352614735,
+    "result_metrics_npm": 0.6986042497176748
   }
 ]
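
As a sanity check on the new entries, `result_metrics_average` matches the unweighted mean of the per-task scores in `result_metrics` (e.g. 0.791793… for `llama3_3_70b`). Below is a minimal sketch of that check, assuming the script runs next to `external_models_results.json` and that the file's top level is the JSON array shown in the diff; the normalization behind `result_metrics_npm` is not part of this commit, so it is not recomputed here.

```python
import json
from statistics import mean

# Load the results file touched by this commit (path assumed: repository root).
with open("external_models_results.json", encoding="utf-8") as f:
    entries = json.load(f)  # top-level JSON array of model entries

for entry in entries:
    task_scores = entry["result_metrics"]  # per-task scores for this entry
    recomputed = mean(task_scores.values())
    stored = entry["result_metrics_average"]
    # Flag any entry whose stored average drifts from the plain mean of its tasks.
    flag = "" if abs(recomputed - stored) < 1e-9 else "  <-- mismatch"
    print(f"{entry['model']}: stored={stored:.6f} recomputed={recomputed:.6f}{flag}")
```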