Enderchef commited on
Commit
2423db5
·
verified ·
1 Parent(s): 4e79574

Update eval.jsonl

Browse files
Files changed (1) hide show
  1. eval.jsonl +6 -0
eval.jsonl CHANGED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"model_id": "Gemini 2.5 Pro", "benchmark": "MMLU", "subject": "ALL", "accuracy": 88.00, "sample_count": 1000, "timestamp": "2025-06-25T14:17:00.000000"}
2
+ {"model_id": "ChatGPT 4.5", "benchmark": "MMLU", "subject": "ALL", "accuracy": 86.50, "sample_count": 1000, "timestamp": "2025-06-25T14:17:01.000000"}
3
+ {"model_id": "Llama 4 Scout", "benchmark": "MMLU", "subject": "ALL", "accuracy": 85.00, "sample_count": 1000, "timestamp": "2025-06-25T14:17:02.000000"}
4
+ {"model_id": "Qwen3 235B A22B", "benchmark": "MMLU", "subject": "ALL", "accuracy": 87.20, "sample_count": 1000, "timestamp": "2025-06-25T14:17:03.000000"}
5
+ {"model_id": "Mistral-Small-3.2-24B-Instruct-2506", "benchmark": "MMLU", "subject": "ALL", "accuracy": 84.80, "sample_count": 1000, "timestamp": "2025-06-25T14:17:04.000000"}
6
+ {"model_id": "Claude 4 Opus", "benchmark": "MMLU", "subject": "ALL", "accuracy": 89.10, "sample_count": 1000, "timestamp": "2025-06-25T14:17:05.000000"}