meghsn committed
Commit 5e482c6 · 1 Parent(s): 5d1fb89

Fixed formatting

results/GenericAgent-AgentTrek-1.0-32b/webarena.json CHANGED
@@ -2,7 +2,7 @@
   {
     "agent_name": "GenericAgent-AgentTrek-1.0-32b",
     "study_id": "ac309635-f3fd-417e-ac16-1e0fc943a54f",
-    "date_time": "2025-01-25 10-16-41",
+    "date_time": "2025-01-25 10:16:41",
     "benchmark": "WebArena",
     "score": 22.4,
     "std_err": 1.5,
results/GenericAgent-AgentTrek-1.0-32b/workarena-l1.json CHANGED
@@ -3,7 +3,7 @@
     "agent_name": "GenericAgent-AgentTrek-1.0-32b",
     "study_id": "ed14232c-cd7e-4708-b334-ebaf1f220000",
     "date_time": "2025-01-12 00:37:04",
-    "benchmark": "Workarena_L1",
+    "benchmark": "WorkArena-L1",
     "score": 38.29,
     "std_err": 2.70,
     "benchmark_specific": "No",
results/GenericAgent-AgentTrek-1.0-32b/workarena-l2.json CHANGED
@@ -3,7 +3,7 @@
     "agent_name": "GenericAgent-AgentTrek-1.0-32b",
     "study_id": "957fb895-8548-46f4-92f0-5de6be7ceb61",
     "date_time": "2025-01-12 09:39:21",
-    "benchmark": "Workarena_L2",
+    "benchmark": "WorkArena-L2",
     "score": 2.98,
     "std_err": 1.10,
     "benchmark_specific": "No",
results/GenericAgent-AgentTrek-1.0-32b/workarena-l3.json CHANGED
@@ -3,7 +3,7 @@
     "agent_name": "GenericAgent-AgentTrek-1.0-32b",
     "study_id": "a951b33f-d118-4cf4-a2ef-cc2ef204eeb0",
     "date_time": "2025-01-13 12:11:45",
-    "benchmark": "Workarena_L3",
+    "benchmark": "WorkArena-L3",
     "score": 0.0,
     "std_err": 0.0,
     "benchmark_specific": "No",