Armeddinosaur committed on
Commit
a325fdc
·
1 Parent(s): c52f9e4

Updating Metric

Browse files
src/data/metrics/absolute_improvement_to_baseline.json CHANGED
@@ -60,12 +60,12 @@
60
  "MLAB (llama3-1-405b-instruct)": 3.8
61
  },
62
  "backdoor-trigger-recovery": {
63
- "MLAB (gpt-4o)": 74.0,
64
- "Top Human in Competition": 621.3,
65
  "CoI-Agent (o1) + MLAB (gpt-4o)": 24.9,
 
66
  "MLAB (claude-3-5-sonnet-v2)": 247.9,
67
  "MLAB (gemini-exp-1206)": 80.4,
68
  "MLAB (o3-mini)": 38.8,
 
69
  "MLAB (llama3-1-405b-instruct)": 71.7,
70
  "Human Idea + MLAB (gpt-4o)": 54.5
71
  }
 
60
  "MLAB (llama3-1-405b-instruct)": 3.8
61
  },
62
  "backdoor-trigger-recovery": {
 
 
63
  "CoI-Agent (o1) + MLAB (gpt-4o)": 24.9,
64
+ "Top Human in Competition": 621.3,
65
  "MLAB (claude-3-5-sonnet-v2)": 247.9,
66
  "MLAB (gemini-exp-1206)": 80.4,
67
  "MLAB (o3-mini)": 38.8,
68
+ "MLAB (gpt-4o)": 64.5,
69
  "MLAB (llama3-1-405b-instruct)": 71.7,
70
  "Human Idea + MLAB (gpt-4o)": 54.5
71
  }
src/data/metrics/relative_improvement_to_human.json CHANGED
@@ -60,12 +60,12 @@
60
  "MLAB (llama3-1-405b-instruct)": 6.2
61
  },
62
  "backdoor-trigger-recovery": {
63
- "MLAB (gpt-4o)": 11.9,
64
- "Top Human in Competition": 100.0,
65
  "CoI-Agent (o1) + MLAB (gpt-4o)": 4.0,
 
66
  "MLAB (claude-3-5-sonnet-v2)": 39.9,
67
  "MLAB (gemini-exp-1206)": 12.9,
68
  "MLAB (o3-mini)": 6.2,
 
69
  "MLAB (llama3-1-405b-instruct)": 11.5,
70
  "Human Idea + MLAB (gpt-4o)": 8.8
71
  }
 
60
  "MLAB (llama3-1-405b-instruct)": 6.2
61
  },
62
  "backdoor-trigger-recovery": {
 
 
63
  "CoI-Agent (o1) + MLAB (gpt-4o)": 4.0,
64
+ "Top Human in Competition": 100.0,
65
  "MLAB (claude-3-5-sonnet-v2)": 39.9,
66
  "MLAB (gemini-exp-1206)": 12.9,
67
  "MLAB (o3-mini)": 6.2,
68
+ "MLAB (gpt-4o)": 10.4,
69
  "MLAB (llama3-1-405b-instruct)": 11.5,
70
  "Human Idea + MLAB (gpt-4o)": 8.8
71
  }