import { Benchmark } from "./types";

/**
 * Benchmark records for OpenAI models.
 *
 * Each entry carries the model identifier, the provider name, an input and
 * output price, the URL the scores were taken from, and a `benchmark` map of
 * score name to value. Not every model reports every score: `mmmu` appears
 * only on the GPT-4o-mini entry, and `simpleqa` is absent from the two
 * oldest GPT-4 preview entries.
 *
 * NOTE(review): prices are presumably USD per 1M tokens and scores are
 * presumably percentages — confirm against the `Benchmark` type in `./types`.
 */
export const openaiBenchmarks: Benchmark[] = [
  {
    model: "GPT-4o-2024-11-20",
    provider: "OpenAI",
    inputPrice: 2.5,
    outputPrice: 10.0,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 85.7,
      gpqa: 46.0,
      humaneval: 90.2,
      simpleqa: 38.8,
    },
  },
  {
    model: "GPT-4o-2024-08-06",
    provider: "OpenAI",
    inputPrice: 2.5,
    outputPrice: 10.0,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 88.7,
      gpqa: 53.1,
      humaneval: 90.2,
      simpleqa: 40.1,
    },
  },
  {
    model: "GPT-4o-2024-05-13",
    provider: "OpenAI",
    inputPrice: 5.0,
    outputPrice: 15.0,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 87.2,
      gpqa: 49.9,
      humaneval: 91.0,
      simpleqa: 39.0,
    },
  },
  {
    model: "GPT-4o-mini-2024-07-18",
    provider: "OpenAI",
    inputPrice: 0.15,
    outputPrice: 0.6,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 82.0,
      gpqa: 40.2,
      humaneval: 87.2,
      mmmu: 59.4,
      simpleqa: 9.5,
    },
  },
  {
    model: "GPT-4.1-2025-04-14",
    provider: "OpenAI",
    inputPrice: 2.0,
    outputPrice: 8.0,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 90.2,
      gpqa: 66.3,
      humaneval: 94.5,
      simpleqa: 41.6,
    },
  },
  {
    model: "GPT-4.1-mini-2025-04-14",
    provider: "OpenAI",
    inputPrice: 0.4,
    outputPrice: 1.6,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 87.5,
      gpqa: 65.0,
      humaneval: 93.8,
      simpleqa: 16.8,
    },
  },
  {
    model: "GPT-4.1-nano-2025-04-14",
    provider: "OpenAI",
    inputPrice: 0.1,
    outputPrice: 0.4,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 80.1,
      gpqa: 50.3,
      humaneval: 87.0,
      simpleqa: 7.6,
    },
  },
  {
    model: "GPT-4.5-preview-2025-02-27",
    provider: "OpenAI",
    inputPrice: 75.0,
    outputPrice: 150.0,
    source: "https://github.com/openai/simple-evals",
    // Key order kept as in the original entry (simpleqa before humaneval)
    // so that any consumer iterating the keys sees the same sequence.
    benchmark: {
      mmlu: 90.8,
      gpqa: 69.5,
      simpleqa: 62.5,
      humaneval: 88.6,
    },
  },
  {
    model: "GPT-4-turbo-2024-04-09",
    provider: "OpenAI",
    inputPrice: 10.0,
    outputPrice: 30.0,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 86.7,
      gpqa: 49.3,
      humaneval: 88.2,
      simpleqa: 24.2,
    },
  },
  {
    model: "GPT-4-0125-preview",
    provider: "OpenAI",
    inputPrice: 10.0,
    outputPrice: 30.0,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 85.4,
      gpqa: 41.4,
      humaneval: 86.6,
    },
  },
  {
    model: "GPT-4-1106-preview",
    provider: "OpenAI",
    inputPrice: 10.0,
    outputPrice: 30.0,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 84.7,
      gpqa: 42.5,
      humaneval: 83.7,
    },
  },
];