import type { Benchmark } from "./types";

/**
 * Benchmark and pricing records for OpenAI models.
 *
 * Every record carries the model identifier, the provider name, an input and
 * an output price, the URL the scores are attributed to, and a `benchmark`
 * map of score name to value.
 *
 * NOTE(review): price units are not stated in this file (presumably USD per
 * 1M tokens) — confirm against the `Benchmark` type / the consumer.
 *
 * The `math`, `mgsm` and `drop` scores are intentionally left as comments:
 * they are recorded next to each model for reference but are NOT part of the
 * exported data. Each of them must stay on its own line — a `//` comment runs
 * to the end of the physical line, so joining lines silently comments out
 * every property and entry that follows.
 */
export const openaiBenchmarks: Benchmark[] = [
  {
    model: "GPT-4o-2024-11-20",
    provider: "OpenAI",
    inputPrice: 2.5,
    outputPrice: 10.0,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 85.7,
      gpqa: 46.0,
      humaneval: 90.2,
      simpleqa: 38.8,
      // math: 68.5,
      // mgsm: 90.3,
      // drop: 81.5,
    },
  },
  {
    model: "GPT-4o-2024-08-06",
    provider: "OpenAI",
    inputPrice: 2.5,
    outputPrice: 10.0,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 88.7,
      gpqa: 53.1,
      humaneval: 90.2,
      simpleqa: 40.1,
      // math: 75.9,
      // mgsm: 90.0,
      // drop: 79.8,
    },
  },
  {
    model: "GPT-4o-2024-05-13",
    provider: "OpenAI",
    inputPrice: 5.0,
    outputPrice: 15.0,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 87.2,
      gpqa: 49.9,
      humaneval: 91.0,
      simpleqa: 39.0,
      // math: 76.6,
      // mgsm: 89.9,
      // drop: 83.7,
    },
  },
  {
    model: "GPT-4o-mini-2024-07-18",
    provider: "OpenAI",
    inputPrice: 0.15,
    outputPrice: 0.60,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 82.0,
      gpqa: 40.2,
      humaneval: 87.2,
      mmmu: 59.4,
      simpleqa: 9.5,
      // mgsm: 87.0,
      // drop: 79.7,
      // math: 70.2,
    },
  },
  {
    model: "GPT-4.1-2025-04-14",
    provider: "OpenAI",
    inputPrice: 2.0,
    outputPrice: 8.0,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 90.2,
      gpqa: 66.3,
      humaneval: 94.5,
      simpleqa: 41.6,
      // math: 82.1,
      // mgsm: 86.9,
      // drop: 79.4,
    },
  },
  {
    model: "GPT-4.1-mini-2025-04-14",
    provider: "OpenAI",
    inputPrice: 0.4,
    outputPrice: 1.6,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 87.5,
      gpqa: 65.0,
      humaneval: 93.8,
      simpleqa: 16.8,
      // math: 81.4,
      // mgsm: 88.2,
      // drop: 81.0,
    },
  },
  {
    model: "GPT-4.1-nano-2025-04-14",
    provider: "OpenAI",
    inputPrice: 0.1,
    outputPrice: 0.4,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 80.1,
      gpqa: 50.3,
      humaneval: 87.0,
      simpleqa: 7.6,
      // math: 62.3,
      // mgsm: 73.0,
      // drop: 82.2,
    },
  },
  {
    model: "GPT-4.5-preview-2025-02-27",
    provider: "OpenAI",
    inputPrice: 75.0,
    outputPrice: 150.0,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 90.8,
      gpqa: 69.5,
      simpleqa: 62.5,
      humaneval: 88.6,
      // mgsm: 86.9,
      // drop: 83.4,
      // math: 87.1,
    },
  },
  {
    model: "GPT-4-turbo-2024-04-09",
    provider: "OpenAI",
    inputPrice: 10.0,
    outputPrice: 30.0,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 86.7,
      gpqa: 49.3,
      humaneval: 88.2,
      simpleqa: 24.2,
      // math: 73.4,
      // mgsm: 89.6,
      // drop: 86.0,
    },
  },
  {
    model: "GPT-4-0125-preview",
    provider: "OpenAI",
    inputPrice: 10.0,
    outputPrice: 30.0,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 85.4,
      gpqa: 41.4,
      humaneval: 86.6,
      // math: 64.5,
      // mgsm: 85.1,
      // drop: 81.5,
    },
  },
  {
    model: "GPT-4-1106-preview",
    provider: "OpenAI",
    inputPrice: 10.0,
    outputPrice: 30.0,
    source: "https://github.com/openai/simple-evals",
    benchmark: {
      mmlu: 84.7,
      gpqa: 42.5,
      humaneval: 83.7,
      // math: 64.3,
      // mgsm: 87.1,
      // drop: 83.2,
    },
  },
];