import { Benchmark } from "./types";

/**
 * Benchmark results and pricing entries for Google (Gemini) models.
 *
 * Every entry carries the model name, the provider ("Google"), an input and
 * output price, a `benchmark` map from benchmark identifier to score, and a
 * `source` URL.
 *
 * NOTE(review): prices are presumably USD per 1M tokens and scores are
 * presumably percentages — confirm against the `Benchmark` type in `./types`.
 */
export const googleBenchmarks: Benchmark[] = [
  {
    model: "Gemini Diffusion",
    provider: "Google",
    // NOTE(review): 0 presumably means "no published price" rather than free — confirm.
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      livecodebench_v6: 30.9,
      bigcodebench: 45.4,
      lbpp_v2: 56.8,
      swe_bench_verified: 22.9,
      humaneval: 89.6,
      mbpp: 76.0,
      gpqa_diamond: 40.4,
      aime_2025: 23.3,
      bigbench_extra_hard: 15.0,
      global_mmlu_lite: 69.1,
    },
    source: "https://deepmind.google/models/gemini-diffusion/",
  },
  {
    model: "Gemini 2.0 Flash-Lite",
    provider: "Google",
    inputPrice: 0.10,
    outputPrice: 0.40,
    benchmark: {
      livecodebench_v6: 28.5,
      bigcodebench: 45.8,
      lbpp_v2: 56.0,
      swe_bench_verified: 28.5,
      humaneval: 90.2,
      mbpp: 75.8,
      gpqa_diamond: 56.5,
      aime_2025: 20.0,
      bigbench_extra_hard: 21.0,
      global_mmlu_lite: 79.0,
    },
    // NOTE(review): same URL as the Gemini Diffusion entry — presumably these
    // figures come from the comparison table on that page; confirm.
    source: "https://deepmind.google/models/gemini-diffusion/",
  },

  {
    model: "Gemini 2.5 Flash Preview (05-20)",
    provider: "Google",
    inputPrice: 0.15,
    outputPrice: 3.5,
    source: "https://ai.google.dev/gemini-api/docs/thinking",
    benchmark: {
      aime_2025: 72.0,
      gpqa_diamond: 82.8,
      simpleqa: 26.9,
      global_mmlu_lite: 88.4,
      swe_bench_verified: 60.4,
      livecodebench_v6: 63.9,
      mmmu: 79.7,
      lbpp_v2: 61.9,
      bigcodebench: 56.7,
      facts_grounding: 85.3,
      humanitys_last_exam: 11.0,
      mrcr_v2_avg_128k: 74.0,
      mrcr_v2_pointwise_1m: 32.0,
    },
  },
  {
    model: "Gemini 2.5 Flash Preview (04-17) Thinking",
    provider: "Google",
    inputPrice: 0.15,
    outputPrice: 3.5,
    source: "https://ai.google.dev/gemini-api/docs/thinking",
    benchmark: {
      aime_2025: 78.0,
      gpqa_diamond: 78.3,
      simpleqa: 29.7,
      global_mmlu_lite: 88.4,
      livecodebench_v6: 63.5,
      lbpp_v2: 51.1,
      bigcodebench: 44.2,
      mmmu: 76.7,
      humanitys_last_exam: 12.1,
    },
  },
  {
    model: "Gemini 2.0 Flash",
    provider: "Google",
    inputPrice: 0.1,
    outputPrice: 0.4,
    source: "https://ai.google.dev/gemini-api/docs/thinking",
    benchmark: {
      aime_2025: 27.5,
      gpqa_diamond: 60.1,
      simpleqa: 29.9,
      global_mmlu_lite: 83.4,
      livecodebench_v6: 34.5,
      lbpp_v2: 22.2,
      mmmu: 71.7,
      facts_grounding: 84.6,
      humanitys_last_exam: 5.1,
      mrcr_v2_avg_128k: 36.0,
      mrcr_v2_pointwise_1m: 6.0,
    },
  },
  {
    model: "Gemini 2.5 Pro Preview (05-06)",
    provider: "Google",
    inputPrice: 2.5,
    outputPrice: 15.0,
    source: "https://blog.google/products/gemini/gemini-2-5-pro-updates/",
    benchmark: {
      humanitys_last_exam: 17.8,
      gpqa_diamond: 83.0,
      aime_2025: 83.0,
      livecodebench_v6: 75.6,
      lbpp_v2: 76.5,
      bigcodebench: 72.7,
      swe_bench_verified: 63.2,
      simpleqa: 50.8,
      mmmu: 79.6,

      video_mme: 84.8,
      mrcr_v2_avg_128k: 93.0,
      mrcr_v2_pointwise_1m: 82.9,
      global_mmlu_lite: 88.6,
    },
  },
  {
    model: "Gemini 2.5 Pro Experimental (03-25)",
    provider: "Google",
    inputPrice: 2.5,
    outputPrice: 15.0,
    source: "https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/",
    benchmark: {
      humanitys_last_exam: 18.8,
      gpqa_diamond: 84.0,
      aime_2025: 86.7,
      livecodebench_v6: 70.4,
      lbpp_v2: 74.0,
      bigcodebench: 68.6,
      swe_bench_verified: 63.8,
      simpleqa: 52.9,
      mmmu: 81.7,
      mrcr_v2_avg_128k: 94.5,
      mrcr_v2_pointwise_1m: 83.1,
      global_mmlu_lite: 89.8,
    },
  },
];