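// Web file-viewer residue captured alongside this file (not part of the module);
// kept as comments so the file parses as TypeScript:
//   Presidentlin's picture
//   x
//   a9899bf
//   raw
//   history blame
//   4.35 kB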
import { Benchmark } from "./types";
/**
 * Benchmark records for Google models.
 *
 * Every record carries the model name, the provider, an input and an output
 * price, a map of benchmark identifiers to scores, and the URL the figures
 * were taken from. Records do not all report the same set of benchmarks, and
 * the position of `source` relative to `benchmark` varies between records;
 * key order is kept exactly as authored so enumeration order is stable.
 *
 * NOTE(review): price units and score scale are not stated in this file —
 * presumably USD per 1M tokens and percentages; confirm against `./types`.
 */
export const googleBenchmarks: Benchmark[] = [
  // --- Gemini Diffusion (listed with zero input/output price) ---
  {
    model: "Gemini Diffusion",
    provider: "Google",
    inputPrice: 0,
    outputPrice: 0,
    benchmark: {
      livecodebench_v6: 30.9,
      bigcodebench: 45.4,
      lbpp_v2: 56.8,
      swe_bench_verified: 22.9,
      humaneval: 89.6,
      mbpp: 76.0,
      gpqa_diamond: 40.4,
      aime_2025: 23.3,
      bigbench_extra_hard: 15.0,
      global_mmlu_lite: 69.1,
    },
    source: "https://deepmind.google/models/gemini-diffusion/",
  },

  // --- Gemini 2.0 Flash-Lite ---
  // Shares its source URL with the Gemini Diffusion record above and reports
  // the same ten benchmarks.
  // NOTE(review): the prices here equal those of the "Gemini 2.0 Flash"
  // record further down — confirm against the current pricing page.
  {
    model: "Gemini 2.0 Flash-Lite",
    provider: "Google",
    inputPrice: 0.1,
    outputPrice: 0.4,
    benchmark: {
      livecodebench_v6: 28.5,
      bigcodebench: 45.8,
      lbpp_v2: 56.0,
      swe_bench_verified: 28.5,
      humaneval: 90.2,
      mbpp: 75.8,
      gpqa_diamond: 56.5,
      aime_2025: 20.0,
      bigbench_extra_hard: 21.0,
      global_mmlu_lite: 79.0,
    },
    source: "https://deepmind.google/models/gemini-diffusion/",
  },

  // --- Gemini 2.5 Flash Preview (05-20) ---
  {
    model: "Gemini 2.5 Flash Preview (05-20)",
    provider: "Google",
    inputPrice: 0.15,
    outputPrice: 3.5,
    source: "https://ai.google.dev/gemini-api/docs/thinking",
    benchmark: {
      aime_2025: 72.0,
      gpqa_diamond: 82.8,
      simpleqa: 26.9,
      global_mmlu_lite: 88.4,
      swe_bench_verified: 60.4,
      livecodebench_v6: 63.9,
      mmmu: 79.7,
      lbpp_v2: 61.9,
      bigcodebench: 56.7,
      facts_grounding: 85.3,
      humanitys_last_exam: 11.0,
      mrcr_v2_avg_128k: 74.0,
      mrcr_v2_pointwise_1m: 32.0,
    },
  },

  // --- Gemini 2.5 Flash Preview (04-17) Thinking ---
  {
    model: "Gemini 2.5 Flash Preview (04-17) Thinking",
    provider: "Google",
    inputPrice: 0.15,
    outputPrice: 3.5,
    source: "https://ai.google.dev/gemini-api/docs/thinking",
    benchmark: {
      aime_2025: 78.0,
      gpqa_diamond: 78.3,
      simpleqa: 29.7,
      global_mmlu_lite: 88.4,
      livecodebench_v6: 63.5,
      lbpp_v2: 51.1,
      bigcodebench: 44.2,
      mmmu: 76.7,
      humanitys_last_exam: 12.1,
    },
  },

  // --- Gemini 2.0 Flash ---
  {
    model: "Gemini 2.0 Flash",
    provider: "Google",
    inputPrice: 0.1,
    outputPrice: 0.4,
    source: "https://ai.google.dev/gemini-api/docs/thinking",
    benchmark: {
      aime_2025: 27.5,
      gpqa_diamond: 60.1,
      simpleqa: 29.9,
      global_mmlu_lite: 83.4,
      livecodebench_v6: 34.5,
      lbpp_v2: 22.2,
      mmmu: 71.7,
      facts_grounding: 84.6,
      humanitys_last_exam: 5.1,
      mrcr_v2_avg_128k: 36.0,
      mrcr_v2_pointwise_1m: 6.0,
    },
  },

  // --- Gemini 2.5 Pro Preview (05-06) ---
  {
    model: "Gemini 2.5 Pro Preview (05-06)",
    provider: "Google",
    inputPrice: 2.5,
    outputPrice: 15.0,
    source: "https://blog.google/products/gemini/gemini-2-5-pro-updates/",
    benchmark: {
      humanitys_last_exam: 17.8,
      gpqa_diamond: 83.0,
      aime_2025: 83.0,
      livecodebench_v6: 75.6,
      lbpp_v2: 76.5,
      bigcodebench: 72.7,
      swe_bench_verified: 63.2,
      simpleqa: 50.8,
      mmmu: 79.6,
      video_mme: 84.8,
      mrcr_v2_avg_128k: 93.0,
      mrcr_v2_pointwise_1m: 82.9,
      global_mmlu_lite: 88.6,
    },
  },

  // --- Gemini 2.5 Pro Experimental (03-25) ---
  {
    model: "Gemini 2.5 Pro Experimental (03-25)",
    provider: "Google",
    inputPrice: 2.5,
    outputPrice: 15.0,
    source:
      "https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/",
    benchmark: {
      humanitys_last_exam: 18.8,
      gpqa_diamond: 84.0,
      aime_2025: 86.7,
      livecodebench_v6: 70.4,
      lbpp_v2: 74.0,
      bigcodebench: 68.6,
      swe_bench_verified: 63.8,
      simpleqa: 52.9,
      mmmu: 81.7,
      mrcr_v2_avg_128k: 94.5,
      mrcr_v2_pointwise_1m: 83.1,
      global_mmlu_lite: 89.8,
    },
  },
];