llm-pricing-calculator

Running

App Files Files Community

Presidentlin committed 2 days ago

Commit

be79a27

1 Parent(s): f23b928

x

Browse files

Files changed (3) hide show

src/App.tsx +22 -19
src/lib/benchmarks/google.ts +111 -0
src/lib/benchmarks/types.ts +10 -2

src/App.tsx CHANGED Viewed

@@ -125,30 +125,33 @@ const App: React.FC = () => {
   ]);
-  const sortedBenchmarkedModels = useMemo(() => {
-    if (!benchmarkSortConfig) return filteredBenchmarkedModels;
-    return [...benchmarkedModels].sort((a, b) => {
-      const key = benchmarkSortConfig.key;
-      const aVal = key === "provider" || key === "name"
-        ? (a as any)[key]?.toLowerCase?.() ?? ""
-        : a.benchmark?.[key] ?? -Infinity;
-      const bVal = key === "provider" || key === "name"
-        ? (b as any)[key]?.toLowerCase?.() ?? ""
-        : b.benchmark?.[key] ?? -Infinity;
-      if (typeof aVal === "string" && typeof bVal === "string") {
-        return benchmarkSortConfig.direction === "ascending"
-          ? aVal.localeCompare(bVal)
-          : bVal.localeCompare(aVal);
-      }
       return benchmarkSortConfig.direction === "ascending"
-        ? aVal - bVal
-        : bVal - aVal;
-    });
-  }, [filteredBenchmarkedModels, benchmarkSortConfig]);
   const pricingProviders = useMemo(() => {
     const grouped: Record<string, FlattenedModel[]> = {};

   ]);
+const sortedBenchmarkedModels = useMemo(() => {
+  if (!benchmarkSortConfig) return filteredBenchmarkedModels;
+  return [...filteredBenchmarkedModels].sort((a, b) => {
+    const key = benchmarkSortConfig.key;
+    const isTopLevelKey = ["provider", "name", "inputPrice", "outputPrice"].includes(key);
+    const aVal = isTopLevelKey
+      ? (a as any)[key]
+      : a.benchmark?.[key] ?? -Infinity;
+    const bVal = isTopLevelKey
+      ? (b as any)[key]
+      : b.benchmark?.[key] ?? -Infinity;
+    if (typeof aVal === "string" && typeof bVal === "string") {
       return benchmarkSortConfig.direction === "ascending"
+        ? aVal.localeCompare(bVal)
+        : bVal.localeCompare(aVal);
+    }
+    return benchmarkSortConfig.direction === "ascending"
+      ? aVal - bVal
+      : bVal - aVal;
+  });
+}, [filteredBenchmarkedModels, benchmarkSortConfig]);
   const pricingProviders = useMemo(() => {
     const grouped: Record<string, FlattenedModel[]> = {};

src/lib/benchmarks/google.ts CHANGED Viewed

@@ -40,4 +40,115 @@ export const googleBenchmarks: Benchmark[] = [
         },
         source: "https://deepmind.google/models/gemini-diffusion/",
     },
 ];

         },
         source: "https://deepmind.google/models/gemini-diffusion/",
     },
+    {
+        model: "Gemini 2.5 Flash Preview (05-20)",
+        provider: "Google",
+        inputPrice: 0.15,
+        outputPrice: 3.5,
+        source: "https://ai.google.dev/gemini-api/docs/thinking",
+        benchmark: {
+            aime_2025: 72.0,
+            gpqa_diamond: 82.8,
+            simpleqa: 26.9,
+            global_mmlu_lite: 88.4,
+            swe_bench_verified: 60.4,
+            livecodebench_v6: 63.9,
+            mmmu: 79.7,
+            lbpp_v2: 61.9,
+            bigcodebench: 56.7,
+            facts_grounding: 85.3,
+            humanitys_last_exam: 11.0,
+            mrcr_v2_avg_128k: 74.0,
+            mrcr_v2_pointwise_1m: 32.0,
+            vibe_eval_reka: 65.4,
+        },
+    },
+    {
+        model: "Gemini 2.5 Flash Preview (04-17) Thinking",
+        provider: "Google",
+        inputPrice: 0.15,
+        outputPrice: 3.5,
+        source: "https://ai.google.dev/gemini-api/docs/thinking",
+        benchmark: {
+            aime_2025: 78.0,
+            gpqa_diamond: 78.3,
+            simpleqa: 29.7,
+            global_mmlu_lite: 88.4,
+            livecodebench_v6: 63.5,
+            lbpp_v2: 51.1,
+            bigcodebench: 44.2,
+            mmmu: 76.7,
+            humanitys_last_exam: 12.1,
+            vibe_eval_reka: 62.0,
+        },
+    },
+    {
+        model: "Gemini 2.0 Flash",
+        provider: "Google",
+        inputPrice: 0.1,
+        outputPrice: 0.4,
+        source: "https://ai.google.dev/gemini-api/docs/thinking",
+        benchmark: {
+            aime_2025: 27.5,
+            gpqa_diamond: 60.1,
+            simpleqa: 29.9,
+            global_mmlu_lite: 83.4,
+            livecodebench_v6: 34.5,
+            lbpp_v2: 22.2,
+            mmmu: 71.7,
+            facts_grounding: 84.6,
+            humanitys_last_exam: 5.1,
+            mrcr_v2_avg_128k: 36.0,
+            mrcr_v2_pointwise_1m: 6.0,
+            vibe_eval_reka: 56.4,
+        },
+    },
+    {
+        model: "Gemini 2.5 Pro Preview (05-06)",
+        provider: "Google",
+        inputPrice: 2.5,
+        outputPrice: 15.0,
+        source: "https://blog.google/products/gemini/gemini-2-5-pro-updates/",
+        benchmark: {
+            humanitys_last_exam: 17.8,
+            gpqa_diamond: 83.0,
+            aime_2025: 83.0,
+            livecodebench_v6: 75.6,
+            lbpp_v2: 76.5,
+            bigcodebench: 72.7,
+            swe_bench_verified: 63.2,
+            simpleqa: 50.8,
+            mmmu: 79.6,
+            vibe_eval_reka: 65.6,
+            video_mme: 84.8,
+            mrcr_v2_avg_128k: 93.0,
+            mrcr_v2_pointwise_1m: 82.9,
+            global_mmlu_lite: 88.6,
+        },
+    },
+    {
+        model: "Gemini 2.5 Pro Experimental (03-25)",
+        provider: "Google",
+        inputPrice: 2.5,
+        outputPrice: 15.0,
+        source: "https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/",
+        benchmark: {
+            humanitys_last_exam: 18.8,
+            gpqa_diamond: 84.0,
+            aime_2025: 86.7,
+            livecodebench_v6: 70.4,
+            lbpp_v2: 74.0,
+            bigcodebench: 68.6,
+            swe_bench_verified: 63.8,
+            simpleqa: 52.9,
+            mmmu: 81.7,
+            vibe_eval_reka: 69.4,
+            mrcr_v2_avg_128k: 94.5,
+            mrcr_v2_pointwise_1m: 83.1,
+            global_mmlu_lite: 89.8,
+        },
+    },
 ];

src/lib/benchmarks/types.ts CHANGED Viewed

@@ -16,13 +16,21 @@ export type BenchmarkMetric =
   | "humaneval"
   | "mbpp"
   | "bigbench_extra_hard"
-  | "global_mmlu_lite";
 export interface Benchmark {
   model: string;
   provider: string;
   benchmark: Partial<Record<BenchmarkMetric, number>>;
- inputPrice: number;
   outputPrice: number;
   source: string;
   version?: string;

   | "humaneval"
   | "mbpp"
   | "bigbench_extra_hard"
+  | "global_mmlu_lite"
+  // ADD THESE:
+  | "facts_grounding"
+  | "humanitys_last_exam"
+  | "mrcr_v2_avg_128k"
+  | "mrcr_v2_pointwise_1m"
+  | "video_mme"
+  | "vibe_eval_reka";
 export interface Benchmark {
   model: string;
   provider: string;
   benchmark: Partial<Record<BenchmarkMetric, number>>;
+  inputPrice: number;
   outputPrice: number;
   source: string;
   version?: string;