llm-pricing-calculator

Running

App Files Files Community

Presidentlin committed 2 days ago

Commit

a9899bf

1 Parent(s): be79a27

x

Browse files

Files changed (3) hide show

src/App.tsx +25 -23
src/lib/benchmarks/google.ts +5 -6
src/lib/benchmarks/types.ts +53 -14

src/App.tsx CHANGED Viewed

@@ -9,7 +9,7 @@ import { PricingTable } from "@/components/PricingTable";
 import { BenchmarkTable } from "./components/BenchmarkTable";
 import { benchmarkData } from "./lib/benchmarks/ index";
 import { BenchmarkComparisonSelector } from "./components/BenchmarkComparisonSelector";
 export interface FlattenedModel extends Model {
   provider: string;
@@ -125,32 +125,32 @@ const App: React.FC = () => {
   ]);
-const sortedBenchmarkedModels = useMemo(() => {
-  if (!benchmarkSortConfig) return filteredBenchmarkedModels;
-  return [...filteredBenchmarkedModels].sort((a, b) => {
-    const key = benchmarkSortConfig.key;
-    const isTopLevelKey = ["provider", "name", "inputPrice", "outputPrice"].includes(key);
-    const aVal = isTopLevelKey
-      ? (a as any)[key]
-      : a.benchmark?.[key] ?? -Infinity;
-    const bVal = isTopLevelKey
-      ? (b as any)[key]
-      : b.benchmark?.[key] ?? -Infinity;
-    if (typeof aVal === "string" && typeof bVal === "string") {
-      return benchmarkSortConfig.direction === "ascending"
-        ? aVal.localeCompare(bVal)
-        : bVal.localeCompare(aVal);
-    }
-    return benchmarkSortConfig.direction === "ascending"
-      ? aVal - bVal
-      : bVal - aVal;
-  });
-}, [filteredBenchmarkedModels, benchmarkSortConfig]);
   const pricingProviders = useMemo(() => {
@@ -305,7 +305,9 @@ const sortedBenchmarkedModels = useMemo(() => {
         {/* Benchmark Table */}
         <h3 className="text-lg font-semibold mt-12 mb-2">Select Benchmark Metrics to Compare</h3>
         <BenchmarkComparisonSelector
-          allMetrics={Array.from(new Set(benchmarkedModels.flatMap((m) => Object.keys(m.benchmark ?? {})))).sort()}
           selected={benchmarkComparisonMetrics}
           onChange={(metric, checked) =>
             setBenchmarkComparisonMetrics((prev) =>

 import { BenchmarkTable } from "./components/BenchmarkTable";
 import { benchmarkData } from "./lib/benchmarks/ index";
 import { BenchmarkComparisonSelector } from "./components/BenchmarkComparisonSelector";
+import { benchmarkMetricOrder } from "./lib/benchmarks/types";
 export interface FlattenedModel extends Model {
   provider: string;
   ]);
+  const sortedBenchmarkedModels = useMemo(() => { // Sorted view of filteredBenchmarkedModels, driven by benchmarkSortConfig
+    if (!benchmarkSortConfig) return filteredBenchmarkedModels; // no sort configured: return the filtered list unchanged
+    return [...filteredBenchmarkedModels].sort((a, b) => { // sort a copy so the filtered array itself is not mutated
+      const key = benchmarkSortConfig.key;
+      const isTopLevelKey = ["provider", "name", "inputPrice", "outputPrice"].includes(key); // these keys are read from the model object itself; any other key is looked up in model.benchmark
+      const aVal = isTopLevelKey
+        ? (a as any)[key]
+        : a.benchmark?.[key] ?? -Infinity; // a missing benchmark value falls back to -Infinity
+      const bVal = isTopLevelKey
+        ? (b as any)[key]
+        : b.benchmark?.[key] ?? -Infinity;
+      if (typeof aVal === "string" && typeof bVal === "string") { // two strings: order with localeCompare
+        return benchmarkSortConfig.direction === "ascending"
+          ? aVal.localeCompare(bVal)
+          : bVal.localeCompare(aVal);
+      }
+      return benchmarkSortConfig.direction === "ascending" // otherwise order by subtraction; NOTE(review): when both values are -Infinity the difference is NaN
+        ? aVal - bVal
+        : bVal - aVal;
+    });
+  }, [filteredBenchmarkedModels, benchmarkSortConfig]);
   const pricingProviders = useMemo(() => {
         {/* Benchmark Table */}
         <h3 className="text-lg font-semibold mt-12 mb-2">Select Benchmark Metrics to Compare</h3>
         <BenchmarkComparisonSelector
+          allMetrics={benchmarkMetricOrder.filter(
+            (metric) => benchmarkedModels.some((m) => m.benchmark?.[metric] !== undefined)
+          )}
           selected={benchmarkComparisonMetrics}
           onChange={(metric, checked) =>
             setBenchmarkComparisonMetrics((prev) =>

src/lib/benchmarks/google.ts CHANGED Viewed

@@ -61,7 +61,7 @@ export const googleBenchmarks: Benchmark[] = [
             humanitys_last_exam: 11.0,
             mrcr_v2_avg_128k: 74.0,
             mrcr_v2_pointwise_1m: 32.0,
-            vibe_eval_reka: 65.4,
         },
     },
     {
@@ -79,8 +79,8 @@ export const googleBenchmarks: Benchmark[] = [
             lbpp_v2: 51.1,
             bigcodebench: 44.2,
             mmmu: 76.7,
-            humanitys_last_exam: 12.1,
-            vibe_eval_reka: 62.0,
         },
     },
     {
@@ -101,7 +101,7 @@ export const googleBenchmarks: Benchmark[] = [
             humanitys_last_exam: 5.1,
             mrcr_v2_avg_128k: 36.0,
             mrcr_v2_pointwise_1m: 6.0,
-            vibe_eval_reka: 56.4,
         },
     },
     {
@@ -120,7 +120,7 @@ export const googleBenchmarks: Benchmark[] = [
             swe_bench_verified: 63.2,
             simpleqa: 50.8,
             mmmu: 79.6,
-            vibe_eval_reka: 65.6,
             video_mme: 84.8,
             mrcr_v2_avg_128k: 93.0,
             mrcr_v2_pointwise_1m: 82.9,
@@ -143,7 +143,6 @@ export const googleBenchmarks: Benchmark[] = [
             swe_bench_verified: 63.8,
             simpleqa: 52.9,
             mmmu: 81.7,
-            vibe_eval_reka: 69.4,
             mrcr_v2_avg_128k: 94.5,
             mrcr_v2_pointwise_1m: 83.1,
             global_mmlu_lite: 89.8,

             humanitys_last_exam: 11.0,
             mrcr_v2_avg_128k: 74.0,
             mrcr_v2_pointwise_1m: 32.0,
         },
     },
     {
             lbpp_v2: 51.1,
             bigcodebench: 44.2,
             mmmu: 76.7,
+            humanitys_last_exam: 12.1
         },
     },
     {
             humanitys_last_exam: 5.1,
             mrcr_v2_avg_128k: 36.0,
             mrcr_v2_pointwise_1m: 6.0,
         },
     },
     {
             swe_bench_verified: 63.2,
             simpleqa: 50.8,
             mmmu: 79.6,
             video_mme: 84.8,
             mrcr_v2_avg_128k: 93.0,
             mrcr_v2_pointwise_1m: 82.9,
             swe_bench_verified: 63.8,
             simpleqa: 52.9,
             mmmu: 81.7,
             mrcr_v2_avg_128k: 94.5,
             mrcr_v2_pointwise_1m: 83.1,
             global_mmlu_lite: 89.8,

src/lib/benchmarks/types.ts CHANGED Viewed

@@ -1,30 +1,36 @@
 export type BenchmarkMetric =
-  | "aime_24"
-  | "aime_2025"
-  | "gpqa"
-  | "gpqa_diamond"
-  | "lcb"
   | "mmlu_pro"
   | "loft"
-  | "simpleqa"
   | "mmmu"
-  | "egoschema"
-  | "livecodebench_v6"
-  | "bigcodebench"
-  | "lbpp_v2"
-  | "swe_bench_verified"
   | "humaneval"
   | "mbpp"
   | "bigbench_extra_hard"
   | "global_mmlu_lite"
-  // ADD THESE:
   | "facts_grounding"
   | "humanitys_last_exam"
   | "mrcr_v2_avg_128k"
   | "mrcr_v2_pointwise_1m"
-  | "video_mme"
-  | "vibe_eval_reka";
 export interface Benchmark {
   model: string;
@@ -35,3 +41,36 @@ export interface Benchmark {
   source: string;
   version?: string;
 }

 export type BenchmarkMetric = // String-literal union of the benchmark metric identifiers, grouped by category below
+  // Most common and high-priority
+  | "simpleqa"
   | "mmlu_pro"
+  | "gpqa"
+  | "egoschema"
   | "loft"
   | "mmmu"
+  | "lcb"
+  | "aime_24"
+  | "aime_2025"
+  | "gpqa_diamond"
+  // Code benchmarks (frequent)
   | "humaneval"
   | "mbpp"
+  | "bigcodebench"
+  | "livecodebench_v6"
+  | "swe_bench_verified"
+  | "lbpp_v2"
+  // General reasoning & robustness
   | "bigbench_extra_hard"
   | "global_mmlu_lite"
+  // Optional: less frequent but still potentially useful
   | "facts_grounding"
   | "humanitys_last_exam"
   | "mrcr_v2_avg_128k"
   | "mrcr_v2_pointwise_1m"
+  | "video_mme";
+// Note: "vibe_eval_reka" is intentionally excluded for now, so it is not a valid BenchmarkMetric value.
 export interface Benchmark {
   model: string;
   source: string;
   version?: string;
 }
+export const benchmarkMetricOrder: BenchmarkMetric[] = [ // Ordered list of metric identifiers; array order is the order consumers see when they filter this list
+  // Most common and high-priority
+  "simpleqa",
+  "mmlu_pro",
+  "gpqa",
+  "egoschema",
+  "loft",
+  "mmmu",
+  "lcb",
+  "aime_24",
+  "aime_2025",
+  "gpqa_diamond",
+  // // Code benchmarks (frequent) — NOTE(review): every entry from here down is commented out, so those metrics are absent from this list; confirm this is intended
+  // "humaneval",
+  // "mbpp",
+  // "bigcodebench",
+  // "livecodebench_v6",
+  // "swe_bench_verified",
+  // "lbpp_v2",
+  // // General reasoning & robustness
+  // "bigbench_extra_hard",
+  // "global_mmlu_lite",
+  // // Optional: less frequent but still potentially useful
+  // "facts_grounding",
+  // "humanitys_last_exam",
+  // "mrcr_v2_avg_128k",
+  // "mrcr_v2_pointwise_1m",
+  // "video_mme",
+];