Thomas G. Lopes committed on
Commit
f415c95
·
unverified ·
1 Parent(s): f36471e

Accurate max tokens (#77)

Browse files
.env.example ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ HYPERBOLIC_API_KEY=
2
+ COHERE_API_KEY=
3
+ TOGETHER_API_KEY=
4
+ FIREWORKS_API_KEY=
5
+ REPLICATE_API_KEY=
6
+ NEBIUS_API_KEY=
7
+ NOVITA_API_KEY=
8
+ FAL_API_KEY=
9
+ HF_TOKEN=
.prettierignore CHANGED
@@ -16,3 +16,5 @@ node_modules
16
  # Ignore files for PNPM, NPM and YARN
17
  pnpm-lock.yaml
18
  yarn.lock
 
 
 
16
  # Ignore files for PNPM, NPM and YARN
17
  pnpm-lock.yaml
18
  yarn.lock
19
+
20
+ context_length.json
eslint.config.mts CHANGED
@@ -86,6 +86,7 @@ export default ts.config(
86
  "**/pnpm-lock.yaml",
87
  "**/package-lock.json",
88
  "**/yarn.lock",
 
89
  ],
90
  },
91
  {
 
86
  "**/pnpm-lock.yaml",
87
  "**/package-lock.json",
88
  "**/yarn.lock",
89
+ "context_length.json",
90
  ],
91
  },
92
  {
package.json CHANGED
@@ -3,24 +3,25 @@
3
  "version": "0.0.1",
4
  "private": true,
5
  "scripts": {
6
- "dev": "vite dev",
7
- "build": "vite build",
8
  "preview": "vite preview",
9
  "prepare": "ts-patch install && svelte-kit sync || echo ''",
10
  "check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
11
  "check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
12
  "lint": "prettier . --check . && eslint src/",
13
  "format": "prettier . --write .",
14
- "clean": "rm -rf ./node_modules/ && rm -rf ./.svelte-kit/ && ni && echo 'Project cleaned!'"
 
15
  },
16
  "devDependencies": {
17
  "@eslint/eslintrc": "^3.3.0",
18
  "@eslint/js": "^9.22.0",
19
  "@floating-ui/dom": "^1.6.13",
20
  "@huggingface/hub": "^1.0.1",
21
- "@huggingface/transformers": "^3.4.2",
22
  "@huggingface/inference": "^3.5.1",
23
  "@huggingface/tasks": "^0.17.1",
 
24
  "@iconify-json/carbon": "^1.2.8",
25
  "@iconify-json/material-symbols": "^1.2.15",
26
  "@ryoppippi/unplugin-typia": "^1.0.0",
@@ -31,7 +32,9 @@
31
  "@sveltejs/vite-plugin-svelte": "^4.0.0",
32
  "@tailwindcss/container-queries": "^0.1.1",
33
  "@tailwindcss/postcss": "^4.0.9",
 
34
  "clsx": "^2.1.1",
 
35
  "eslint": "^9.22.0",
36
  "eslint-config-prettier": "^10.1.1",
37
  "eslint-plugin-prettier": "^5.2.3",
 
3
  "version": "0.0.1",
4
  "private": true,
5
  "scripts": {
6
+ "dev": "pnpm run update-ctx-length && vite dev",
7
+ "build": "pnpm run update-ctx-length && vite build",
8
  "preview": "vite preview",
9
  "prepare": "ts-patch install && svelte-kit sync || echo ''",
10
  "check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
11
  "check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
12
  "lint": "prettier . --check . && eslint src/",
13
  "format": "prettier . --write .",
14
+ "clean": "rm -rf ./node_modules/ && rm -rf ./.svelte-kit/ && ni && echo 'Project cleaned!'",
15
+ "update-ctx-length": "jiti scripts/update-ctx-length.ts"
16
  },
17
  "devDependencies": {
18
  "@eslint/eslintrc": "^3.3.0",
19
  "@eslint/js": "^9.22.0",
20
  "@floating-ui/dom": "^1.6.13",
21
  "@huggingface/hub": "^1.0.1",
 
22
  "@huggingface/inference": "^3.5.1",
23
  "@huggingface/tasks": "^0.17.1",
24
+ "@huggingface/transformers": "^3.4.2",
25
  "@iconify-json/carbon": "^1.2.8",
26
  "@iconify-json/material-symbols": "^1.2.15",
27
  "@ryoppippi/unplugin-typia": "^1.0.0",
 
32
  "@sveltejs/vite-plugin-svelte": "^4.0.0",
33
  "@tailwindcss/container-queries": "^0.1.1",
34
  "@tailwindcss/postcss": "^4.0.9",
35
+ "@types/node": "^22.14.1",
36
  "clsx": "^2.1.1",
37
+ "dotenv": "^16.5.0",
38
  "eslint": "^9.22.0",
39
  "eslint-config-prettier": "^10.1.1",
40
  "eslint-plugin-prettier": "^5.2.3",
pnpm-lock.yaml CHANGED
@@ -44,31 +44,37 @@ importers:
44
  version: 1.2.15
45
  '@ryoppippi/unplugin-typia':
46
  specifier: ^1.0.0
47
48
  '@samchon/openapi':
49
  specifier: ^3.0.0
50
  version: 3.0.0
51
  '@sveltejs/adapter-auto':
52
  specifier: ^3.2.2
53
- version: 3.3.1(@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@18.19.84)([email protected])))([email protected])([email protected](@types/node@18.19.84)([email protected])))
54
  '@sveltejs/adapter-node':
55
  specifier: ^5.2.0
56
- version: 5.2.12(@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@18.19.84)([email protected])))([email protected])([email protected](@types/node@18.19.84)([email protected])))
57
  '@sveltejs/kit':
58
  specifier: ^2.5.27
59
- version: 2.18.0(@sveltejs/[email protected]([email protected])([email protected](@types/node@18.19.84)([email protected])))([email protected])([email protected](@types/node@18.19.84)([email protected]))
60
  '@sveltejs/vite-plugin-svelte':
61
  specifier: ^4.0.0
62
- version: 4.0.4([email protected])([email protected](@types/node@18.19.84)([email protected]))
63
  '@tailwindcss/container-queries':
64
  specifier: ^0.1.1
65
  version: 0.1.1([email protected])
66
  '@tailwindcss/postcss':
67
  specifier: ^4.0.9
68
  version: 4.0.9
 
 
 
69
  clsx:
70
  specifier: ^2.1.1
71
  version: 2.1.1
 
 
 
72
  eslint:
73
  specifier: ^9.22.0
74
  version: 9.22.0([email protected])
@@ -137,7 +143,7 @@ importers:
137
  version: 22.1.0([email protected])
138
  vite:
139
  specifier: ^5.4.4
140
- version: 5.4.14(@types/node@18.19.84)([email protected])
141
 
142
  packages:
143
 
@@ -1001,6 +1007,9 @@ packages:
1001
  '@types/[email protected]':
1002
  resolution: {integrity: sha512-ACYy2HGcZPHxEeWTqowTF7dhXN+JU1o7Gr4b41klnn6pj2LD6rsiGqSZojMdk1Jh2ys3m76ap+ae1vvE4+5+vg==}
1003
 
 
 
 
1004
  '@types/[email protected]':
1005
  resolution: {integrity: sha512-60BCwRFOZCQhDncwQdxxeOEEkbc5dIMccYLwbxsS4TUNeVECQ/pBJ0j09mrHOl/JJvpRPGwO9SvE4nR2Nb/a4Q==}
1006
 
@@ -1284,6 +1293,10 @@ packages:
1284
  resolution: {integrity: sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==}
1285
  engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0}
1286
 
 
 
 
 
1287
1288
  resolution: {integrity: sha512-pYxfDYpued//QpnLIm4Avk7rsNtAtQkUES2cwAYSvD/wd2pKD71gN2Ebj3e7klzXwjocvE8c5vx/1fxwpqmSxA==}
1289
  engines: {node: '>=4'}
@@ -2404,6 +2417,9 @@ packages:
2404
2405
  resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==}
2406
 
 
 
 
2407
2408
  resolution: {integrity: sha512-ect2ZNtk1Zgwb0NVHd0C1IDW/MV+Jk/xaq4t8o6rYdVS3+L660ZdD5kTSQZvsgdwCvquRw+/wYn75hsweRjoIA==}
2409
  peerDependencies:
@@ -3087,7 +3103,7 @@ snapshots:
3087
  '@rollup/[email protected]':
3088
  optional: true
3089
 
3090
3091
  dependencies:
3092
  '@rollup/pluginutils': 5.1.4([email protected])
3093
  consola: 3.4.0
@@ -3101,7 +3117,7 @@ snapshots:
3101
  typescript: 5.6.3
3102
  typia: 7.6.4(@samchon/[email protected])([email protected])
3103
  unplugin: 1.16.1
3104
- vite: 6.2.1(@types/node@18.19.84)([email protected])([email protected])([email protected])
3105
  transitivePeerDependencies:
3106
  - '@samchon/openapi'
3107
  - '@types/node'
@@ -3125,22 +3141,22 @@ snapshots:
3125
  dependencies:
3126
  acorn: 8.14.0
3127
 
3128
3129
  dependencies:
3130
- '@sveltejs/kit': 2.18.0(@sveltejs/[email protected]([email protected])([email protected](@types/node@18.19.84)([email protected])))([email protected])([email protected](@types/node@18.19.84)([email protected]))
3131
  import-meta-resolve: 4.1.0
3132
 
3133
3134
  dependencies:
3135
  '@rollup/plugin-commonjs': 28.0.2([email protected])
3136
  '@rollup/plugin-json': 6.1.0([email protected])
3137
  '@rollup/plugin-node-resolve': 16.0.0([email protected])
3138
- '@sveltejs/kit': 2.18.0(@sveltejs/[email protected]([email protected])([email protected](@types/node@18.19.84)([email protected])))([email protected])([email protected](@types/node@18.19.84)([email protected]))
3139
  rollup: 4.34.9
3140
 
3141
3142
  dependencies:
3143
- '@sveltejs/vite-plugin-svelte': 4.0.4([email protected])([email protected](@types/node@18.19.84)([email protected]))
3144
  '@types/cookie': 0.6.0
3145
  cookie: 0.6.0
3146
  devalue: 5.1.1
@@ -3153,27 +3169,27 @@ snapshots:
3153
  set-cookie-parser: 2.7.1
3154
  sirv: 3.0.1
3155
  svelte: 5.28.2
3156
- vite: 5.4.14(@types/node@18.19.84)([email protected])
3157
 
3158
3159
  dependencies:
3160
- '@sveltejs/vite-plugin-svelte': 4.0.4([email protected])([email protected](@types/node@18.19.84)([email protected]))
3161
  debug: 4.4.0
3162
  svelte: 5.28.2
3163
- vite: 5.4.14(@types/node@18.19.84)([email protected])
3164
  transitivePeerDependencies:
3165
  - supports-color
3166
 
3167
3168
  dependencies:
3169
- '@sveltejs/vite-plugin-svelte-inspector': 3.0.1(@sveltejs/[email protected]([email protected])([email protected](@types/node@18.19.84)([email protected])))([email protected])([email protected](@types/node@18.19.84)([email protected]))
3170
  debug: 4.4.0
3171
  deepmerge: 4.3.1
3172
  kleur: 4.1.5
3173
  magic-string: 0.30.17
3174
  svelte: 5.28.2
3175
- vite: 5.4.14(@types/node@18.19.84)([email protected])
3176
- vitefu: 1.0.6([email protected](@types/node@18.19.84)([email protected]))
3177
  transitivePeerDependencies:
3178
  - supports-color
3179
 
@@ -3251,13 +3267,17 @@ snapshots:
3251
 
3252
  '@types/[email protected]':
3253
  dependencies:
3254
- '@types/node': 18.19.84
3255
  form-data: 4.0.2
3256
 
3257
  '@types/[email protected]':
3258
  dependencies:
3259
  undici-types: 5.26.5
3260
 
 
 
 
 
3261
  '@types/[email protected]': {}
3262
 
3263
@@ -3524,6 +3544,8 @@ snapshots:
3524
 
3525
3526
 
 
 
3527
3528
 
3529
@@ -4336,7 +4358,7 @@ snapshots:
4336
  '@protobufjs/path': 1.1.2
4337
  '@protobufjs/pool': 1.1.0
4338
  '@protobufjs/utf8': 1.1.0
4339
- '@types/node': 18.19.84
4340
  long: 5.3.1
4341
 
4342
@@ -4641,6 +4663,8 @@ snapshots:
4641
 
4642
4643
 
 
 
4644
4645
  dependencies:
4646
  '@antfu/install-pkg': 1.0.0
@@ -4669,31 +4693,31 @@ snapshots:
4669
 
4670
4671
 
4672
- [email protected](@types/node@18.19.84)([email protected]):
4673
  dependencies:
4674
  esbuild: 0.21.5
4675
  postcss: 8.5.3
4676
  rollup: 4.34.9
4677
  optionalDependencies:
4678
- '@types/node': 18.19.84
4679
  fsevents: 2.3.3
4680
  lightningcss: 1.29.1
4681
 
4682
4683
  dependencies:
4684
  esbuild: 0.25.1
4685
  postcss: 8.5.3
4686
  rollup: 4.34.9
4687
  optionalDependencies:
4688
- '@types/node': 18.19.84
4689
  fsevents: 2.3.3
4690
  jiti: 2.4.2
4691
  lightningcss: 1.29.1
4692
  yaml: 2.7.0
4693
 
4694
4695
  optionalDependencies:
4696
- vite: 5.4.14(@types/node@18.19.84)([email protected])
4697
 
4698
4699
  dependencies:
 
44
  version: 1.2.15
45
  '@ryoppippi/unplugin-typia':
46
  specifier: ^1.0.0
47
48
  '@samchon/openapi':
49
  specifier: ^3.0.0
50
  version: 3.0.0
51
  '@sveltejs/adapter-auto':
52
  specifier: ^3.2.2
53
+ version: 3.3.1(@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected])))
54
  '@sveltejs/adapter-node':
55
  specifier: ^5.2.0
56
+ version: 5.2.12(@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected])))
57
  '@sveltejs/kit':
58
  specifier: ^2.5.27
59
+ version: 2.18.0(@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected]))
60
  '@sveltejs/vite-plugin-svelte':
61
  specifier: ^4.0.0
62
+ version: 4.0.4([email protected])([email protected](@types/node@22.14.1)([email protected]))
63
  '@tailwindcss/container-queries':
64
  specifier: ^0.1.1
65
  version: 0.1.1([email protected])
66
  '@tailwindcss/postcss':
67
  specifier: ^4.0.9
68
  version: 4.0.9
69
+ '@types/node':
70
+ specifier: ^22.14.1
71
+ version: 22.14.1
72
  clsx:
73
  specifier: ^2.1.1
74
  version: 2.1.1
75
+ dotenv:
76
+ specifier: ^16.5.0
77
+ version: 16.5.0
78
  eslint:
79
  specifier: ^9.22.0
80
  version: 9.22.0([email protected])
 
143
  version: 22.1.0([email protected])
144
  vite:
145
  specifier: ^5.4.4
146
+ version: 5.4.14(@types/node@22.14.1)([email protected])
147
 
148
  packages:
149
 
 
1007
  '@types/[email protected]':
1008
  resolution: {integrity: sha512-ACYy2HGcZPHxEeWTqowTF7dhXN+JU1o7Gr4b41klnn6pj2LD6rsiGqSZojMdk1Jh2ys3m76ap+ae1vvE4+5+vg==}
1009
 
1010
+ '@types/[email protected]':
1011
+ resolution: {integrity: sha512-u0HuPQwe/dHrItgHHpmw3N2fYCR6x4ivMNbPHRkBVP4CvN+kiRrKHWk3i8tXiO/joPwXLMYvF9TTF0eqgHIuOw==}
1012
+
1013
  '@types/[email protected]':
1014
  resolution: {integrity: sha512-60BCwRFOZCQhDncwQdxxeOEEkbc5dIMccYLwbxsS4TUNeVECQ/pBJ0j09mrHOl/JJvpRPGwO9SvE4nR2Nb/a4Q==}
1015
 
 
1293
  resolution: {integrity: sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==}
1294
  engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0}
1295
 
1296
1297
+ resolution: {integrity: sha512-m/C+AwOAr9/W1UOIZUo232ejMNnJAJtYQjUbHoNTBNTJSvqzzDh7vnrei3o3r3m9blf6ZoDkvcw0VmozNRFJxg==}
1298
+ engines: {node: '>=12'}
1299
+
1300
1301
  resolution: {integrity: sha512-pYxfDYpued//QpnLIm4Avk7rsNtAtQkUES2cwAYSvD/wd2pKD71gN2Ebj3e7klzXwjocvE8c5vx/1fxwpqmSxA==}
1302
  engines: {node: '>=4'}
 
2417
2418
  resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==}
2419
 
2420
2421
+ resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==}
2422
+
2423
2424
  resolution: {integrity: sha512-ect2ZNtk1Zgwb0NVHd0C1IDW/MV+Jk/xaq4t8o6rYdVS3+L660ZdD5kTSQZvsgdwCvquRw+/wYn75hsweRjoIA==}
2425
  peerDependencies:
 
3103
  '@rollup/[email protected]':
3104
  optional: true
3105
 
3106
3107
  dependencies:
3108
  '@rollup/pluginutils': 5.1.4([email protected])
3109
  consola: 3.4.0
 
3117
  typescript: 5.6.3
3118
  typia: 7.6.4(@samchon/[email protected])([email protected])
3119
  unplugin: 1.16.1
3120
+ vite: 6.2.1(@types/node@22.14.1)([email protected])([email protected])([email protected])
3121
  transitivePeerDependencies:
3122
  - '@samchon/openapi'
3123
  - '@types/node'
 
3141
  dependencies:
3142
  acorn: 8.14.0
3143
 
3144
3145
  dependencies:
3146
+ '@sveltejs/kit': 2.18.0(@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected]))
3147
  import-meta-resolve: 4.1.0
3148
 
3149
3150
  dependencies:
3151
  '@rollup/plugin-commonjs': 28.0.2([email protected])
3152
  '@rollup/plugin-json': 6.1.0([email protected])
3153
  '@rollup/plugin-node-resolve': 16.0.0([email protected])
3154
+ '@sveltejs/kit': 2.18.0(@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected]))
3155
  rollup: 4.34.9
3156
 
3157
3158
  dependencies:
3159
+ '@sveltejs/vite-plugin-svelte': 4.0.4([email protected])([email protected](@types/node@22.14.1)([email protected]))
3160
  '@types/cookie': 0.6.0
3161
  cookie: 0.6.0
3162
  devalue: 5.1.1
 
3169
  set-cookie-parser: 2.7.1
3170
  sirv: 3.0.1
3171
  svelte: 5.28.2
3172
+ vite: 5.4.14(@types/node@22.14.1)([email protected])
3173
 
3174
3175
  dependencies:
3176
+ '@sveltejs/vite-plugin-svelte': 4.0.4([email protected])([email protected](@types/node@22.14.1)([email protected]))
3177
  debug: 4.4.0
3178
  svelte: 5.28.2
3179
+ vite: 5.4.14(@types/node@22.14.1)([email protected])
3180
  transitivePeerDependencies:
3181
  - supports-color
3182
 
3183
3184
  dependencies:
3185
+ '@sveltejs/vite-plugin-svelte-inspector': 3.0.1(@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected]))
3186
  debug: 4.4.0
3187
  deepmerge: 4.3.1
3188
  kleur: 4.1.5
3189
  magic-string: 0.30.17
3190
  svelte: 5.28.2
3191
+ vite: 5.4.14(@types/node@22.14.1)([email protected])
3192
+ vitefu: 1.0.6([email protected](@types/node@22.14.1)([email protected]))
3193
  transitivePeerDependencies:
3194
  - supports-color
3195
 
 
3267
 
3268
  '@types/[email protected]':
3269
  dependencies:
3270
+ '@types/node': 22.14.1
3271
  form-data: 4.0.2
3272
 
3273
  '@types/[email protected]':
3274
  dependencies:
3275
  undici-types: 5.26.5
3276
 
3277
+ '@types/[email protected]':
3278
+ dependencies:
3279
+ undici-types: 6.21.0
3280
+
3281
  '@types/[email protected]': {}
3282
 
3283
 
3544
 
3545
3546
 
3547
3548
+
3549
3550
 
3551
 
4358
  '@protobufjs/path': 1.1.2
4359
  '@protobufjs/pool': 1.1.0
4360
  '@protobufjs/utf8': 1.1.0
4361
+ '@types/node': 22.14.1
4362
  long: 5.3.1
4363
 
4364
 
4663
 
4664
4665
 
4666
4667
+
4668
4669
  dependencies:
4670
  '@antfu/install-pkg': 1.0.0
 
4693
 
4694
4695
 
4696
+ [email protected](@types/node@22.14.1)([email protected]):
4697
  dependencies:
4698
  esbuild: 0.21.5
4699
  postcss: 8.5.3
4700
  rollup: 4.34.9
4701
  optionalDependencies:
4702
+ '@types/node': 22.14.1
4703
  fsevents: 2.3.3
4704
  lightningcss: 1.29.1
4705
 
4706
4707
  dependencies:
4708
  esbuild: 0.25.1
4709
  postcss: 8.5.3
4710
  rollup: 4.34.9
4711
  optionalDependencies:
4712
+ '@types/node': 22.14.1
4713
  fsevents: 2.3.3
4714
  jiti: 2.4.2
4715
  lightningcss: 1.29.1
4716
  yaml: 2.7.0
4717
 
4718
4719
  optionalDependencies:
4720
+ vite: 5.4.14(@types/node@22.14.1)([email protected])
4721
 
4722
4723
  dependencies:
scripts/update-ctx-length.ts ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import dotenv from "dotenv";
2
+ dotenv.config(); // Load .env file into process.env
3
+
4
+ import { fetchAllProviderData, type ApiKeys } from "../src/lib/server/providers/index.js"; // Import ApiKeys type
5
+ import fs from "fs/promises";
6
+ import path from "path";
7
+
8
+ const CACHE_FILE_PATH = path.resolve("src/lib/data/context_length.json");
9
+
10
+ async function runUpdate() {
11
+ console.log("Starting context length cache update...");
12
+
13
+ // Gather API keys from process.env
14
+ const apiKeys: ApiKeys = {
15
+ COHERE_API_KEY: process.env.COHERE_API_KEY,
16
+ TOGETHER_API_KEY: process.env.TOGETHER_API_KEY,
17
+ FIREWORKS_API_KEY: process.env.FIREWORKS_API_KEY,
18
+ HYPERBOLIC_API_KEY: process.env.HYPERBOLIC_API_KEY,
19
+ REPLICATE_API_KEY: process.env.REPLICATE_API_KEY,
20
+ NEBIUS_API_KEY: process.env.NEBIUS_API_KEY,
21
+ NOVITA_API_KEY: process.env.NOVITA_API_KEY,
22
+ SAMBANOVA_API_KEY: process.env.SAMBANOVA_API_KEY,
23
+ };
24
+
25
+ try {
26
+ // Fetch data from all supported providers concurrently, passing keys
27
+ const fetchedData = await fetchAllProviderData(apiKeys);
28
+
29
+ // Read existing manual/cached data
30
+ let existingData = {};
31
+ try {
32
+ const currentCache = await fs.readFile(CACHE_FILE_PATH, "utf-8");
33
+ existingData = JSON.parse(currentCache);
34
+ } catch {
35
+ // Cache file missing or unreadable: keep the empty default so a new file is created
36
+ console.log("No existing cache file found or error reading, creating new one.");
37
+ }
38
+
39
+ // Merge fetched data with existing data (fetched data takes precedence)
40
+ const combinedData = { ...existingData, ...fetchedData };
41
+
42
+ // Write the combined data back to the file
43
+ const tempFilePath = CACHE_FILE_PATH + ".tmp";
44
+ await fs.writeFile(tempFilePath, JSON.stringify(combinedData, null, "\t"), "utf-8");
45
+ await fs.rename(tempFilePath, CACHE_FILE_PATH);
46
+
47
+ console.log("Context length cache update complete.");
48
+ console.log(`Cache file written to: ${CACHE_FILE_PATH}`);
49
+ } catch (error) {
50
+ console.error("Error during context length cache update:", error);
51
+ process.exit(1); // Exit with error code
52
+ }
53
+ }
54
+
55
+ runUpdate();
src/app.css CHANGED
@@ -67,6 +67,18 @@
67
  @apply flex h-[39px] items-center justify-center gap-2 rounded-lg border border-gray-200 bg-white px-3 py-2.5 text-sm font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:ring-4 focus:ring-gray-100 focus:outline-hidden dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700;
68
  }
69
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  @utility custom-outline {
71
  @apply outline-hidden;
72
  @apply border-blue-500 ring ring-blue-500;
 
67
  @apply flex h-[39px] items-center justify-center gap-2 rounded-lg border border-gray-200 bg-white px-3 py-2.5 text-sm font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:ring-4 focus:ring-gray-100 focus:outline-hidden dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700;
68
  }
69
 
70
+ @utility btn-sm {
71
+ @apply flex h-[32px] items-center justify-center gap-1.5 rounded-md border border-gray-200 bg-white px-2.5 py-2 text-xs font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:ring-4 focus:ring-gray-100 focus:outline-hidden dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700;
72
+ }
73
+
74
+ @utility btn-xs {
75
+ @apply flex h-[28px] items-center justify-center gap-1 rounded border border-gray-200 bg-white px-2 py-1.5 text-xs font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:ring-4 focus:ring-gray-100 focus:outline-hidden dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700;
76
+ }
77
+
78
+ @utility btn-mini {
79
+ @apply flex h-[24px] items-center justify-center gap-0.5 rounded-sm border border-gray-200 bg-white px-1.5 py-1 text-[10px] font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:ring-2 focus:ring-gray-100 focus:outline-hidden dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700;
80
+ }
81
+
82
  @utility custom-outline {
83
  @apply outline-hidden;
84
  @apply border-blue-500 ring ring-blue-500;
src/lib/components/inference-playground/generation-config-settings.ts CHANGED
@@ -7,7 +7,7 @@ export type GenerationConfigKey = (typeof GENERATION_CONFIG_KEYS)[number];
7
  export type GenerationConfig = Pick<ChatCompletionInput, GenerationConfigKey>;
8
 
9
  interface GenerationKeySettings {
10
- default: number;
11
  step: number;
12
  min: number;
13
  max: number;
@@ -23,7 +23,6 @@ export const GENERATION_CONFIG_SETTINGS: Record<GenerationConfigKey, GenerationK
23
  label: "Temperature",
24
  },
25
  max_tokens: {
26
- default: 2048,
27
  step: 256,
28
  min: 0,
29
  max: 8192, // changed dynamically based on model
 
7
  export type GenerationConfig = Pick<ChatCompletionInput, GenerationConfigKey>;
8
 
9
  interface GenerationKeySettings {
10
+ default?: number;
11
  step: number;
12
  min: number;
13
  max: number;
 
23
  label: "Temperature",
24
  },
25
  max_tokens: {
 
26
  step: 256,
27
  min: 0,
28
  max: 8192, // changed dynamically based on model
src/lib/components/inference-playground/generation-config.svelte CHANGED
@@ -1,8 +1,10 @@
1
  <script lang="ts">
2
- import type { Conversation } from "$lib/types.js";
3
-
4
  import { GENERATION_CONFIG_KEYS, GENERATION_CONFIG_SETTINGS } from "./generation-config-settings.js";
5
- import { customMaxTokens } from "./utils.js";
 
 
6
 
7
  interface Props {
8
  conversation: Conversation;
@@ -11,37 +13,58 @@
11
 
12
  let { conversation = $bindable(), classNames = "" }: Props = $props();
13
 
14
- let modelMaxLength = $derived(customMaxTokens[conversation.model.id] ?? 100000);
15
- let maxTokens = $derived(Math.min(modelMaxLength ?? GENERATION_CONFIG_SETTINGS["max_tokens"].max, 64_000));
 
 
 
 
 
 
 
 
16
  </script>
17
 
18
  <div class="flex flex-col gap-y-7 {classNames}">
19
  {#each GENERATION_CONFIG_KEYS as key}
20
  {@const { label, min, step } = GENERATION_CONFIG_SETTINGS[key]}
21
- {@const max = key === "max_tokens" ? maxTokens : GENERATION_CONFIG_SETTINGS[key].max}
 
 
22
  <div>
23
  <div class="flex items-center justify-between">
24
- <label for="temperature-range" class="mb-2 block text-sm font-medium text-gray-900 dark:text-white"
25
- >{label}</label
26
- >
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  <input
28
- type="number"
29
- class="w-18 rounded-sm border bg-transparent px-1 py-0.5 text-right text-sm dark:border-gray-700"
30
  {min}
31
  {max}
32
  {step}
33
  bind:value={conversation.config[key]}
 
34
  />
35
- </div>
36
- <input
37
- id="temperature-range"
38
- type="range"
39
- {min}
40
- {max}
41
- {step}
42
- bind:value={conversation.config[key]}
43
- class="h-2 w-full cursor-pointer appearance-none rounded-lg bg-gray-200 accent-black dark:bg-gray-700 dark:accent-blue-500"
44
- />
45
  </div>
46
  {/each}
47
 
 
1
  <script lang="ts">
2
+ import { type Conversation } from "$lib/types.js";
3
+ import { watch } from "runed";
4
  import { GENERATION_CONFIG_KEYS, GENERATION_CONFIG_SETTINGS } from "./generation-config-settings.js";
5
+ import { maxAllowedTokens } from "./utils.js";
6
+ import { isNumber } from "$lib/utils/is.js";
7
+ import IconX from "~icons/carbon/close";
8
 
9
  interface Props {
10
  conversation: Conversation;
 
13
 
14
  let { conversation = $bindable(), classNames = "" }: Props = $props();
15
 
16
+ const maxTokens = $derived(maxAllowedTokens(conversation));
17
+
18
+ watch(
19
+ () => maxTokens,
20
+ () => {
21
+ const curr = conversation.config.max_tokens;
22
+ if (!curr || curr <= maxTokens) return;
23
+ conversation.config.max_tokens = maxTokens;
24
+ }
25
+ );
26
  </script>
27
 
28
  <div class="flex flex-col gap-y-7 {classNames}">
29
  {#each GENERATION_CONFIG_KEYS as key}
30
  {@const { label, min, step } = GENERATION_CONFIG_SETTINGS[key]}
31
+ {@const isMaxTokens = key === "max_tokens"}
32
+ {@const max = isMaxTokens ? maxTokens : GENERATION_CONFIG_SETTINGS[key].max}
33
+
34
  <div>
35
  <div class="flex items-center justify-between">
36
+ <label for={key} class="mb-2 block text-sm font-medium text-gray-900 dark:text-white">
37
+ {label}
38
+ </label>
39
+ <div class="flex items-center gap-2">
40
+ {#if !isMaxTokens || isNumber(conversation.config[key])}
41
+ <input
42
+ type="number"
43
+ class="w-20 rounded-sm border bg-transparent px-1 py-0.5 text-right text-sm dark:border-gray-700"
44
+ {min}
45
+ {max}
46
+ {step}
47
+ bind:value={conversation.config[key]}
48
+ />
49
+ {/if}
50
+ {#if isMaxTokens && isNumber(conversation.config[key])}
51
+ <button class="btn-mini" onclick={() => (conversation.config[key] = undefined)}> <IconX /> </button>
52
+ {:else if isMaxTokens}
53
+ <button class="btn-mini" onclick={() => (conversation.config[key] = maxTokens / 2)}> set </button>
54
+ {/if}
55
+ </div>
56
+ </div>
57
+ {#if !isMaxTokens || isNumber(conversation.config[key])}
58
  <input
59
+ id={key}
60
+ type="range"
61
  {min}
62
  {max}
63
  {step}
64
  bind:value={conversation.config[key]}
65
+ class="h-2 w-full cursor-pointer appearance-none rounded-lg bg-gray-200 accent-black dark:bg-gray-700 dark:accent-blue-500"
66
  />
67
+ {/if}
 
 
 
 
 
 
 
 
 
68
  </div>
69
  {/each}
70
 
src/lib/components/inference-playground/playground.svelte CHANGED
@@ -6,7 +6,6 @@
6
  import { isConversationWithHFModel, type ConversationMessage, type Model, type Project } from "$lib/types.js";
7
  import { cmdOrCtrl, optOrAlt } from "$lib/utils/platform.js";
8
  import { Popover } from "melt/components";
9
- import { watch } from "runed";
10
  import typia from "typia";
11
  import { default as IconDelete } from "~icons/carbon/trash-can";
12
  import { showShareModal } from "../share-modal.svelte";
@@ -19,7 +18,7 @@
19
  import ModelSelectorModal from "./model-selector-modal.svelte";
20
  import ModelSelector from "./model-selector.svelte";
21
  import ProjectSelect from "./project-select.svelte";
22
- import { getTokens, isSystemPromptSupported } from "./utils.js";
23
 
24
  import { iterate } from "$lib/utils/array.js";
25
  import IconChatLeft from "~icons/carbon/align-box-bottom-left";
@@ -43,19 +42,6 @@
43
 
44
  let selectCompareModelOpen = $state(false);
45
 
46
- watch(
47
- () => $state.snapshot(session.project),
48
- () => {
49
- session.project.conversations.forEach(async (c, i) => {
50
- session.generationStats[i] = {
51
- latency: 0,
52
- ...session.generationStats[i],
53
- generatedTokensCount: await getTokens(c),
54
- };
55
- });
56
- }
57
- );
58
-
59
  const systemPromptSupported = $derived(
60
  session.project.conversations.some(conversation => isSystemPromptSupported(conversation.model))
61
  );
 
6
  import { isConversationWithHFModel, type ConversationMessage, type Model, type Project } from "$lib/types.js";
7
  import { cmdOrCtrl, optOrAlt } from "$lib/utils/platform.js";
8
  import { Popover } from "melt/components";
 
9
  import typia from "typia";
10
  import { default as IconDelete } from "~icons/carbon/trash-can";
11
  import { showShareModal } from "../share-modal.svelte";
 
18
  import ModelSelectorModal from "./model-selector-modal.svelte";
19
  import ModelSelector from "./model-selector.svelte";
20
  import ProjectSelect from "./project-select.svelte";
21
+ import { isSystemPromptSupported } from "./utils.js";
22
 
23
  import { iterate } from "$lib/utils/array.js";
24
  import IconChatLeft from "~icons/carbon/align-box-bottom-left";
 
42
 
43
  let selectCompareModelOpen = $state(false);
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  const systemPromptSupported = $derived(
46
  session.project.conversations.some(conversation => isSystemPromptSupported(conversation.model))
47
  );
src/lib/components/inference-playground/utils.ts CHANGED
@@ -1,15 +1,18 @@
1
- import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
 
2
  import {
3
  isCustomModel,
 
4
  type Conversation,
5
  type ConversationMessage,
6
  type CustomModel,
7
  type Model,
8
  } from "$lib/types.js";
 
 
9
  import type { ChatCompletionInputMessage, InferenceSnippet } from "@huggingface/tasks";
10
  import { type ChatCompletionOutputMessage } from "@huggingface/tasks";
11
- import { token } from "$lib/state/token.svelte";
12
- import { HfInference, snippets, type InferenceProvider } from "@huggingface/inference";
13
  import OpenAI from "openai";
14
 
15
  type ChatCompletionInputMessageChunk =
@@ -48,6 +51,24 @@ type OpenAICompletionMetadata = {
48
 
49
  type CompletionMetadata = HFCompletionMetadata | OpenAICompletionMetadata;
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  function getCompletionMetadata(conversation: Conversation, signal?: AbortSignal): CompletionMetadata {
52
  const { model, systemMessage } = conversation;
53
 
@@ -88,6 +109,7 @@ function getCompletionMetadata(conversation: Conversation, signal?: AbortSignal)
88
  messages: messages.map(parseMessage),
89
  provider: conversation.provider,
90
  ...conversation.config,
 
91
  },
92
  };
93
  }
@@ -284,11 +306,20 @@ export async function getTokenizer(model: Model) {
284
  }
285
  }
286
 
 
 
 
 
 
 
 
 
 
287
  export async function getTokens(conversation: Conversation): Promise<number> {
288
  const model = conversation.model;
289
- if (isCustomModel(model)) return 0;
290
  const tokenizer = await getTokenizer(model);
291
- if (tokenizer === null) return 0;
292
 
293
  // This is a simplified version - you might need to adjust based on your exact needs
294
  let formattedText = "";
 
1
+ import ctxLengthData from "$lib/data/context_length.json";
2
+ import { token } from "$lib/state/token.svelte";
3
  import {
4
  isCustomModel,
5
+ isHFModel,
6
  type Conversation,
7
  type ConversationMessage,
8
  type CustomModel,
9
  type Model,
10
  } from "$lib/types.js";
11
+ import { tryGet } from "$lib/utils/object.js";
12
+ import { HfInference, snippets, type InferenceProvider } from "@huggingface/inference";
13
  import type { ChatCompletionInputMessage, InferenceSnippet } from "@huggingface/tasks";
14
  import { type ChatCompletionOutputMessage } from "@huggingface/tasks";
15
+ import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
 
16
  import OpenAI from "openai";
17
 
18
  type ChatCompletionInputMessageChunk =
 
51
 
52
  type CompletionMetadata = HFCompletionMetadata | OpenAICompletionMetadata;
53
 
54
/**
 * Resolves the context length to use for a conversation.
 *
 * For a Hugging Face model with a selected provider, the model's id on that
 * provider is read from `model.inferenceProviderMapping` and looked up in the
 * bundled `context_length.json` data. When any step of that lookup yields
 * nothing (or a falsy length such as 0), the value falls back to the
 * `customMaxTokens` entry for the model id, and finally to 100000.
 */
export function maxAllowedTokens(conversation: Conversation) {
	const fallback = () => customMaxTokens[conversation.model.id] ?? 100000;

	const { provider, model } = conversation;
	if (!provider || !isHFModel(model)) return fallback();

	const mapping = model.inferenceProviderMapping.find(entry => entry.provider === provider);
	const idOnProvider = mapping?.providerId;
	if (!idOnProvider) return fallback();

	const providerModels = tryGet(ctxLengthData, provider);
	if (!providerModels) return fallback();

	const ctxLength = tryGet(providerModels, idOnProvider) as number | undefined;
	return ctxLength ? ctxLength : fallback();
}
71
+
72
  function getCompletionMetadata(conversation: Conversation, signal?: AbortSignal): CompletionMetadata {
73
  const { model, systemMessage } = conversation;
74
 
 
109
  messages: messages.map(parseMessage),
110
  provider: conversation.provider,
111
  ...conversation.config,
112
+ // max_tokens: maxAllowedTokens(conversation) - currTokens,
113
  },
114
  };
115
  }
 
306
  }
307
  }
308
 
309
/**
 * Guesstimates the token count of a conversation when no tokenizer is available.
 *
 * Uses the rule of thumb "1 token ~ 4 characters" over the combined length of
 * all message contents; messages without content count as empty.
 *
 * @param conversation - conversation whose `messages` are measured
 * @returns a whole number of tokens, rounded up so any non-empty text counts
 *          as at least one token
 */
export function estimateTokens(conversation: Conversation): number {
	// Sum lengths directly instead of concatenating every message into one string.
	const charCount = conversation.messages.reduce((total, message) => total + (message?.content ?? "").length, 0);

	return Math.ceil(charCount / 4); // 1 token ~ 4 characters
}
317
+
318
  export async function getTokens(conversation: Conversation): Promise<number> {
319
  const model = conversation.model;
320
+ if (isCustomModel(model)) return estimateTokens(conversation);
321
  const tokenizer = await getTokenizer(model);
322
+ if (tokenizer === null) return estimateTokens(conversation);
323
 
324
  // This is a simplified version - you might need to adjust based on your exact needs
325
  let formattedText = "";
src/lib/data/context_length.json ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "replicate": {},
3
+ "sambanova": {
4
+ "DeepSeek-R1": 16384,
5
+ "DeepSeek-R1-Distill-Llama-70B": 131072,
6
+ "DeepSeek-V3-0324": 16384,
7
+ "E5-Mistral-7B-Instruct": 4096,
8
+ "Llama-4-Maverick-17B-128E-Instruct": 8192,
9
+ "Llama-4-Scout-17B-16E-Instruct": 8192,
10
+ "Meta-Llama-3.1-405B-Instruct": 16384,
11
+ "Meta-Llama-3.1-8B-Instruct": 16384,
12
+ "Meta-Llama-3.2-1B-Instruct": 16384,
13
+ "Meta-Llama-3.2-3B-Instruct": 4096,
14
+ "Meta-Llama-3.3-70B-Instruct": 131072,
15
+ "Meta-Llama-Guard-3-8B": 16384,
16
+ "QwQ-32B": 16384,
17
+ "Qwen2-Audio-7B-Instruct": 4096,
18
+ "Qwen3-32B": 8192
19
+ },
20
+ "nebius": {
21
+ "meta-llama/Meta-Llama-3.1-8B-Instruct-fast": 131072,
22
+ "meta-llama/Meta-Llama-3.1-8B-Instruct": 131072,
23
+ "meta-llama/Meta-Llama-3.1-70B-Instruct-fast": 131072,
24
+ "meta-llama/Meta-Llama-3.1-70B-Instruct": 131072,
25
+ "meta-llama/Meta-Llama-3.1-405B-Instruct": 131072,
26
+ "meta-llama/Llama-Guard-3-8B": 131072,
27
+ "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF-fast": 131072,
28
+ "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": 131072,
29
+ "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1": 131072,
30
+ "mistralai/Mistral-Nemo-Instruct-2407-fast": 128000,
31
+ "mistralai/Mistral-Nemo-Instruct-2407": 128000,
32
+ "mistralai/Mixtral-8x7B-Instruct-v0.1-fast": 32768,
33
+ "mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
34
+ "mistralai/Mixtral-8x22B-Instruct-v0.1-fast": 65536,
35
+ "mistralai/Mixtral-8x22B-Instruct-v0.1": 65536,
36
+ "allenai/OLMo-7B-Instruct-hf": 2048,
37
+ "microsoft/Phi-3-mini-4k-instruct-fast": 4096,
38
+ "microsoft/Phi-3-mini-4k-instruct": 4096,
39
+ "microsoft/Phi-3-medium-128k-instruct-fast": 131072,
40
+ "microsoft/Phi-3-medium-128k-instruct": 131072,
41
+ "google/gemma-2-2b-it-fast": 8192,
42
+ "google/gemma-2-2b-it": 8192,
43
+ "google/gemma-2-9b-it-fast": 8192,
44
+ "google/gemma-2-9b-it": 8192,
45
+ "google/gemma-2-27b-it-fast": 8192,
46
+ "google/gemma-2-27b-it": 8192,
47
+ "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct-fast": 128000,
48
+ "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": 128000,
49
+ "Qwen/Qwen2.5-Coder-7B-fast": 32768,
50
+ "Qwen/Qwen2.5-Coder-7B": 32768,
51
+ "Qwen/Qwen2.5-Coder-7B-Instruct-fast": 32768,
52
+ "Qwen/Qwen2.5-Coder-7B-Instruct": 32768,
53
+ "Qwen/Qwen2.5-Coder-32B-Instruct-fast": 131072,
54
+ "Qwen/Qwen2.5-Coder-32B-Instruct": 131072,
55
+ "Qwen/Qwen2.5-32B-Instruct-fast": 131072,
56
+ "Qwen/Qwen2.5-32B-Instruct": 131072,
57
+ "Qwen/Qwen2.5-72B-Instruct-fast": 131072,
58
+ "Qwen/Qwen2.5-72B-Instruct": 131072,
59
+ "Qwen/Qwen2-VL-72B-Instruct": 32768,
60
+ "Qwen/Qwen2-VL-7B-Instruct": 32768,
61
+ "llava-hf/llava-1.5-7b-hf": 4096,
62
+ "llava-hf/llava-1.5-13b-hf": 4096,
63
+ "aaditya/Llama3-OpenBioLLM-8B": 8192,
64
+ "aaditya/Llama3-OpenBioLLM-70B": 8192,
65
+ "BAAI/bge-en-icl": 32768,
66
+ "BAAI/bge-multilingual-gemma2": 4096,
67
+ "intfloat/e5-mistral-7b-instruct": 32768,
68
+ "cognitivecomputations/dolphin-2.9.2-mixtral-8x22b": 65536,
69
+ "microsoft/Phi-3.5-MoE-instruct": 131072,
70
+ "microsoft/Phi-3.5-mini-instruct": 131072,
71
+ "Qwen/Qwen2.5-1.5B-Instruct": 32768,
72
+ "meta-llama/Llama-3.3-70B-Instruct": 131072,
73
+ "meta-llama/Llama-3.3-70B-Instruct-fast": 131072,
74
+ "meta-llama/Llama-3.2-1B-Instruct": 131072,
75
+ "meta-llama/Llama-3.2-3B-Instruct": 131072,
76
+ "Qwen/QwQ-32B-Preview": 32768,
77
+ "Qwen/QVQ-72B-preview": 128000,
78
+ "microsoft/phi-4": 16384,
79
+ "deepseek-ai/DeepSeek-V3": 163840,
80
+ "deepseek-ai/DeepSeek-R1": 163840,
81
+ "NousResearch/Hermes-3-Llama-405B": 131072,
82
+ "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 131072,
83
+ "deepseek-ai/DeepSeek-R1-fast": 163840,
84
+ "Qwen/QwQ-32B-fast": 131072,
85
+ "Qwen/QwQ-32B": 131072,
86
+ "nvidia/Llama-3_3-Nemotron-Super-49B-v1": 131072,
87
+ "mistralai/Mistral-Small-3.1-24B-Instruct-2503": 131072,
88
+ "google/gemma-3-27b-it": 131072,
89
+ "google/gemma-3-27b-it-fast": 131072,
90
+ "Qwen/Qwen2.5-VL-72B-Instruct": 32000,
91
+ "deepseek-ai/DeepSeek-V3-0324": 163840,
92
+ "deepseek-ai/DeepSeek-V3-0324-fast": 163840,
93
+ "black-forest-labs/flux-dev": 0,
94
+ "black-forest-labs/flux-schnell": 0,
95
+ "stability-ai/sdxl": 0
96
+ },
97
+ "novita": {
98
+ "deepseek/deepseek-prover-v2-671b": 160000,
99
+ "qwen/qwen3-235b-a22b-fp8": 128000,
100
+ "qwen/qwen3-30b-a3b-fp8": 128000,
101
+ "qwen/qwen3-32b-fp8": 128000,
102
+ "deepseek/deepseek-v3-0324": 128000,
103
+ "qwen/qwen2.5-vl-72b-instruct": 96000,
104
+ "deepseek/deepseek-v3-turbo": 64000,
105
+ "deepseek/deepseek-r1-turbo": 64000,
106
+ "meta-llama/llama-4-maverick-17b-128e-instruct-fp8": 1048576,
107
+ "google/gemma-3-27b-it": 32000,
108
+ "qwen/qwq-32b": 32768,
109
+ "Sao10K/L3-8B-Stheno-v3.2": 8192,
110
+ "gryphe/mythomax-l2-13b": 4096,
111
+ "meta-llama/llama-4-scout-17b-16e-instruct": 131072,
112
+ "deepseek/deepseek-r1-distill-llama-8b": 32000,
113
+ "deepseek/deepseek_v3": 64000,
114
+ "meta-llama/llama-3.1-8b-instruct": 16384,
115
+ "deepseek/deepseek-r1-distill-qwen-14b": 64000,
116
+ "meta-llama/llama-3.3-70b-instruct": 131072,
117
+ "qwen/qwen-2.5-72b-instruct": 32000,
118
+ "mistralai/mistral-nemo": 131072,
119
+ "deepseek/deepseek-r1-distill-qwen-32b": 64000,
120
+ "meta-llama/llama-3-8b-instruct": 8192,
121
+ "microsoft/wizardlm-2-8x22b": 65535,
122
+ "deepseek/deepseek-r1-distill-llama-70b": 32000,
123
+ "meta-llama/llama-3.1-70b-instruct": 32768,
124
+ "google/gemma-2-9b-it": 8192,
125
+ "mistralai/mistral-7b-instruct": 32768,
126
+ "meta-llama/llama-3-70b-instruct": 8192,
127
+ "deepseek/deepseek-r1": 64000,
128
+ "nousresearch/hermes-2-pro-llama-3-8b": 8192,
129
+ "sao10k/l3-70b-euryale-v2.1": 8192,
130
+ "cognitivecomputations/dolphin-mixtral-8x22b": 16000,
131
+ "jondurbin/airoboros-l2-70b": 4096,
132
+ "sophosympatheia/midnight-rose-70b": 4096,
133
+ "sao10k/l3-8b-lunaris": 8192,
134
+ "qwen/qwen3-0.6b-fp8": 32000,
135
+ "qwen/qwen3-1.7b-fp8": 32000,
136
+ "qwen/qwen3-8b-fp8": 128000,
137
+ "qwen/qwen3-4b-fp8": 128000,
138
+ "qwen/qwen3-14b-fp8": 128000,
139
+ "thudm/glm-4-9b-0414": 32000,
140
+ "thudm/glm-z1-9b-0414": 32000,
141
+ "thudm/glm-z1-32b-0414": 32000,
142
+ "thudm/glm-4-32b-0414": 32000,
143
+ "thudm/glm-z1-rumination-32b-0414": 32000,
144
+ "qwen/qwen2.5-7b-instruct": 32000,
145
+ "meta-llama/llama-3.2-1b-instruct": 131000,
146
+ "meta-llama/llama-3.2-11b-vision-instruct": 32768,
147
+ "meta-llama/llama-3.2-3b-instruct": 32768,
148
+ "meta-llama/llama-3.1-8b-instruct-bf16": 8192,
149
+ "sao10k/l31-70b-euryale-v2.2": 8192
150
+ },
151
+ "fal": {
152
+ "fal/model-name": 4096
153
+ },
154
+ "cerebras": {
155
+ "cerebras/model-name": 8192
156
+ },
157
+ "hf-inference": {
158
+ "google/gemma-2-9b-it": 8192,
159
+ "meta-llama/Meta-Llama-3-8B-Instruct": 8192
160
+ },
161
+ "hyperbolic": {
162
+ "Qwen/Qwen2.5-72B-Instruct": 131072,
163
+ "Qwen/Qwen2.5-VL-72B-Instruct": 32768,
164
+ "meta-llama/Meta-Llama-3-70B-Instruct": 8192,
165
+ "deepseek-ai/DeepSeek-V3": 131072,
166
+ "deepseek-ai/DeepSeek-V3-0324": 163840,
167
+ "meta-llama/Llama-3.3-70B-Instruct": 131072,
168
+ "Qwen/QwQ-32B-Preview": 32768,
169
+ "Qwen/Qwen2.5-Coder-32B-Instruct": 32768,
170
+ "meta-llama/Llama-3.2-3B-Instruct": 131072,
171
+ "NousResearch/Hermes-3-Llama-3.1-70B": 12288,
172
+ "meta-llama/Meta-Llama-3.1-405B-Instruct": 131000,
173
+ "meta-llama/Meta-Llama-3.1-70B-Instruct": 131072,
174
+ "meta-llama/Meta-Llama-3.1-8B-Instruct": 131072,
175
+ "mistralai/Pixtral-12B-2409": 32768,
176
+ "Qwen/Qwen2.5-VL-7B-Instruct": 32768,
177
+ "meta-llama/Meta-Llama-3.1-405B": 32768,
178
+ "meta-llama/Meta-Llama-3.1-405B-FP8": 32768,
179
+ "deepseek-ai/DeepSeek-R1": 163840,
180
+ "Qwen/QwQ-32B": 131072
181
+ },
182
+ "cohere": {
183
+ "embed-english-light-v3.0": 512,
184
+ "embed-multilingual-v2.0": 256,
185
+ "rerank-v3.5": 4096,
186
+ "embed-v4.0": 8192,
187
+ "rerank-english-v3.0": 4096,
188
+ "command-r": 128000,
189
+ "embed-english-light-v3.0-image": 0,
190
+ "embed-english-v3.0-image": 0,
191
+ "command-a-03-2025": 288000,
192
+ "command-nightly": 288000,
193
+ "command-r7b-12-2024": 128000,
194
+ "command-r-plus": 128000,
195
+ "c4ai-aya-vision-32b": 16384,
196
+ "command-r7b-arabic-02-2025": 128000,
197
+ "command-light-nightly": 4096,
198
+ "embed-english-v3.0": 512,
199
+ "embed-multilingual-light-v3.0-image": 0,
200
+ "embed-multilingual-v3.0-image": 0,
201
+ "c4ai-aya-expanse-32b": 128000,
202
+ "command": 4096,
203
+ "c4ai-aya-vision-8b": 16384
204
+ },
205
+ "together": {
206
+ "meta-llama/Llama-3.3-70B-Instruct-Turbo": 131072,
207
+ "togethercomputer/m2-bert-80M-32k-retrieval": 32768,
208
+ "google/gemma-2-9b-it": 8192,
209
+ "cartesia/sonic": 0,
210
+ "Qwen/Qwen2.5-7B-Instruct-Turbo": 32768,
211
+ "deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free": 8192,
212
+ "meta-llama-llama-2-70b-hf": 4096,
213
+ "BAAI/bge-base-en-v1.5": 512,
214
+ "Gryphe/MythoMax-L2-13b": 4096,
215
+ "google/gemma-2-27b-it": 8192,
216
+ "Qwen/Qwen2-VL-72B-Instruct": 32768,
217
+ "meta-llama/LlamaGuard-2-8b": 8192,
218
+ "cartesia/sonic-2": 0,
219
+ "togethercomputer/m2-bert-80M-8k-retrieval": 8192,
220
+ "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free": 131072,
221
+ "arcee-ai/maestro-reasoning": 131072,
222
+ "Qwen/QwQ-32B": 131072,
223
+ "togethercomputer/MoA-1": 32768,
224
+ "mistralai/Mistral-7B-Instruct-v0.2": 32768,
225
+ "meta-llama/Meta-Llama-3-70B-Instruct-Turbo": 8192,
226
+ "google/gemma-2b-it": 8192,
227
+ "mistralai/Mistral-Small-24B-Instruct-2501": 32768,
228
+ "Gryphe/MythoMax-L2-13b-Lite": 4096,
229
+ "meta-llama/Meta-Llama-Guard-3-8B": 8192,
230
+ "scb10x/scb10x-llama3-1-typhoon2-8b-instruct": 8192,
231
+ "Qwen/Qwen3-235B-A22B-fp8-tput": 40960,
232
+ "meta-llama/Llama-3-8b-chat-hf": 8192,
233
+ "arcee-ai/caller": 32768,
234
+ "togethercomputer/MoA-1-Turbo": 32768,
235
+ "mistralai/Mistral-7B-Instruct-v0.1": 32768,
236
+ "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
237
+ "scb10x/scb10x-llama3-1-typhoon2-70b-instruct": 8192,
238
+ "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": 131072,
239
+ "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo": 131072,
240
+ "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 131072,
241
+ "arcee-ai/virtuoso-medium-v2": 131072,
242
+ "arcee-ai/coder-large": 32768,
243
+ "arcee-ai/virtuoso-large": 131072,
244
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": 131072,
245
+ "meta-llama/Meta-Llama-3-8B-Instruct-Lite": 8192,
246
+ "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo": 131072,
247
+ "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": 1048576,
248
+ "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": 131072,
249
+ "mistralai/Mixtral-8x7B-v0.1": 32768,
250
+ "meta-llama/Llama-4-Scout-17B-16E-Instruct": 1048576,
251
+ "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": 130815,
252
+ "deepseek-ai/DeepSeek-R1": 163840,
253
+ "arcee-ai/arcee-blitz": 32768,
254
+ "deepseek-ai/DeepSeek-V3-p-dp": 131072,
255
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": 131072,
256
+ "deepseek-ai/DeepSeek-V3": 131072,
257
+ "Qwen/Qwen2.5-Coder-32B-Instruct": 16384,
258
+ "Qwen/Qwen2-72B-Instruct": 32768,
259
+ "mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
260
+ "meta-llama/Llama-3-70b-chat-hf": 8192,
261
+ "mistralai/Mistral-7B-Instruct-v0.3": 32768,
262
+ "Salesforce/Llama-Rank-V1": 8192,
263
+ "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": 32768,
264
+ "meta-llama/Llama-Vision-Free": 131072,
265
+ "meta-llama/Llama-Guard-3-11B-Vision-Turbo": 131072,
266
+ "meta-llama/Llama-3.2-3B-Instruct-Turbo": 131072,
267
+ "Qwen/Qwen2.5-72B-Instruct-Turbo": 131072,
268
+ "arcee_ai/arcee-spotlight": 131072,
269
+ "meta-llama/Llama-2-70b-hf": 4096,
270
+ "Qwen/Qwen2.5-VL-72B-Instruct": 32768
271
+ },
272
+ "fireworks-ai": {
273
+ "accounts/fireworks/models/qwq-32b": 131072,
274
+ "accounts/fireworks/models/qwen2-vl-72b-instruct": 32768,
275
+ "accounts/fireworks/models/deepseek-v3": 131072,
276
+ "accounts/fireworks/models/llama-v3p1-8b-instruct": 131072,
277
+ "accounts/fireworks/models/llama-v3p1-70b-instruct": 131072,
278
+ "accounts/fireworks/models/llama-v3p2-90b-vision-instruct": 131072,
279
+ "accounts/fireworks/models/llama-v3-70b-instruct": 8192,
280
+ "accounts/fireworks/models/deepseek-v3-0324": 163840,
281
+ "accounts/fireworks/models/qwen2p5-vl-32b-instruct": 128000,
282
+ "accounts/fireworks/models/llama4-maverick-instruct-basic": 1048576,
283
+ "accounts/fireworks/models/qwen3-30b-a3b": 131072,
284
+ "accounts/fireworks/models/llama4-scout-instruct-basic": 1048576,
285
+ "accounts/fireworks/models/deepseek-r1-basic": 163840,
286
+ "accounts/fireworks/models/qwen-qwq-32b-preview": 32768,
287
+ "accounts/fireworks/models/phi-3-vision-128k-instruct": 32064,
288
+ "accounts/fireworks/models/firesearch-ocr-v6": 131072,
289
+ "accounts/fireworks/models/llama-v3p3-70b-instruct": 131072,
290
+ "accounts/fireworks/models/deepseek-r1": 163840,
291
+ "accounts/yi-01-ai/models/yi-large": 32768,
292
+ "accounts/fireworks/models/llama-v3p1-405b-instruct": 131072,
293
+ "accounts/fireworks/models/llama-guard-3-8b": 131072,
294
+ "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new": 131072,
295
+ "accounts/fireworks/models/mixtral-8x22b-instruct": 65536,
296
+ "accounts/fireworks/models/qwen2p5-72b-instruct": 32768,
297
+ "accounts/perplexity/models/r1-1776": 163840
298
+ }
299
+ }
src/lib/server/providers/cohere.ts ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { MaxTokensCache } from "./index.js";
2
+
3
const COHERE_API_URL = "https://api.cohere.ai/v1/models";

/** Fields of a Cohere model entry that this module reads. */
interface CohereModel {
	name?: string;
	context_length?: number;
}

/** Part of the Cohere models response that this module reads. */
interface CohereResponse {
	models?: CohereModel[];
}

/**
 * Fetches the Cohere model list and maps each model name to its context length.
 *
 * @param apiKey - Cohere API key, sent as a Bearer token; when missing the
 *                 fetch is skipped
 * @returns a `{ [modelName]: contextLength }` map. Entries without a name or
 *          without a numeric `context_length` are left out. An empty object is
 *          returned when the key is missing or the request fails (the failure
 *          is logged, not thrown).
 */
export async function fetchCohereData(apiKey: string | undefined): Promise<MaxTokensCache["cohere"]> {
	if (!apiKey) {
		console.warn("Cohere API key not provided. Skipping Cohere fetch.");
		return {};
	}
	try {
		const response = await fetch(COHERE_API_URL, {
			headers: {
				Authorization: `Bearer ${apiKey}`,
			},
		});
		if (!response.ok) {
			throw new Error(`Cohere API request failed: ${response.status} ${response.statusText}`);
		}
		const data: CohereResponse = await response.json();
		const modelsData: MaxTokensCache["cohere"] = {};
		// `?.` keeps a literal `null` body from throwing; it is simply treated as "no models".
		const models = data?.models;
		if (Array.isArray(models)) {
			for (const model of models) {
				if (model.name && typeof model.context_length === "number") {
					modelsData[model.name] = model.context_length;
				}
			}
		}
		return modelsData;
	} catch (error) {
		console.error("Error fetching Cohere data:", error);
		return {};
	}
}
src/lib/server/providers/fireworks.ts ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { MaxTokensCache } from "./index.js";
2
+
3
const FIREWORKS_API_URL = "https://api.fireworks.ai/inference/v1/models"; // Assumed

/**
 * Fields of a model entry that this module reads. The response shape is
 * assumed (an OpenAI-style `{ data: [...] }` list), so everything is optional.
 */
interface FireworksModel {
	id?: string;
	context_length?: number;
	context_window?: number;
	config?: {
		max_tokens?: number;
	};
}

/** Part of the Fireworks AI models response that this module reads. */
interface FireworksResponse {
	data?: FireworksModel[];
}

/**
 * Fetches the Fireworks AI model list and maps each model id to its context length.
 *
 * @param apiKey - Fireworks AI API key, sent as a Bearer token; when missing
 *                 the fetch is skipped
 * @returns a `{ [modelId]: contextLength }` map. The length is taken from the
 *          first present field among `context_length`, `context_window` and
 *          `config.max_tokens`; entries without an id or a numeric length are
 *          left out. An empty object is returned when the key is missing or
 *          the request fails (the failure is logged, not thrown).
 */
export async function fetchFireworksData(apiKey: string | undefined): Promise<MaxTokensCache["fireworks-ai"]> {
	if (!apiKey) {
		console.warn("Fireworks AI API key not provided. Skipping Fireworks AI fetch.");
		return {};
	}
	try {
		const response = await fetch(FIREWORKS_API_URL, {
			headers: {
				Authorization: `Bearer ${apiKey}`,
			},
		});
		if (!response.ok) {
			throw new Error(`Fireworks AI API request failed: ${response.status} ${response.statusText}`);
		}
		const data: FireworksResponse = await response.json();
		const modelsData: MaxTokensCache["fireworks-ai"] = {};

		// `?.` keeps a literal `null` body from throwing; it falls through to the warning below.
		const models = data?.data;
		if (Array.isArray(models)) {
			for (const model of models) {
				const contextLength = model.context_length ?? model.context_window ?? model.config?.max_tokens ?? null;
				if (model.id && typeof contextLength === "number") {
					modelsData[model.id] = contextLength;
				}
			}
		} else {
			console.warn("Unexpected response structure from Fireworks AI API:", data);
		}
		return modelsData;
	} catch (error) {
		console.error("Error fetching Fireworks AI data:", error);
		return {}; // Return empty on error
	}
}
src/lib/server/providers/hyperbolic.ts ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { MaxTokensCache } from "./index.js";
2
+
3
const HYPERBOLIC_API_URL = "https://api.hyperbolic.xyz/v1/models"; // Assumed

/**
 * Fields of a model entry that this module reads. The response shape is
 * assumed (an OpenAI-style `{ data: [...] }` list), so everything is optional.
 */
interface HyperbolicModel {
	id?: string;
	context_length?: number;
	context_window?: number;
	config?: {
		max_tokens?: number;
	};
}

/** Part of the Hyperbolic models response that this module reads. */
interface HyperbolicResponse {
	data?: HyperbolicModel[];
}

/**
 * Fetches the Hyperbolic model list and maps each model id to its context length.
 *
 * @param apiKey - Hyperbolic API key, sent as a Bearer token; when missing the
 *                 fetch is skipped
 * @returns a `{ [modelId]: contextLength }` map. The length is taken from the
 *          first present field among `context_length`, `context_window` and
 *          `config.max_tokens`; entries without an id or a numeric length are
 *          left out. An empty object is returned when the key is missing or
 *          the request fails (the failure is logged, not thrown).
 */
export async function fetchHyperbolicData(apiKey: string | undefined): Promise<MaxTokensCache["hyperbolic"]> {
	if (!apiKey) {
		console.warn("Hyperbolic API key not provided. Skipping Hyperbolic fetch.");
		return {};
	}
	try {
		const response = await fetch(HYPERBOLIC_API_URL, {
			headers: {
				Authorization: `Bearer ${apiKey}`,
			},
		});
		if (!response.ok) {
			throw new Error(`Hyperbolic API request failed: ${response.status} ${response.statusText}`);
		}
		const data: HyperbolicResponse = await response.json();
		const modelsData: MaxTokensCache["hyperbolic"] = {};

		// `?.` keeps a literal `null` body from throwing; it falls through to the warning below.
		const models = data?.data;
		if (Array.isArray(models)) {
			for (const model of models) {
				const contextLength = model.context_length ?? model.context_window ?? model.config?.max_tokens ?? null;
				if (model.id && typeof contextLength === "number") {
					modelsData[model.id] = contextLength;
				}
			}
		} else {
			console.warn("Unexpected response structure from Hyperbolic API:", data);
		}
		return modelsData;
	} catch (error) {
		console.error("Error fetching Hyperbolic data:", error);
		return {}; // Return empty on error
	}
}
src/lib/server/providers/index.ts ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fs from "fs/promises";
2
+ import path from "path";
3
+ import { fetchCohereData } from "./cohere.js";
4
+ import { fetchTogetherData } from "./together.js";
5
+ import { fetchFireworksData } from "./fireworks.js";
6
+ import { fetchHyperbolicData } from "./hyperbolic.js";
7
+ import { fetchReplicateData } from "./replicate.js";
8
+ import { fetchNebiusData } from "./nebius.js";
9
+ import { fetchNovitaData } from "./novita.js";
10
+ import { fetchSambanovaData } from "./sambanova.js";
11
+
12
+ // --- Constants ---
13
// Location of the on-disk context-length cache, resolved against the process working directory.
// NOTE(review): this commit adds the data file at src/lib/data/context_length.json (imported by
// inference-playground/utils.ts as "$lib/data/context_length.json"), not under src/lib/server/data/.
// Confirm that a separate server-side cache file is intended; otherwise reads here always miss.
+ const CACHE_FILE_PATH = path.resolve("src/lib/server/data/context_length.json");
14
+
15
+ // --- Types ---
16
/**
 * Context lengths keyed first by provider name, then by model id.
 */
export interface MaxTokensCache {
	[provider: string]: Record<string, number>;
}
21
+
22
/**
 * Per-provider API keys handed to `fetchAllProviderData`.
 * Every key is optional; each provider fetcher decides what to do without one.
 */
export interface ApiKeys {
	COHERE_API_KEY?: string;
	FIREWORKS_API_KEY?: string;
	HYPERBOLIC_API_KEY?: string;
	NEBIUS_API_KEY?: string;
	NOVITA_API_KEY?: string;
	REPLICATE_API_KEY?: string;
	SAMBANOVA_API_KEY?: string;
	TOGETHER_API_KEY?: string;
}
33
+
34
// --- Cache Handling ---
/** Last cache contents loaded from or written to disk; `null` until loaded or after a failed update. */
let memoryCache: MaxTokensCache | null = null;
/** In-flight read of the cache file, shared so concurrent callers trigger a single read. */
let cacheReadPromise: Promise<MaxTokensCache> | null = null;

/**
 * Returns the context-length cache, reading the cache file on first use.
 *
 * The result is memoised in `memoryCache`, and concurrent callers share one
 * pending read. A missing file, an unreadable file, or a file whose JSON is
 * not a plain object all result in an empty cache (logged, never thrown), so
 * callers can always index the returned value.
 */
async function readCache(): Promise<MaxTokensCache> {
	if (memoryCache) {
		return memoryCache;
	}
	if (cacheReadPromise) {
		return cacheReadPromise;
	}
	cacheReadPromise = (async () => {
		let loaded: MaxTokensCache = {};
		try {
			const raw = await fs.readFile(CACHE_FILE_PATH, "utf-8");
			const parsed: unknown = JSON.parse(raw);
			// JSON such as `null` or `[]` parses fine but cannot be indexed as provider -> model -> length.
			if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
				loaded = parsed as MaxTokensCache;
			} else {
				console.error(
					"Error reading context length cache file:",
					new Error("Cache file does not contain a JSON object")
				);
			}
		} catch (error: unknown) {
			if (typeof error === "object" && error !== null && "code" in error && error.code === "ENOENT") {
				console.warn(`Cache file not found at ${CACHE_FILE_PATH}, starting with empty cache.`);
			} else {
				console.error("Error reading context length cache file:", error);
			}
		}
		// Store and return the same object so every caller shares one cache instance.
		memoryCache = loaded;
		cacheReadPromise = null;
		return loaded;
	})();
	return cacheReadPromise;
}
66
+
67
/** True when a global `window` is defined, i.e. the module is running in a browser. */
const isBrowser = typeof window !== "undefined";

/** Forwards its arguments to `console.log`, but only outside the browser. */
function serverLog(...txt: unknown[]) {
	if (!isBrowser) {
		console.log(...txt);
	}
}

/** Forwards its arguments to `console.error`, but only outside the browser. */
function serverError(...txt: unknown[]) {
	if (!isBrowser) {
		console.error(...txt);
	}
}
78
+
79
/**
 * Records one provider/model context length in the cache file and refreshes
 * the in-memory cache.
 *
 * The file is re-read from disk (a missing file counts as an empty cache),
 * updated, written to a sibling `.tmp` file and then renamed over the cache
 * file. Failures are logged via `serverError` and reset `memoryCache` to
 * `null`; they are never thrown to the caller.
 *
 * @param provider - provider name the entry is stored under
 * @param modelId - model id within that provider
 * @param maxTokens - context length to store
 */
async function updateCache(provider: string, modelId: string, maxTokens: number): Promise<void> {
	try {
		let cache: MaxTokensCache;
		try {
			const data = await fs.readFile(CACHE_FILE_PATH, "utf-8");
			cache = JSON.parse(data) as MaxTokensCache;
		} catch (readError: unknown) {
			if (typeof readError === "object" && readError !== null && "code" in readError && readError.code === "ENOENT") {
				cache = {};
			} else {
				throw readError;
			}
		}
		if (!cache[provider]) {
			cache[provider] = {};
		}
		cache[provider][modelId] = maxTokens;
		// A missing cache file is tolerated above, so its directory may be missing too:
		// create it first, otherwise the very first write would fail with ENOENT.
		await fs.mkdir(path.dirname(CACHE_FILE_PATH), { recursive: true });
		const tempFilePath = CACHE_FILE_PATH + ".tmp";
		await fs.writeFile(tempFilePath, JSON.stringify(cache, null, "\t"), "utf-8");
		// Rename over the target so the cache file is replaced in a single step.
		await fs.rename(tempFilePath, CACHE_FILE_PATH);
		memoryCache = cache;
		serverLog(`Cache updated for ${provider} - ${modelId}: ${maxTokens}`);
	} catch (error) {
		serverError(`Error updating context length cache for ${provider} - ${modelId}:`, error);
		// Drop the in-memory copy so the next read goes back to disk.
		memoryCache = null;
	}
}
106
+
107
+ // --- Main Exported Function ---
108
/**
 * Looks up the context length of `modelId` on `provider`.
 *
 * A cached value is returned as soon as one is defined. On a cache miss the
 * provider's model list is fetched live with `apiKey`; a hit is returned
 * immediately while the cache file is updated in the background.
 *
 * @param provider - provider name, e.g. "cohere" or "fireworks-ai"
 * @param modelId - model id as used by that provider
 * @param apiKey - key passed to the provider's fetcher
 * @returns the context length, or `null` when the provider has no live
 *          fetcher, the model is not in the fetched data, or the fetch throws
 */
export async function getMaxTokens(
	provider: string,
	modelId: string,
	apiKey: string | undefined
): Promise<number | null> {
	const cached = (await readCache())[provider]?.[modelId];
	if (cached !== undefined) {
		return cached;
	}

	serverLog(`Cache miss for ${provider} - ${modelId}. Attempting live fetch...`);

	// Provider name -> function that fetches that provider's full model table.
	const liveFetchers = new Map<string, (key: string | undefined) => Promise<MaxTokensCache[string]>>([
		["cohere", fetchCohereData],
		["together", fetchTogetherData],
		["fireworks-ai", fetchFireworksData],
		["hyperbolic", fetchHyperbolicData],
		["replicate", fetchReplicateData],
		["nebius", fetchNebiusData],
		["novita", fetchNovitaData],
		["sambanova", fetchSambanovaData],
	]);

	try {
		const fetchProviderData = liveFetchers.get(provider);
		if (!fetchProviderData) {
			serverLog(`Live fetch not supported or implemented for provider: ${provider}`);
			return null;
		}

		const providerData = await fetchProviderData(apiKey);
		const liveValue = providerData?.[modelId] ?? null;
		if (liveValue === null) {
			serverLog(`Live fetch for ${provider} did not return data for model ${modelId}.`);
			return null;
		}

		serverLog(`Live fetch successful for ${provider} - ${modelId}: ${liveValue}`);
		// Fire-and-forget: the caller gets the value without waiting for the disk write.
		updateCache(provider, modelId, liveValue).catch(err => {
			serverError(`Async cache update failed for ${provider} - ${modelId}:`, err);
		});
		return liveValue;
	} catch (error) {
		serverError(`Error during live fetch for ${provider} - ${modelId}:`, error);
		return null;
	}
}
181
+
182
+ // --- Helper for Build Script ---
183
/**
 * Fetches the model/context-length tables of every supported provider in parallel.
 *
 * @param apiKeys - per-provider API keys; each fetcher receives its own key
 * @returns a cache object containing only the providers whose fetch succeeded
 *          with at least one model. Rejected fetches are logged and left out.
 */
export async function fetchAllProviderData(apiKeys: ApiKeys): Promise<MaxTokensCache> {
	serverLog("Fetching data for all providers...");

	// Each fetcher is wrapped in a thunk so it picks its own key from `apiKeys`.
	const providerFetchers = [
		{ name: "cohere", fetcher: () => fetchCohereData(apiKeys.COHERE_API_KEY) },
		{ name: "together", fetcher: () => fetchTogetherData(apiKeys.TOGETHER_API_KEY) },
		{ name: "fireworks-ai", fetcher: () => fetchFireworksData(apiKeys.FIREWORKS_API_KEY) },
		{ name: "hyperbolic", fetcher: () => fetchHyperbolicData(apiKeys.HYPERBOLIC_API_KEY) },
		{ name: "replicate", fetcher: () => fetchReplicateData(apiKeys.REPLICATE_API_KEY) },
		{ name: "nebius", fetcher: () => fetchNebiusData(apiKeys.NEBIUS_API_KEY) },
		{ name: "novita", fetcher: () => fetchNovitaData(apiKeys.NOVITA_API_KEY) },
		{ name: "sambanova", fetcher: () => fetchSambanovaData(apiKeys.SAMBANOVA_API_KEY) },
	];

	const outcomes = await Promise.allSettled(providerFetchers.map(({ fetcher }) => fetcher()));

	const results: MaxTokensCache = {};
	for (const [index, outcome] of outcomes.entries()) {
		const providerInfo = providerFetchers[index];
		if (!providerInfo) {
			serverError(`Error: No provider info found for index ${index}`);
			continue;
		}
		const providerName = providerInfo.name;

		if (outcome.status === "rejected") {
			serverError(`Error fetching ${providerName} data:`, outcome.reason);
			continue;
		}
		if (!outcome.value) {
			continue;
		}
		if (Object.keys(outcome.value).length === 0) {
			serverLog(`No data returned for ${providerName}.`);
			continue;
		}
		results[providerName] = outcome.value;
		serverLog(`Successfully fetched data for ${providerName}`);
	}

	serverLog("Finished fetching provider data.");
	return results;
}
src/lib/server/providers/nebius.ts ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { MaxTokensCache } from "./index.js";
2
+
3
/** Fields of a Nebius model entry that this module reads. */
interface NebiusModel {
	id: string;
	config?: {
		max_tokens?: number;
	};
	context_length?: number;
}

/** Part of the Nebius models response that this module reads. */
interface NebiusResponse {
	data?: NebiusModel[];
}

const NEBIUS_API_URL = "https://api.studio.nebius.com/v1/models?verbose=true";

/**
 * Fetches the Nebius model list and maps each model id to its context length.
 *
 * @param apiKey - Nebius API key, sent as a Bearer token; when missing the
 *                 fetch is skipped
 * @returns a `{ [modelId]: contextLength }` map, using `context_length` and
 *          falling back to `config.max_tokens`. Entries without an id or a
 *          numeric length are left out. An empty object is returned when the
 *          key is missing or the request fails (the failure is logged, not thrown).
 */
export async function fetchNebiusData(apiKey: string | undefined): Promise<MaxTokensCache["nebius"]> {
	if (!apiKey) {
		console.warn("Nebius API key not provided. Skipping Nebius fetch.");
		return {};
	}

	try {
		const headers = { Authorization: `Bearer ${apiKey}` };
		const response = await fetch(NEBIUS_API_URL, { headers });
		if (!response.ok) {
			throw new Error(`Nebius API request failed: ${response.status} ${response.statusText}`);
		}

		const payload: NebiusResponse = await response.json();
		const modelsData: MaxTokensCache["nebius"] = {};
		const entries = payload?.data;

		if (!Array.isArray(entries)) {
			console.warn("Unexpected response structure from Nebius API:", payload);
			return modelsData;
		}

		for (const entry of entries) {
			const contextLength = entry.context_length ?? entry.config?.max_tokens ?? null;
			if (entry.id && typeof contextLength === "number") {
				modelsData[entry.id] = contextLength;
			}
		}
		return modelsData;
	} catch (error) {
		console.error("Error fetching Nebius data:", error);
		return {};
	}
}
src/lib/server/providers/novita.ts ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { MaxTokensCache } from "./index.js";
2
+
3
/** The fields of a Novita model entry that this module declares. */
interface NovitaModel {
	id: string;
	object: string;
	/** Copied into the cache as the model's token limit. */
	context_size: number;
}

/** Envelope of the Novita model-listing response. */
interface NovitaResponse {
	data: NovitaModel[];
}

/** Novita model-listing endpoint. */
const NOVITA_API_URL = "https://api.novita.ai/v3/openai/models";
14
+
15
/**
 * Fetches the Novita model list and maps each model id to its `context_size`.
 *
 * Entries without an id or without a numeric `context_size` are left out.
 *
 * @param apiKey Novita API key. When missing, nothing is fetched.
 * @returns A map of model id to context size. Empty when the key is missing,
 *          the request fails, or the response cannot be processed (each case is logged).
 */
export async function fetchNovitaData(apiKey: string | undefined): Promise<MaxTokensCache["novita"]> {
	if (!apiKey) {
		console.warn("Novita API key not provided. Skipping Novita fetch.");
		return {};
	}

	try {
		const headers = { Authorization: `Bearer ${apiKey}` };
		const res = await fetch(NOVITA_API_URL, { headers });
		if (!res.ok) {
			// Thrown inside the try so the failure is logged by the catch below.
			throw new Error(`Novita API request failed: ${res.status} ${res.statusText}`);
		}

		const payload: NovitaResponse = await res.json();
		const sizesById: MaxTokensCache["novita"] = {};
		const entries = payload?.data;

		if (Array.isArray(entries)) {
			entries.forEach(entry => {
				if (!entry.id || typeof entry.context_size !== "number") return;
				sizesById[entry.id] = entry.context_size;
			});
		} else {
			console.warn("Unexpected response structure from Novita API:", payload);
		}

		return sizesById;
	} catch (err) {
		console.error("Error fetching Novita data:", err);
		return {};
	}
}
src/lib/server/providers/replicate.ts ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { MaxTokensCache } from "./index.js";
2
+
3
/** Replicate model-listing endpoint. */
const REPLICATE_API_URL = "https://api.replicate.com/v1/models";

/**
 * Fetches the Replicate model list and maps each model id to its token limit.
 *
 * The limit is taken from `context_length`, falling back to `config.max_tokens`;
 * entries without an id or without a numeric limit are left out.
 *
 * NOTE(review): this assumes entries under `results` expose `id` and
 * `context_length` / `config.max_tokens`. Confirm against Replicate's model
 * schema; if they do not, the returned map is always empty.
 *
 * @param apiKey Replicate API token. When missing, nothing is fetched.
 * @returns A map of model id to token limit. Empty when the key is missing,
 *          the request fails, or the response cannot be processed (each case is logged).
 */
export async function fetchReplicateData(apiKey: string | undefined): Promise<MaxTokensCache["replicate"]> {
	if (!apiKey) {
		console.warn("Replicate API key not provided. Skipping Replicate fetch.");
		return {};
	}

	try {
		const res = await fetch(REPLICATE_API_URL, {
			headers: { Authorization: `Token ${apiKey}` },
		});
		if (!res.ok) {
			// Thrown inside the try so the failure is logged by the catch below.
			throw new Error(`Replicate API request failed: ${res.status} ${res.statusText}`);
		}

		const payload = await res.json();
		const limitsById: MaxTokensCache["replicate"] = {};
		const entries = payload?.results;

		if (!Array.isArray(entries)) {
			console.warn("Unexpected response structure from Replicate API:", payload);
			return limitsById;
		}

		for (const entry of entries) {
			const limit = entry.context_length ?? entry.config?.max_tokens;
			if (entry.id && typeof limit === "number") {
				limitsById[entry.id] = limit;
			}
		}
		return limitsById;
	} catch (err) {
		console.error("Error fetching Replicate data:", err);
		return {};
	}
}
src/lib/server/providers/sambanova.ts ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { MaxTokensCache } from "./index.js";
2
+
3
/** The fields of a SambaNova model entry that this module declares. */
interface SambanovaModel {
	id: string;
	object: string;
	/** Copied into the cache as the model's token limit. */
	context_length: number;
	max_completion_tokens?: number;
	pricing?: {
		prompt: string;
		completion: string;
	};
}

/** Envelope of the SambaNova model-listing response. */
interface SambanovaResponse {
	data: SambanovaModel[];
	object: string;
}

/** SambaNova model-listing endpoint. */
const SAMBANOVA_API_URL = "https://api.sambanova.ai/v1/models";
20
+
21
/**
 * Fetches the SambaNova model list and maps each model id to its `context_length`.
 *
 * Entries without an id or without a numeric `context_length` are left out.
 *
 * @param apiKey SambaNova API key. When missing, nothing is fetched.
 * @returns A map of model id to context length. Empty when the key is missing,
 *          the request fails, or the response cannot be processed (each case is logged).
 */
export async function fetchSambanovaData(apiKey: string | undefined): Promise<MaxTokensCache["sambanova"]> {
	if (!apiKey) {
		console.warn("SambaNova API key not provided. Skipping SambaNova fetch.");
		return {};
	}

	try {
		const res = await fetch(SAMBANOVA_API_URL, {
			headers: { Authorization: `Bearer ${apiKey}` },
		});
		if (!res.ok) {
			// Thrown inside the try so the failure is logged by the catch below.
			throw new Error(`SambaNova API request failed: ${res.status} ${res.statusText}`);
		}

		const payload: SambanovaResponse = await res.json();
		const lengthsById: MaxTokensCache["sambanova"] = {};
		const entries = payload?.data;

		if (!Array.isArray(entries)) {
			console.warn("Unexpected response structure from SambaNova API:", payload);
			return lengthsById;
		}

		for (let i = 0; i < entries.length; i++) {
			const entry = entries[i];
			const hasNumericLength = typeof entry.context_length === "number";
			if (entry.id && hasNumericLength) {
				lengthsById[entry.id] = entry.context_length;
			}
		}
		return lengthsById;
	} catch (err) {
		console.error("Error fetching SambaNova data:", err);
		return {};
	}
}
src/lib/server/providers/together.ts ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { MaxTokensCache } from "./index.js";
2
+
3
/** Together AI model-listing endpoint. */
const TOGETHER_API_URL = "https://api.together.xyz/v1/models";

/** The fields of a Together AI model entry that this module reads; everything else is ignored. */
interface TogetherModel {
	id?: string;
	context_length?: number;
	config?: {
		max_tokens?: number;
	};
}

/**
 * Fetches the Together AI model list and maps each model id to its token limit.
 *
 * The response is expected to be a bare JSON array of model entries. The limit
 * is taken from `context_length`, falling back to `config.max_tokens`; entries
 * without an id or without a numeric limit are left out.
 *
 * @param apiKey Together AI API key. When missing, nothing is fetched.
 * @returns A map of model id to token limit. Empty when the key is missing,
 *          the request fails, or the response is not an array (each case is logged).
 */
export async function fetchTogetherData(apiKey: string | undefined): Promise<MaxTokensCache["together"]> {
	if (!apiKey) {
		console.warn("Together AI API key not provided. Skipping Together AI fetch.");
		return {};
	}
	try {
		const response = await fetch(TOGETHER_API_URL, {
			headers: {
				Authorization: `Bearer ${apiKey}`,
			},
		});
		if (!response.ok) {
			// Thrown inside the try so the failure is logged by the catch below.
			throw new Error(`Together AI API request failed: ${response.status} ${response.statusText}`);
		}

		// The body is untrusted JSON: keep it `unknown` until its shape has been checked.
		const payload: unknown = await response.json();
		const modelsData: MaxTokensCache["together"] = {};

		if (!Array.isArray(payload)) {
			// Same diagnostic the other providers emit, so an API shape change is visible in the logs.
			console.warn("Unexpected response structure from Together AI API:", payload);
			return modelsData;
		}

		for (const model of payload as TogetherModel[]) {
			const contextLength = model.context_length ?? model.config?.max_tokens;
			if (model.id && typeof contextLength === "number") {
				modelsData[model.id] = contextLength;
			}
		}
		return modelsData;
	} catch (error) {
		console.error("Error fetching Together AI data:", error);
		return {};
	}
}
src/lib/state/generation-stats.svelte.ts ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { getTokens } from "$lib/components/inference-playground/utils.js";
2
+ import { watch } from "runed";
3
+ import { session } from "./session.svelte";
4
+
5
/** Statistics tracked for a single conversation's generation. */
export interface GenerationStats {
	/** Token count computed for the conversation. */
	generatedTokensCount: number;
	/** Latency of the generation (unit is not established in this file; verify against the writer). */
	latency: number;
}
9
+
10
/**
 * Builds the shared, reactive list of per-conversation generation stats.
 *
 * The returned value is the `$state` array itself, indexed by conversation
 * position, with two helpers attached:
 * - `init()` registers a watcher on `session.project` that recomputes
 *   `generatedTokensCount` for every conversation via `getTokens`.
 * - `set(next)` replaces the contents of the list.
 */
function createGenerationStats() {
	const stats = $state([] as Array<GenerationStats>);

	// Recounts the tokens of one conversation and stores the result at `index`.
	const refreshTokenCount = async (conversation: Parameters<typeof getTokens>[0], index: number) => {
		try {
			const generatedTokensCount = await getTokens(conversation);
			// Read the current entry only after the await, so a latency written
			// while the count was pending is not overwritten by a stale copy.
			stats[index] = { latency: 0, ...stats[index], generatedTokensCount };
		} catch (error) {
			// Counting is fire-and-forget; log instead of leaving an unhandled rejection.
			console.error("Error computing generated tokens count:", error);
		}
	};

	const init = () => {
		watch(
			() => $state.snapshot(session.project),
			() => {
				session.project.conversations.forEach((conversation, index) => {
					void refreshTokenCount(conversation, index);
				});
			}
		);
	};

	const set = (s: Array<GenerationStats>) => {
		// Mutate in place: importers hold the array object returned below, so
		// rebinding the local variable would never reach them.
		stats.splice(0, stats.length, ...s);
	};

	return Object.assign(stats, { set, init });
}

export const generationStats = createGenerationStats();
src/lib/types.ts CHANGED
@@ -27,6 +27,7 @@ export type ConversationWithHFModel = Conversation & {
27
  export const isConversationWithHFModel = typia.createIs<ConversationWithHFModel>();
28
  export const isConversationWithCustomModel = typia.createIs<ConversationWithCustomModel>();
29
 
 
30
  export const isCustomModel = typia.createIs<CustomModel>();
31
 
32
  export type Project = {
 
27
  export const isConversationWithHFModel = typia.createIs<ConversationWithHFModel>();
28
  export const isConversationWithCustomModel = typia.createIs<ConversationWithCustomModel>();
29
 
30
+ export const isHFModel = typia.createIs<Model>(); // Reusable runtime type guard for `Model`, created with typia's createIs.
31
  export const isCustomModel = typia.createIs<CustomModel>();
32
 
33
  export type Project = {
src/lib/utils/is.ts CHANGED
@@ -1,4 +1,5 @@
1
  import { SvelteSet } from "svelte/reactivity";
 
2
 
3
  export function isHtmlElement(element: unknown): element is HTMLElement {
4
  return element instanceof HTMLElement;
@@ -35,3 +36,5 @@ export function isTouch(event: PointerEvent): boolean {
35
  export function isPromise(value: unknown): value is Promise<unknown> {
36
  return value instanceof Promise;
37
  }
 
 
 
1
  import { SvelteSet } from "svelte/reactivity";
2
+ import typia from "typia";
3
 
4
  export function isHtmlElement(element: unknown): element is HTMLElement {
5
  return element instanceof HTMLElement;
 
36
  export function isPromise(value: unknown): value is Promise<unknown> {
37
  return value instanceof Promise;
38
  }
39
+
40
+ export const isNumber = typia.createIs<number>(); // Reusable runtime type guard for `number`, created with typia's createIs.
src/lib/utils/object.ts CHANGED
@@ -32,3 +32,12 @@ export function pick<T extends Record<string, unknown>, K extends keyof T>(obj:
32
  }
33
  return result;
34
  }
 
 
 
 
 
 
 
 
 
 
32
  }
33
  return result;
34
  }
35
+
36
/**
 * Try and get a value from an object, or return undefined.
 * The key does not need to match the type of the object, so the
 * returned type is a union of all values, and undefined.
 *
 * Only the object's own properties are consulted. Because `key` is an
 * arbitrary string, names that exist solely on the prototype chain
 * (`toString`, `constructor`, `__proto__`, ...) would otherwise resolve to
 * values that are not part of `T[keyof T]`.
 *
 * @param obj Object to read from.
 * @param key Property name to look up; may be absent from `obj`.
 * @returns The own property value stored under `key`, or `undefined` when `obj` has no such own property.
 */
export function tryGet<T extends Record<string, unknown>>(obj: T, key: string): T[keyof T] | undefined {
	if (!Object.prototype.hasOwnProperty.call(obj, key)) {
		return undefined;
	}
	return obj[key as keyof T];
}
src/routes/+layout.svelte CHANGED
@@ -6,6 +6,7 @@
6
  import ShareModal from "$lib/components/share-modal.svelte";
7
  import "../app.css";
8
  import { session } from "$lib/state/session.svelte";
 
9
 
10
  interface Props {
11
  children?: import("svelte").Snippet;
@@ -13,6 +14,7 @@
13
 
14
  let { children }: Props = $props();
15
  session.init();
 
16
  </script>
17
 
18
  {@render children?.()}
 
6
  import ShareModal from "$lib/components/share-modal.svelte";
7
  import "../app.css";
8
  import { session } from "$lib/state/session.svelte";
9
+ import { generationStats } from "$lib/state/generation-stats.svelte";
10
 
11
  interface Props {
12
  children?: import("svelte").Snippet;
 
14
 
15
  let { children }: Props = $props();
16
  session.init();
17
+ generationStats.init();
18
  </script>
19
 
20
  {@render children?.()}