Thomas G. Lopes
committed on
Accurate max tokens (#77)
Browse files
- .env.example +9 -0
- .prettierignore +2 -0
- eslint.config.mts +1 -0
- package.json +7 -4
- pnpm-lock.yaml +54 -30
- scripts/update-ctx-length.ts +55 -0
- src/app.css +12 -0
- src/lib/components/inference-playground/generation-config-settings.ts +1 -2
- src/lib/components/inference-playground/generation-config.svelte +44 -21
- src/lib/components/inference-playground/playground.svelte +1 -15
- src/lib/components/inference-playground/utils.ts +36 -5
- src/lib/data/context_length.json +299 -0
- src/lib/server/providers/cohere.ts +35 -0
- src/lib/server/providers/fireworks.ts +41 -0
- src/lib/server/providers/hyperbolic.ts +41 -0
- src/lib/server/providers/index.ts +224 -0
- src/lib/server/providers/nebius.ts +49 -0
- src/lib/server/providers/novita.ts +46 -0
- src/lib/server/providers/replicate.ts +37 -0
- src/lib/server/providers/sambanova.ts +52 -0
- src/lib/server/providers/together.ts +37 -0
- src/lib/state/generation-stats.svelte.ts +31 -0
- src/lib/types.ts +1 -0
- src/lib/utils/is.ts +3 -0
- src/lib/utils/object.ts +9 -0
- src/routes/+layout.svelte +2 -0
.env.example
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
HYPERBOLIC_API_KEY=
|
2 |
+
COHERE_API_KEY=
|
3 |
+
TOGETHER_API_KEY=
|
4 |
+
FIREWORKS_API_KEY=
|
5 |
+
REPLICATE_API_KEY=
|
6 |
+
NEBIUS_API_KEY=
|
7 |
+
NOVITA_API_KEY=
|
8 |
+
FAL_API_KEY=
|
9 |
+
HF_TOKEN=
|
.prettierignore
CHANGED
@@ -16,3 +16,5 @@ node_modules
|
|
16 |
# Ignore files for PNPM, NPM and YARN
|
17 |
pnpm-lock.yaml
|
18 |
yarn.lock
|
|
|
|
|
|
16 |
# Ignore files for PNPM, NPM and YARN
|
17 |
pnpm-lock.yaml
|
18 |
yarn.lock
|
19 |
+
|
20 |
+
context_length.json
|
eslint.config.mts
CHANGED
@@ -86,6 +86,7 @@ export default ts.config(
|
|
86 |
"**/pnpm-lock.yaml",
|
87 |
"**/package-lock.json",
|
88 |
"**/yarn.lock",
|
|
|
89 |
],
|
90 |
},
|
91 |
{
|
|
|
86 |
"**/pnpm-lock.yaml",
|
87 |
"**/package-lock.json",
|
88 |
"**/yarn.lock",
|
89 |
+
"context_length.json",
|
90 |
],
|
91 |
},
|
92 |
{
|
package.json
CHANGED
@@ -3,24 +3,25 @@
|
|
3 |
"version": "0.0.1",
|
4 |
"private": true,
|
5 |
"scripts": {
|
6 |
-
"dev": "vite dev",
|
7 |
-
"build": "vite build",
|
8 |
"preview": "vite preview",
|
9 |
"prepare": "ts-patch install && svelte-kit sync || echo ''",
|
10 |
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
|
11 |
"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
|
12 |
"lint": "prettier . --check . && eslint src/",
|
13 |
"format": "prettier . --write .",
|
14 |
-
"clean": "rm -rf ./node_modules/ && rm -rf ./.svelte-kit/ && ni && echo 'Project cleaned!'"
|
|
|
15 |
},
|
16 |
"devDependencies": {
|
17 |
"@eslint/eslintrc": "^3.3.0",
|
18 |
"@eslint/js": "^9.22.0",
|
19 |
"@floating-ui/dom": "^1.6.13",
|
20 |
"@huggingface/hub": "^1.0.1",
|
21 |
-
"@huggingface/transformers": "^3.4.2",
|
22 |
"@huggingface/inference": "^3.5.1",
|
23 |
"@huggingface/tasks": "^0.17.1",
|
|
|
24 |
"@iconify-json/carbon": "^1.2.8",
|
25 |
"@iconify-json/material-symbols": "^1.2.15",
|
26 |
"@ryoppippi/unplugin-typia": "^1.0.0",
|
@@ -31,7 +32,9 @@
|
|
31 |
"@sveltejs/vite-plugin-svelte": "^4.0.0",
|
32 |
"@tailwindcss/container-queries": "^0.1.1",
|
33 |
"@tailwindcss/postcss": "^4.0.9",
|
|
|
34 |
"clsx": "^2.1.1",
|
|
|
35 |
"eslint": "^9.22.0",
|
36 |
"eslint-config-prettier": "^10.1.1",
|
37 |
"eslint-plugin-prettier": "^5.2.3",
|
|
|
3 |
"version": "0.0.1",
|
4 |
"private": true,
|
5 |
"scripts": {
|
6 |
+
"dev": "pnpm run update-ctx-length && vite dev",
|
7 |
+
"build": "pnpm run update-ctx-length && vite build",
|
8 |
"preview": "vite preview",
|
9 |
"prepare": "ts-patch install && svelte-kit sync || echo ''",
|
10 |
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
|
11 |
"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
|
12 |
"lint": "prettier . --check . && eslint src/",
|
13 |
"format": "prettier . --write .",
|
14 |
+
"clean": "rm -rf ./node_modules/ && rm -rf ./.svelte-kit/ && ni && echo 'Project cleaned!'",
|
15 |
+
"update-ctx-length": "jiti scripts/update-ctx-length.ts"
|
16 |
},
|
17 |
"devDependencies": {
|
18 |
"@eslint/eslintrc": "^3.3.0",
|
19 |
"@eslint/js": "^9.22.0",
|
20 |
"@floating-ui/dom": "^1.6.13",
|
21 |
"@huggingface/hub": "^1.0.1",
|
|
|
22 |
"@huggingface/inference": "^3.5.1",
|
23 |
"@huggingface/tasks": "^0.17.1",
|
24 |
+
"@huggingface/transformers": "^3.4.2",
|
25 |
"@iconify-json/carbon": "^1.2.8",
|
26 |
"@iconify-json/material-symbols": "^1.2.15",
|
27 |
"@ryoppippi/unplugin-typia": "^1.0.0",
|
|
|
32 |
"@sveltejs/vite-plugin-svelte": "^4.0.0",
|
33 |
"@tailwindcss/container-queries": "^0.1.1",
|
34 |
"@tailwindcss/postcss": "^4.0.9",
|
35 |
+
"@types/node": "^22.14.1",
|
36 |
"clsx": "^2.1.1",
|
37 |
+
"dotenv": "^16.5.0",
|
38 |
"eslint": "^9.22.0",
|
39 |
"eslint-config-prettier": "^10.1.1",
|
40 |
"eslint-plugin-prettier": "^5.2.3",
|
pnpm-lock.yaml
CHANGED
@@ -44,31 +44,37 @@ importers:
|
|
44 |
version: 1.2.15
|
45 |
'@ryoppippi/unplugin-typia':
|
46 |
specifier: ^1.0.0
|
47 |
-
version: 1.2.0(@samchon/[email protected])(@types/node@
|
48 |
'@samchon/openapi':
|
49 |
specifier: ^3.0.0
|
50 |
version: 3.0.0
|
51 |
'@sveltejs/adapter-auto':
|
52 |
specifier: ^3.2.2
|
53 |
-
version: 3.3.1(@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
54 |
'@sveltejs/adapter-node':
|
55 |
specifier: ^5.2.0
|
56 |
-
version: 5.2.12(@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
57 |
'@sveltejs/kit':
|
58 |
specifier: ^2.5.27
|
59 |
-
version: 2.18.0(@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
60 |
'@sveltejs/vite-plugin-svelte':
|
61 |
specifier: ^4.0.0
|
62 |
-
version: 4.0.4([email protected])([email protected](@types/node@
|
63 |
'@tailwindcss/container-queries':
|
64 |
specifier: ^0.1.1
|
65 |
version: 0.1.1([email protected])
|
66 |
'@tailwindcss/postcss':
|
67 |
specifier: ^4.0.9
|
68 |
version: 4.0.9
|
|
|
|
|
|
|
69 |
clsx:
|
70 |
specifier: ^2.1.1
|
71 |
version: 2.1.1
|
|
|
|
|
|
|
72 |
eslint:
|
73 |
specifier: ^9.22.0
|
74 |
version: 9.22.0([email protected])
|
@@ -137,7 +143,7 @@ importers:
|
|
137 |
version: 22.1.0([email protected])
|
138 |
vite:
|
139 |
specifier: ^5.4.4
|
140 |
-
version: 5.4.14(@types/node@
|
141 |
|
142 |
packages:
|
143 |
|
@@ -1001,6 +1007,9 @@ packages:
|
|
1001 |
'@types/[email protected]':
|
1002 |
resolution: {integrity: sha512-ACYy2HGcZPHxEeWTqowTF7dhXN+JU1o7Gr4b41klnn6pj2LD6rsiGqSZojMdk1Jh2ys3m76ap+ae1vvE4+5+vg==}
|
1003 |
|
|
|
|
|
|
|
1004 |
'@types/[email protected]':
|
1005 |
resolution: {integrity: sha512-60BCwRFOZCQhDncwQdxxeOEEkbc5dIMccYLwbxsS4TUNeVECQ/pBJ0j09mrHOl/JJvpRPGwO9SvE4nR2Nb/a4Q==}
|
1006 |
|
@@ -1284,6 +1293,10 @@ packages:
|
|
1284 |
resolution: {integrity: sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==}
|
1285 |
engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0}
|
1286 |
|
|
|
|
|
|
|
|
|
1287 | |
1288 |
resolution: {integrity: sha512-pYxfDYpued//QpnLIm4Avk7rsNtAtQkUES2cwAYSvD/wd2pKD71gN2Ebj3e7klzXwjocvE8c5vx/1fxwpqmSxA==}
|
1289 |
engines: {node: '>=4'}
|
@@ -2404,6 +2417,9 @@ packages:
|
|
2404 | |
2405 |
resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==}
|
2406 |
|
|
|
|
|
|
|
2407 | |
2408 |
resolution: {integrity: sha512-ect2ZNtk1Zgwb0NVHd0C1IDW/MV+Jk/xaq4t8o6rYdVS3+L660ZdD5kTSQZvsgdwCvquRw+/wYn75hsweRjoIA==}
|
2409 |
peerDependencies:
|
@@ -3087,7 +3103,7 @@ snapshots:
|
|
3087 |
'@rollup/[email protected]':
|
3088 |
optional: true
|
3089 |
|
3090 |
-
'@ryoppippi/[email protected](@samchon/[email protected])(@types/node@
|
3091 |
dependencies:
|
3092 |
'@rollup/pluginutils': 5.1.4([email protected])
|
3093 |
consola: 3.4.0
|
@@ -3101,7 +3117,7 @@ snapshots:
|
|
3101 |
typescript: 5.6.3
|
3102 |
typia: 7.6.4(@samchon/[email protected])([email protected])
|
3103 |
unplugin: 1.16.1
|
3104 |
-
vite: 6.2.1(@types/node@
|
3105 |
transitivePeerDependencies:
|
3106 |
- '@samchon/openapi'
|
3107 |
- '@types/node'
|
@@ -3125,22 +3141,22 @@ snapshots:
|
|
3125 |
dependencies:
|
3126 |
acorn: 8.14.0
|
3127 |
|
3128 |
-
'@sveltejs/[email protected](@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
3129 |
dependencies:
|
3130 |
-
'@sveltejs/kit': 2.18.0(@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
3131 |
import-meta-resolve: 4.1.0
|
3132 |
|
3133 |
-
'@sveltejs/[email protected](@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
3134 |
dependencies:
|
3135 |
'@rollup/plugin-commonjs': 28.0.2([email protected])
|
3136 |
'@rollup/plugin-json': 6.1.0([email protected])
|
3137 |
'@rollup/plugin-node-resolve': 16.0.0([email protected])
|
3138 |
-
'@sveltejs/kit': 2.18.0(@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
3139 |
rollup: 4.34.9
|
3140 |
|
3141 |
-
'@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
3142 |
dependencies:
|
3143 |
-
'@sveltejs/vite-plugin-svelte': 4.0.4([email protected])([email protected](@types/node@
|
3144 |
'@types/cookie': 0.6.0
|
3145 |
cookie: 0.6.0
|
3146 |
devalue: 5.1.1
|
@@ -3153,27 +3169,27 @@ snapshots:
|
|
3153 |
set-cookie-parser: 2.7.1
|
3154 |
sirv: 3.0.1
|
3155 |
svelte: 5.28.2
|
3156 |
-
vite: 5.4.14(@types/node@
|
3157 |
|
3158 |
-
'@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
3159 |
dependencies:
|
3160 |
-
'@sveltejs/vite-plugin-svelte': 4.0.4([email protected])([email protected](@types/node@
|
3161 |
debug: 4.4.0
|
3162 |
svelte: 5.28.2
|
3163 |
-
vite: 5.4.14(@types/node@
|
3164 |
transitivePeerDependencies:
|
3165 |
- supports-color
|
3166 |
|
3167 |
-
'@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
3168 |
dependencies:
|
3169 |
-
'@sveltejs/vite-plugin-svelte-inspector': 3.0.1(@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
3170 |
debug: 4.4.0
|
3171 |
deepmerge: 4.3.1
|
3172 |
kleur: 4.1.5
|
3173 |
magic-string: 0.30.17
|
3174 |
svelte: 5.28.2
|
3175 |
-
vite: 5.4.14(@types/node@
|
3176 |
-
vitefu: 1.0.6([email protected](@types/node@
|
3177 |
transitivePeerDependencies:
|
3178 |
- supports-color
|
3179 |
|
@@ -3251,13 +3267,17 @@ snapshots:
|
|
3251 |
|
3252 |
'@types/[email protected]':
|
3253 |
dependencies:
|
3254 |
-
'@types/node':
|
3255 |
form-data: 4.0.2
|
3256 |
|
3257 |
'@types/[email protected]':
|
3258 |
dependencies:
|
3259 |
undici-types: 5.26.5
|
3260 |
|
|
|
|
|
|
|
|
|
3261 |
'@types/[email protected]': {}
|
3262 |
|
3263 |
'@typescript-eslint/[email protected](@typescript-eslint/[email protected]([email protected]([email protected]))([email protected]))([email protected]([email protected]))([email protected])':
|
@@ -3524,6 +3544,8 @@ snapshots:
|
|
3524 |
|
3525 | |
3526 |
|
|
|
|
|
3527 | |
3528 |
|
3529 | |
@@ -4336,7 +4358,7 @@ snapshots:
|
|
4336 |
'@protobufjs/path': 1.1.2
|
4337 |
'@protobufjs/pool': 1.1.0
|
4338 |
'@protobufjs/utf8': 1.1.0
|
4339 |
-
'@types/node':
|
4340 |
long: 5.3.1
|
4341 |
|
4342 | |
@@ -4641,6 +4663,8 @@ snapshots:
|
|
4641 |
|
4642 | |
4643 |
|
|
|
|
|
4644 | |
4645 |
dependencies:
|
4646 |
'@antfu/install-pkg': 1.0.0
|
@@ -4669,31 +4693,31 @@ snapshots:
|
|
4669 |
|
4670 | |
4671 |
|
4672 |
-
[email protected](@types/node@
|
4673 |
dependencies:
|
4674 |
esbuild: 0.21.5
|
4675 |
postcss: 8.5.3
|
4676 |
rollup: 4.34.9
|
4677 |
optionalDependencies:
|
4678 |
-
'@types/node':
|
4679 |
fsevents: 2.3.3
|
4680 |
lightningcss: 1.29.1
|
4681 |
|
4682 |
-
[email protected](@types/node@
|
4683 |
dependencies:
|
4684 |
esbuild: 0.25.1
|
4685 |
postcss: 8.5.3
|
4686 |
rollup: 4.34.9
|
4687 |
optionalDependencies:
|
4688 |
-
'@types/node':
|
4689 |
fsevents: 2.3.3
|
4690 |
jiti: 2.4.2
|
4691 |
lightningcss: 1.29.1
|
4692 |
yaml: 2.7.0
|
4693 |
|
4694 |
-
[email protected]([email protected](@types/node@
|
4695 |
optionalDependencies:
|
4696 |
-
vite: 5.4.14(@types/node@
|
4697 |
|
4698 | |
4699 |
dependencies:
|
|
|
44 |
version: 1.2.15
|
45 |
'@ryoppippi/unplugin-typia':
|
46 |
specifier: ^1.0.0
|
47 |
+
version: 1.2.0(@samchon/[email protected])(@types/node@22.14.1)([email protected])([email protected])([email protected])([email protected])
|
48 |
'@samchon/openapi':
|
49 |
specifier: ^3.0.0
|
50 |
version: 3.0.0
|
51 |
'@sveltejs/adapter-auto':
|
52 |
specifier: ^3.2.2
|
53 |
+
version: 3.3.1(@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected])))
|
54 |
'@sveltejs/adapter-node':
|
55 |
specifier: ^5.2.0
|
56 |
+
version: 5.2.12(@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected])))
|
57 |
'@sveltejs/kit':
|
58 |
specifier: ^2.5.27
|
59 |
+
version: 2.18.0(@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected]))
|
60 |
'@sveltejs/vite-plugin-svelte':
|
61 |
specifier: ^4.0.0
|
62 |
+
version: 4.0.4([email protected])([email protected](@types/node@22.14.1)([email protected]))
|
63 |
'@tailwindcss/container-queries':
|
64 |
specifier: ^0.1.1
|
65 |
version: 0.1.1([email protected])
|
66 |
'@tailwindcss/postcss':
|
67 |
specifier: ^4.0.9
|
68 |
version: 4.0.9
|
69 |
+
'@types/node':
|
70 |
+
specifier: ^22.14.1
|
71 |
+
version: 22.14.1
|
72 |
clsx:
|
73 |
specifier: ^2.1.1
|
74 |
version: 2.1.1
|
75 |
+
dotenv:
|
76 |
+
specifier: ^16.5.0
|
77 |
+
version: 16.5.0
|
78 |
eslint:
|
79 |
specifier: ^9.22.0
|
80 |
version: 9.22.0([email protected])
|
|
|
143 |
version: 22.1.0([email protected])
|
144 |
vite:
|
145 |
specifier: ^5.4.4
|
146 |
+
version: 5.4.14(@types/node@22.14.1)([email protected])
|
147 |
|
148 |
packages:
|
149 |
|
|
|
1007 |
'@types/[email protected]':
|
1008 |
resolution: {integrity: sha512-ACYy2HGcZPHxEeWTqowTF7dhXN+JU1o7Gr4b41klnn6pj2LD6rsiGqSZojMdk1Jh2ys3m76ap+ae1vvE4+5+vg==}
|
1009 |
|
1010 |
+
'@types/node@22.14.1':
|
1011 |
+
resolution: {integrity: sha512-u0HuPQwe/dHrItgHHpmw3N2fYCR6x4ivMNbPHRkBVP4CvN+kiRrKHWk3i8tXiO/joPwXLMYvF9TTF0eqgHIuOw==}
|
1012 |
+
|
1013 |
'@types/[email protected]':
|
1014 |
resolution: {integrity: sha512-60BCwRFOZCQhDncwQdxxeOEEkbc5dIMccYLwbxsS4TUNeVECQ/pBJ0j09mrHOl/JJvpRPGwO9SvE4nR2Nb/a4Q==}
|
1015 |
|
|
|
1293 |
resolution: {integrity: sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==}
|
1294 |
engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0}
|
1295 |
|
1296 | |
1297 |
+
resolution: {integrity: sha512-m/C+AwOAr9/W1UOIZUo232ejMNnJAJtYQjUbHoNTBNTJSvqzzDh7vnrei3o3r3m9blf6ZoDkvcw0VmozNRFJxg==}
|
1298 |
+
engines: {node: '>=12'}
|
1299 |
+
|
1300 | |
1301 |
resolution: {integrity: sha512-pYxfDYpued//QpnLIm4Avk7rsNtAtQkUES2cwAYSvD/wd2pKD71gN2Ebj3e7klzXwjocvE8c5vx/1fxwpqmSxA==}
|
1302 |
engines: {node: '>=4'}
|
|
|
2417 | |
2418 |
resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==}
|
2419 |
|
2420 | |
2421 |
+
resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==}
|
2422 |
+
|
2423 | |
2424 |
resolution: {integrity: sha512-ect2ZNtk1Zgwb0NVHd0C1IDW/MV+Jk/xaq4t8o6rYdVS3+L660ZdD5kTSQZvsgdwCvquRw+/wYn75hsweRjoIA==}
|
2425 |
peerDependencies:
|
|
|
3103 |
'@rollup/[email protected]':
|
3104 |
optional: true
|
3105 |
|
3106 |
+
'@ryoppippi/[email protected](@samchon/[email protected])(@types/node@22.14.1)([email protected])([email protected])([email protected])([email protected])':
|
3107 |
dependencies:
|
3108 |
'@rollup/pluginutils': 5.1.4([email protected])
|
3109 |
consola: 3.4.0
|
|
|
3117 |
typescript: 5.6.3
|
3118 |
typia: 7.6.4(@samchon/[email protected])([email protected])
|
3119 |
unplugin: 1.16.1
|
3120 |
+
vite: 6.2.1(@types/node@22.14.1)([email protected])([email protected])([email protected])
|
3121 |
transitivePeerDependencies:
|
3122 |
- '@samchon/openapi'
|
3123 |
- '@types/node'
|
|
|
3141 |
dependencies:
|
3142 |
acorn: 8.14.0
|
3143 |
|
3144 |
+
'@sveltejs/[email protected](@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected])))':
|
3145 |
dependencies:
|
3146 |
+
'@sveltejs/kit': 2.18.0(@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected]))
|
3147 |
import-meta-resolve: 4.1.0
|
3148 |
|
3149 |
+
'@sveltejs/[email protected](@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected])))':
|
3150 |
dependencies:
|
3151 |
'@rollup/plugin-commonjs': 28.0.2([email protected])
|
3152 |
'@rollup/plugin-json': 6.1.0([email protected])
|
3153 |
'@rollup/plugin-node-resolve': 16.0.0([email protected])
|
3154 |
+
'@sveltejs/kit': 2.18.0(@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected]))
|
3155 |
rollup: 4.34.9
|
3156 |
|
3157 |
+
'@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected]))':
|
3158 |
dependencies:
|
3159 |
+
'@sveltejs/vite-plugin-svelte': 4.0.4([email protected])([email protected](@types/node@22.14.1)([email protected]))
|
3160 |
'@types/cookie': 0.6.0
|
3161 |
cookie: 0.6.0
|
3162 |
devalue: 5.1.1
|
|
|
3169 |
set-cookie-parser: 2.7.1
|
3170 |
sirv: 3.0.1
|
3171 |
svelte: 5.28.2
|
3172 |
+
vite: 5.4.14(@types/node@22.14.1)([email protected])
|
3173 |
|
3174 |
+
'@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected]))':
|
3175 |
dependencies:
|
3176 |
+
'@sveltejs/vite-plugin-svelte': 4.0.4([email protected])([email protected](@types/node@22.14.1)([email protected]))
|
3177 |
debug: 4.4.0
|
3178 |
svelte: 5.28.2
|
3179 |
+
vite: 5.4.14(@types/node@22.14.1)([email protected])
|
3180 |
transitivePeerDependencies:
|
3181 |
- supports-color
|
3182 |
|
3183 |
+
'@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected]))':
|
3184 |
dependencies:
|
3185 |
+
'@sveltejs/vite-plugin-svelte-inspector': 3.0.1(@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected]))
|
3186 |
debug: 4.4.0
|
3187 |
deepmerge: 4.3.1
|
3188 |
kleur: 4.1.5
|
3189 |
magic-string: 0.30.17
|
3190 |
svelte: 5.28.2
|
3191 |
+
vite: 5.4.14(@types/node@22.14.1)([email protected])
|
3192 |
+
vitefu: 1.0.6([email protected](@types/node@22.14.1)([email protected]))
|
3193 |
transitivePeerDependencies:
|
3194 |
- supports-color
|
3195 |
|
|
|
3267 |
|
3268 |
'@types/[email protected]':
|
3269 |
dependencies:
|
3270 |
+
'@types/node': 22.14.1
|
3271 |
form-data: 4.0.2
|
3272 |
|
3273 |
'@types/[email protected]':
|
3274 |
dependencies:
|
3275 |
undici-types: 5.26.5
|
3276 |
|
3277 |
+
'@types/node@22.14.1':
|
3278 |
+
dependencies:
|
3279 |
+
undici-types: 6.21.0
|
3280 |
+
|
3281 |
'@types/[email protected]': {}
|
3282 |
|
3283 |
'@typescript-eslint/[email protected](@typescript-eslint/[email protected]([email protected]([email protected]))([email protected]))([email protected]([email protected]))([email protected])':
|
|
|
3544 |
|
3545 | |
3546 |
|
3547 |
+
dotenv@16.5.0: {}
|
3548 |
+
|
3549 | |
3550 |
|
3551 | |
|
|
4358 |
'@protobufjs/path': 1.1.2
|
4359 |
'@protobufjs/pool': 1.1.0
|
4360 |
'@protobufjs/utf8': 1.1.0
|
4361 |
+
'@types/node': 22.14.1
|
4362 |
long: 5.3.1
|
4363 |
|
4364 | |
|
|
4663 |
|
4664 | |
4665 |
|
4666 |
+
undici-types@6.21.0: {}
|
4667 |
+
|
4668 | |
4669 |
dependencies:
|
4670 |
'@antfu/install-pkg': 1.0.0
|
|
|
4693 |
|
4694 | |
4695 |
|
4696 |
+
[email protected](@types/node@22.14.1)([email protected]):
|
4697 |
dependencies:
|
4698 |
esbuild: 0.21.5
|
4699 |
postcss: 8.5.3
|
4700 |
rollup: 4.34.9
|
4701 |
optionalDependencies:
|
4702 |
+
'@types/node': 22.14.1
|
4703 |
fsevents: 2.3.3
|
4704 |
lightningcss: 1.29.1
|
4705 |
|
4706 |
+
[email protected](@types/node@22.14.1)([email protected])([email protected])([email protected]):
|
4707 |
dependencies:
|
4708 |
esbuild: 0.25.1
|
4709 |
postcss: 8.5.3
|
4710 |
rollup: 4.34.9
|
4711 |
optionalDependencies:
|
4712 |
+
'@types/node': 22.14.1
|
4713 |
fsevents: 2.3.3
|
4714 |
jiti: 2.4.2
|
4715 |
lightningcss: 1.29.1
|
4716 |
yaml: 2.7.0
|
4717 |
|
4718 |
+
[email protected]([email protected](@types/node@22.14.1)([email protected])):
|
4719 |
optionalDependencies:
|
4720 |
+
vite: 5.4.14(@types/node@22.14.1)([email protected])
|
4721 |
|
4722 | |
4723 |
dependencies:
|
scripts/update-ctx-length.ts
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import dotenv from "dotenv";
|
2 |
+
dotenv.config(); // Load .env file into process.env
|
3 |
+
|
4 |
+
import { fetchAllProviderData, type ApiKeys } from "../src/lib/server/providers/index.js"; // Import ApiKeys type
|
5 |
+
import fs from "fs/promises";
|
6 |
+
import path from "path";
|
7 |
+
|
8 |
+
const CACHE_FILE_PATH = path.resolve("src/lib/data/context_length.json");
|
9 |
+
|
10 |
+
async function runUpdate() {
|
11 |
+
console.log("Starting context length cache update...");
|
12 |
+
|
13 |
+
// Gather API keys from process.env
|
14 |
+
const apiKeys: ApiKeys = {
|
15 |
+
COHERE_API_KEY: process.env.COHERE_API_KEY,
|
16 |
+
TOGETHER_API_KEY: process.env.TOGETHER_API_KEY,
|
17 |
+
FIREWORKS_API_KEY: process.env.FIREWORKS_API_KEY,
|
18 |
+
HYPERBOLIC_API_KEY: process.env.HYPERBOLIC_API_KEY,
|
19 |
+
REPLICATE_API_KEY: process.env.REPLICATE_API_KEY,
|
20 |
+
NEBIUS_API_KEY: process.env.NEBIUS_API_KEY,
|
21 |
+
NOVITA_API_KEY: process.env.NOVITA_API_KEY,
|
22 |
+
SAMBANOVA_API_KEY: process.env.SAMBANOVA_API_KEY,
|
23 |
+
};
|
24 |
+
|
25 |
+
try {
|
26 |
+
// Fetch data from all supported providers concurrently, passing keys
|
27 |
+
const fetchedData = await fetchAllProviderData(apiKeys);
|
28 |
+
|
29 |
+
// Read existing manual/cached data
|
30 |
+
let existingData = {};
|
31 |
+
try {
|
32 |
+
const currentCache = await fs.readFile(CACHE_FILE_PATH, "utf-8");
|
33 |
+
existingData = JSON.parse(currentCache);
|
34 |
+
} catch {
|
35 |
+
// Remove unused variable name
|
36 |
+
console.log("No existing cache file found or error reading, creating new one.");
|
37 |
+
}
|
38 |
+
|
39 |
+
// Merge fetched data with existing data (fetched data takes precedence)
|
40 |
+
const combinedData = { ...existingData, ...fetchedData };
|
41 |
+
|
42 |
+
// Write the combined data back to the file
|
43 |
+
const tempFilePath = CACHE_FILE_PATH + ".tmp";
|
44 |
+
await fs.writeFile(tempFilePath, JSON.stringify(combinedData, null, "\t"), "utf-8");
|
45 |
+
await fs.rename(tempFilePath, CACHE_FILE_PATH);
|
46 |
+
|
47 |
+
console.log("Context length cache update complete.");
|
48 |
+
console.log(`Cache file written to: ${CACHE_FILE_PATH}`);
|
49 |
+
} catch (error) {
|
50 |
+
console.error("Error during context length cache update:", error);
|
51 |
+
process.exit(1); // Exit with error code
|
52 |
+
}
|
53 |
+
}
|
54 |
+
|
55 |
+
runUpdate();
|
src/app.css
CHANGED
@@ -67,6 +67,18 @@
|
|
67 |
@apply flex h-[39px] items-center justify-center gap-2 rounded-lg border border-gray-200 bg-white px-3 py-2.5 text-sm font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:ring-4 focus:ring-gray-100 focus:outline-hidden dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700;
|
68 |
}
|
69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
@utility custom-outline {
|
71 |
@apply outline-hidden;
|
72 |
@apply border-blue-500 ring ring-blue-500;
|
|
|
67 |
@apply flex h-[39px] items-center justify-center gap-2 rounded-lg border border-gray-200 bg-white px-3 py-2.5 text-sm font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:ring-4 focus:ring-gray-100 focus:outline-hidden dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700;
|
68 |
}
|
69 |
|
70 |
+
@utility btn-sm {
|
71 |
+
@apply flex h-[32px] items-center justify-center gap-1.5 rounded-md border border-gray-200 bg-white px-2.5 py-2 text-xs font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:ring-4 focus:ring-gray-100 focus:outline-hidden dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700;
|
72 |
+
}
|
73 |
+
|
74 |
+
@utility btn-xs {
|
75 |
+
@apply flex h-[28px] items-center justify-center gap-1 rounded border border-gray-200 bg-white px-2 py-1.5 text-xs font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:ring-4 focus:ring-gray-100 focus:outline-hidden dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700;
|
76 |
+
}
|
77 |
+
|
78 |
+
@utility btn-mini {
|
79 |
+
@apply flex h-[24px] items-center justify-center gap-0.5 rounded-sm border border-gray-200 bg-white px-1.5 py-1 text-[10px] font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:ring-2 focus:ring-gray-100 focus:outline-hidden dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700;
|
80 |
+
}
|
81 |
+
|
82 |
@utility custom-outline {
|
83 |
@apply outline-hidden;
|
84 |
@apply border-blue-500 ring ring-blue-500;
|
src/lib/components/inference-playground/generation-config-settings.ts
CHANGED
@@ -7,7 +7,7 @@ export type GenerationConfigKey = (typeof GENERATION_CONFIG_KEYS)[number];
|
|
7 |
export type GenerationConfig = Pick<ChatCompletionInput, GenerationConfigKey>;
|
8 |
|
9 |
interface GenerationKeySettings {
|
10 |
-
default
|
11 |
step: number;
|
12 |
min: number;
|
13 |
max: number;
|
@@ -23,7 +23,6 @@ export const GENERATION_CONFIG_SETTINGS: Record<GenerationConfigKey, GenerationK
|
|
23 |
label: "Temperature",
|
24 |
},
|
25 |
max_tokens: {
|
26 |
-
default: 2048,
|
27 |
step: 256,
|
28 |
min: 0,
|
29 |
max: 8192, // changed dynamically based on model
|
|
|
7 |
export type GenerationConfig = Pick<ChatCompletionInput, GenerationConfigKey>;
|
8 |
|
9 |
interface GenerationKeySettings {
|
10 |
+
default?: number;
|
11 |
step: number;
|
12 |
min: number;
|
13 |
max: number;
|
|
|
23 |
label: "Temperature",
|
24 |
},
|
25 |
max_tokens: {
|
|
|
26 |
step: 256,
|
27 |
min: 0,
|
28 |
max: 8192, // changed dynamically based on model
|
src/lib/components/inference-playground/generation-config.svelte
CHANGED
@@ -1,8 +1,10 @@
|
|
1 |
<script lang="ts">
|
2 |
-
import type
|
3 |
-
|
4 |
import { GENERATION_CONFIG_KEYS, GENERATION_CONFIG_SETTINGS } from "./generation-config-settings.js";
|
5 |
-
import {
|
|
|
|
|
6 |
|
7 |
interface Props {
|
8 |
conversation: Conversation;
|
@@ -11,37 +13,58 @@
|
|
11 |
|
12 |
let { conversation = $bindable(), classNames = "" }: Props = $props();
|
13 |
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
</script>
|
17 |
|
18 |
<div class="flex flex-col gap-y-7 {classNames}">
|
19 |
{#each GENERATION_CONFIG_KEYS as key}
|
20 |
{@const { label, min, step } = GENERATION_CONFIG_SETTINGS[key]}
|
21 |
-
{@const
|
|
|
|
|
22 |
<div>
|
23 |
<div class="flex items-center justify-between">
|
24 |
-
<label for=
|
25 |
-
|
26 |
-
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
<input
|
28 |
-
|
29 |
-
|
30 |
{min}
|
31 |
{max}
|
32 |
{step}
|
33 |
bind:value={conversation.config[key]}
|
|
|
34 |
/>
|
35 |
-
|
36 |
-
<input
|
37 |
-
id="temperature-range"
|
38 |
-
type="range"
|
39 |
-
{min}
|
40 |
-
{max}
|
41 |
-
{step}
|
42 |
-
bind:value={conversation.config[key]}
|
43 |
-
class="h-2 w-full cursor-pointer appearance-none rounded-lg bg-gray-200 accent-black dark:bg-gray-700 dark:accent-blue-500"
|
44 |
-
/>
|
45 |
</div>
|
46 |
{/each}
|
47 |
|
|
|
1 |
<script lang="ts">
|
2 |
+
import { type Conversation } from "$lib/types.js";
|
3 |
+
import { watch } from "runed";
|
4 |
import { GENERATION_CONFIG_KEYS, GENERATION_CONFIG_SETTINGS } from "./generation-config-settings.js";
|
5 |
+
import { maxAllowedTokens } from "./utils.js";
|
6 |
+
import { isNumber } from "$lib/utils/is.js";
|
7 |
+
import IconX from "~icons/carbon/close";
|
8 |
|
9 |
interface Props {
|
10 |
conversation: Conversation;
|
|
|
13 |
|
14 |
let { conversation = $bindable(), classNames = "" }: Props = $props();
|
15 |
|
16 |
+
const maxTokens = $derived(maxAllowedTokens(conversation));
|
17 |
+
|
18 |
+
watch(
|
19 |
+
() => maxTokens,
|
20 |
+
() => {
|
21 |
+
const curr = conversation.config.max_tokens;
|
22 |
+
if (!curr || curr <= maxTokens) return;
|
23 |
+
conversation.config.max_tokens = maxTokens;
|
24 |
+
}
|
25 |
+
);
|
26 |
</script>
|
27 |
|
28 |
<div class="flex flex-col gap-y-7 {classNames}">
|
29 |
{#each GENERATION_CONFIG_KEYS as key}
|
30 |
{@const { label, min, step } = GENERATION_CONFIG_SETTINGS[key]}
|
31 |
+
{@const isMaxTokens = key === "max_tokens"}
|
32 |
+
{@const max = isMaxTokens ? maxTokens : GENERATION_CONFIG_SETTINGS[key].max}
|
33 |
+
|
34 |
<div>
|
35 |
<div class="flex items-center justify-between">
|
36 |
+
<label for={key} class="mb-2 block text-sm font-medium text-gray-900 dark:text-white">
|
37 |
+
{label}
|
38 |
+
</label>
|
39 |
+
<div class="flex items-center gap-2">
|
40 |
+
{#if !isMaxTokens || isNumber(conversation.config[key])}
|
41 |
+
<input
|
42 |
+
type="number"
|
43 |
+
class="w-20 rounded-sm border bg-transparent px-1 py-0.5 text-right text-sm dark:border-gray-700"
|
44 |
+
{min}
|
45 |
+
{max}
|
46 |
+
{step}
|
47 |
+
bind:value={conversation.config[key]}
|
48 |
+
/>
|
49 |
+
{/if}
|
50 |
+
{#if isMaxTokens && isNumber(conversation.config[key])}
|
51 |
+
<button class="btn-mini" onclick={() => (conversation.config[key] = undefined)}> <IconX /> </button>
|
52 |
+
{:else if isMaxTokens}
|
53 |
+
<button class="btn-mini" onclick={() => (conversation.config[key] = maxTokens / 2)}> set </button>
|
54 |
+
{/if}
|
55 |
+
</div>
|
56 |
+
</div>
|
57 |
+
{#if !isMaxTokens || isNumber(conversation.config[key])}
|
58 |
<input
|
59 |
+
id={key}
|
60 |
+
type="range"
|
61 |
{min}
|
62 |
{max}
|
63 |
{step}
|
64 |
bind:value={conversation.config[key]}
|
65 |
+
class="h-2 w-full cursor-pointer appearance-none rounded-lg bg-gray-200 accent-black dark:bg-gray-700 dark:accent-blue-500"
|
66 |
/>
|
67 |
+
{/if}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
</div>
|
69 |
{/each}
|
70 |
|
src/lib/components/inference-playground/playground.svelte
CHANGED
@@ -6,7 +6,6 @@
|
|
6 |
import { isConversationWithHFModel, type ConversationMessage, type Model, type Project } from "$lib/types.js";
|
7 |
import { cmdOrCtrl, optOrAlt } from "$lib/utils/platform.js";
|
8 |
import { Popover } from "melt/components";
|
9 |
-
import { watch } from "runed";
|
10 |
import typia from "typia";
|
11 |
import { default as IconDelete } from "~icons/carbon/trash-can";
|
12 |
import { showShareModal } from "../share-modal.svelte";
|
@@ -19,7 +18,7 @@
|
|
19 |
import ModelSelectorModal from "./model-selector-modal.svelte";
|
20 |
import ModelSelector from "./model-selector.svelte";
|
21 |
import ProjectSelect from "./project-select.svelte";
|
22 |
-
import {
|
23 |
|
24 |
import { iterate } from "$lib/utils/array.js";
|
25 |
import IconChatLeft from "~icons/carbon/align-box-bottom-left";
|
@@ -43,19 +42,6 @@
|
|
43 |
|
44 |
let selectCompareModelOpen = $state(false);
|
45 |
|
46 |
-
watch(
|
47 |
-
() => $state.snapshot(session.project),
|
48 |
-
() => {
|
49 |
-
session.project.conversations.forEach(async (c, i) => {
|
50 |
-
session.generationStats[i] = {
|
51 |
-
latency: 0,
|
52 |
-
...session.generationStats[i],
|
53 |
-
generatedTokensCount: await getTokens(c),
|
54 |
-
};
|
55 |
-
});
|
56 |
-
}
|
57 |
-
);
|
58 |
-
|
59 |
const systemPromptSupported = $derived(
|
60 |
session.project.conversations.some(conversation => isSystemPromptSupported(conversation.model))
|
61 |
);
|
|
|
6 |
import { isConversationWithHFModel, type ConversationMessage, type Model, type Project } from "$lib/types.js";
|
7 |
import { cmdOrCtrl, optOrAlt } from "$lib/utils/platform.js";
|
8 |
import { Popover } from "melt/components";
|
|
|
9 |
import typia from "typia";
|
10 |
import { default as IconDelete } from "~icons/carbon/trash-can";
|
11 |
import { showShareModal } from "../share-modal.svelte";
|
|
|
18 |
import ModelSelectorModal from "./model-selector-modal.svelte";
|
19 |
import ModelSelector from "./model-selector.svelte";
|
20 |
import ProjectSelect from "./project-select.svelte";
|
21 |
+
import { isSystemPromptSupported } from "./utils.js";
|
22 |
|
23 |
import { iterate } from "$lib/utils/array.js";
|
24 |
import IconChatLeft from "~icons/carbon/align-box-bottom-left";
|
|
|
42 |
|
43 |
let selectCompareModelOpen = $state(false);
|
44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
const systemPromptSupported = $derived(
|
46 |
session.project.conversations.some(conversation => isSystemPromptSupported(conversation.model))
|
47 |
);
|
src/lib/components/inference-playground/utils.ts
CHANGED
@@ -1,15 +1,18 @@
|
|
1 |
-
import
|
|
|
2 |
import {
|
3 |
isCustomModel,
|
|
|
4 |
type Conversation,
|
5 |
type ConversationMessage,
|
6 |
type CustomModel,
|
7 |
type Model,
|
8 |
} from "$lib/types.js";
|
|
|
|
|
9 |
import type { ChatCompletionInputMessage, InferenceSnippet } from "@huggingface/tasks";
|
10 |
import { type ChatCompletionOutputMessage } from "@huggingface/tasks";
|
11 |
-
import {
|
12 |
-
import { HfInference, snippets, type InferenceProvider } from "@huggingface/inference";
|
13 |
import OpenAI from "openai";
|
14 |
|
15 |
type ChatCompletionInputMessageChunk =
|
@@ -48,6 +51,24 @@ type OpenAICompletionMetadata = {
|
|
48 |
|
49 |
type CompletionMetadata = HFCompletionMetadata | OpenAICompletionMetadata;
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
function getCompletionMetadata(conversation: Conversation, signal?: AbortSignal): CompletionMetadata {
|
52 |
const { model, systemMessage } = conversation;
|
53 |
|
@@ -88,6 +109,7 @@ function getCompletionMetadata(conversation: Conversation, signal?: AbortSignal)
|
|
88 |
messages: messages.map(parseMessage),
|
89 |
provider: conversation.provider,
|
90 |
...conversation.config,
|
|
|
91 |
},
|
92 |
};
|
93 |
}
|
@@ -284,11 +306,20 @@ export async function getTokenizer(model: Model) {
|
|
284 |
}
|
285 |
}
|
286 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
287 |
export async function getTokens(conversation: Conversation): Promise<number> {
|
288 |
const model = conversation.model;
|
289 |
-
if (isCustomModel(model)) return
|
290 |
const tokenizer = await getTokenizer(model);
|
291 |
-
if (tokenizer === null) return
|
292 |
|
293 |
// This is a simplified version - you might need to adjust based on your exact needs
|
294 |
let formattedText = "";
|
|
|
1 |
+
import ctxLengthData from "$lib/data/context_length.json";
|
2 |
+
import { token } from "$lib/state/token.svelte";
|
3 |
import {
|
4 |
isCustomModel,
|
5 |
+
isHFModel,
|
6 |
type Conversation,
|
7 |
type ConversationMessage,
|
8 |
type CustomModel,
|
9 |
type Model,
|
10 |
} from "$lib/types.js";
|
11 |
+
import { tryGet } from "$lib/utils/object.js";
|
12 |
+
import { HfInference, snippets, type InferenceProvider } from "@huggingface/inference";
|
13 |
import type { ChatCompletionInputMessage, InferenceSnippet } from "@huggingface/tasks";
|
14 |
import { type ChatCompletionOutputMessage } from "@huggingface/tasks";
|
15 |
+
import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
|
|
|
16 |
import OpenAI from "openai";
|
17 |
|
18 |
type ChatCompletionInputMessageChunk =
|
|
|
51 |
|
52 |
type CompletionMetadata = HFCompletionMetadata | OpenAICompletionMetadata;
|
53 |
|
54 |
+
/**
 * Returns the upper bound to use for a conversation's `max_tokens` setting.
 *
 * For a Hugging Face model with a selected provider, the model's id on that provider is looked up
 * in the bundled context-length table. When that lookup yields nothing (or a falsy value such as
 * 0), the per-model override in `customMaxTokens` is used, and finally a default of 100000.
 */
export function maxAllowedTokens(conversation: Conversation) {
	const { provider, model } = conversation;

	let ctxLength: number | undefined;
	if (provider && isHFModel(model)) {
		const mapping = model.inferenceProviderMapping.find(data => data.provider === provider);
		const idOnProvider = mapping?.providerId;
		if (idOnProvider) {
			const models = tryGet(ctxLengthData, provider);
			if (models) {
				ctxLength = tryGet(models, idOnProvider) as number | undefined;
			}
		}
	}

	return ctxLength ? ctxLength : (customMaxTokens[conversation.model.id] ?? 100000);
}
|
71 |
+
|
72 |
function getCompletionMetadata(conversation: Conversation, signal?: AbortSignal): CompletionMetadata {
|
73 |
const { model, systemMessage } = conversation;
|
74 |
|
|
|
109 |
messages: messages.map(parseMessage),
|
110 |
provider: conversation.provider,
|
111 |
...conversation.config,
|
112 |
+
// max_tokens: maxAllowedTokens(conversation) - currTokens,
|
113 |
},
|
114 |
};
|
115 |
}
|
|
|
306 |
}
|
307 |
}
|
308 |
|
309 |
+
/**
 * Guesstimates the token count of a conversation when no tokenizer is available.
 *
 * Concatenates the content of every message (messages without content count as empty) and applies
 * the rule of thumb of roughly 4 characters per token. The result is rounded up so callers always
 * receive a whole number of tokens rather than a fraction.
 */
export function estimateTokens(conversation: Conversation): number {
	const content = conversation.messages.reduce((acc, curr) => {
		return acc + (curr?.content ?? "");
	}, "");

	return Math.ceil(content.length / 4); // 1 token ~ 4 characters
}
|
317 |
+
|
318 |
export async function getTokens(conversation: Conversation): Promise<number> {
|
319 |
const model = conversation.model;
|
320 |
+
if (isCustomModel(model)) return estimateTokens(conversation);
|
321 |
const tokenizer = await getTokenizer(model);
|
322 |
+
if (tokenizer === null) return estimateTokens(conversation);
|
323 |
|
324 |
// This is a simplified version - you might need to adjust based on your exact needs
|
325 |
let formattedText = "";
|
src/lib/data/context_length.json
ADDED
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"replicate": {},
|
3 |
+
"sambanova": {
|
4 |
+
"DeepSeek-R1": 16384,
|
5 |
+
"DeepSeek-R1-Distill-Llama-70B": 131072,
|
6 |
+
"DeepSeek-V3-0324": 16384,
|
7 |
+
"E5-Mistral-7B-Instruct": 4096,
|
8 |
+
"Llama-4-Maverick-17B-128E-Instruct": 8192,
|
9 |
+
"Llama-4-Scout-17B-16E-Instruct": 8192,
|
10 |
+
"Meta-Llama-3.1-405B-Instruct": 16384,
|
11 |
+
"Meta-Llama-3.1-8B-Instruct": 16384,
|
12 |
+
"Meta-Llama-3.2-1B-Instruct": 16384,
|
13 |
+
"Meta-Llama-3.2-3B-Instruct": 4096,
|
14 |
+
"Meta-Llama-3.3-70B-Instruct": 131072,
|
15 |
+
"Meta-Llama-Guard-3-8B": 16384,
|
16 |
+
"QwQ-32B": 16384,
|
17 |
+
"Qwen2-Audio-7B-Instruct": 4096,
|
18 |
+
"Qwen3-32B": 8192
|
19 |
+
},
|
20 |
+
"nebius": {
|
21 |
+
"meta-llama/Meta-Llama-3.1-8B-Instruct-fast": 131072,
|
22 |
+
"meta-llama/Meta-Llama-3.1-8B-Instruct": 131072,
|
23 |
+
"meta-llama/Meta-Llama-3.1-70B-Instruct-fast": 131072,
|
24 |
+
"meta-llama/Meta-Llama-3.1-70B-Instruct": 131072,
|
25 |
+
"meta-llama/Meta-Llama-3.1-405B-Instruct": 131072,
|
26 |
+
"meta-llama/Llama-Guard-3-8B": 131072,
|
27 |
+
"nvidia/Llama-3.1-Nemotron-70B-Instruct-HF-fast": 131072,
|
28 |
+
"nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": 131072,
|
29 |
+
"nvidia/Llama-3_1-Nemotron-Ultra-253B-v1": 131072,
|
30 |
+
"mistralai/Mistral-Nemo-Instruct-2407-fast": 128000,
|
31 |
+
"mistralai/Mistral-Nemo-Instruct-2407": 128000,
|
32 |
+
"mistralai/Mixtral-8x7B-Instruct-v0.1-fast": 32768,
|
33 |
+
"mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
|
34 |
+
"mistralai/Mixtral-8x22B-Instruct-v0.1-fast": 65536,
|
35 |
+
"mistralai/Mixtral-8x22B-Instruct-v0.1": 65536,
|
36 |
+
"allenai/OLMo-7B-Instruct-hf": 2048,
|
37 |
+
"microsoft/Phi-3-mini-4k-instruct-fast": 4096,
|
38 |
+
"microsoft/Phi-3-mini-4k-instruct": 4096,
|
39 |
+
"microsoft/Phi-3-medium-128k-instruct-fast": 131072,
|
40 |
+
"microsoft/Phi-3-medium-128k-instruct": 131072,
|
41 |
+
"google/gemma-2-2b-it-fast": 8192,
|
42 |
+
"google/gemma-2-2b-it": 8192,
|
43 |
+
"google/gemma-2-9b-it-fast": 8192,
|
44 |
+
"google/gemma-2-9b-it": 8192,
|
45 |
+
"google/gemma-2-27b-it-fast": 8192,
|
46 |
+
"google/gemma-2-27b-it": 8192,
|
47 |
+
"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct-fast": 128000,
|
48 |
+
"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": 128000,
|
49 |
+
"Qwen/Qwen2.5-Coder-7B-fast": 32768,
|
50 |
+
"Qwen/Qwen2.5-Coder-7B": 32768,
|
51 |
+
"Qwen/Qwen2.5-Coder-7B-Instruct-fast": 32768,
|
52 |
+
"Qwen/Qwen2.5-Coder-7B-Instruct": 32768,
|
53 |
+
"Qwen/Qwen2.5-Coder-32B-Instruct-fast": 131072,
|
54 |
+
"Qwen/Qwen2.5-Coder-32B-Instruct": 131072,
|
55 |
+
"Qwen/Qwen2.5-32B-Instruct-fast": 131072,
|
56 |
+
"Qwen/Qwen2.5-32B-Instruct": 131072,
|
57 |
+
"Qwen/Qwen2.5-72B-Instruct-fast": 131072,
|
58 |
+
"Qwen/Qwen2.5-72B-Instruct": 131072,
|
59 |
+
"Qwen/Qwen2-VL-72B-Instruct": 32768,
|
60 |
+
"Qwen/Qwen2-VL-7B-Instruct": 32768,
|
61 |
+
"llava-hf/llava-1.5-7b-hf": 4096,
|
62 |
+
"llava-hf/llava-1.5-13b-hf": 4096,
|
63 |
+
"aaditya/Llama3-OpenBioLLM-8B": 8192,
|
64 |
+
"aaditya/Llama3-OpenBioLLM-70B": 8192,
|
65 |
+
"BAAI/bge-en-icl": 32768,
|
66 |
+
"BAAI/bge-multilingual-gemma2": 4096,
|
67 |
+
"intfloat/e5-mistral-7b-instruct": 32768,
|
68 |
+
"cognitivecomputations/dolphin-2.9.2-mixtral-8x22b": 65536,
|
69 |
+
"microsoft/Phi-3.5-MoE-instruct": 131072,
|
70 |
+
"microsoft/Phi-3.5-mini-instruct": 131072,
|
71 |
+
"Qwen/Qwen2.5-1.5B-Instruct": 32768,
|
72 |
+
"meta-llama/Llama-3.3-70B-Instruct": 131072,
|
73 |
+
"meta-llama/Llama-3.3-70B-Instruct-fast": 131072,
|
74 |
+
"meta-llama/Llama-3.2-1B-Instruct": 131072,
|
75 |
+
"meta-llama/Llama-3.2-3B-Instruct": 131072,
|
76 |
+
"Qwen/QwQ-32B-Preview": 32768,
|
77 |
+
"Qwen/QVQ-72B-preview": 128000,
|
78 |
+
"microsoft/phi-4": 16384,
|
79 |
+
"deepseek-ai/DeepSeek-V3": 163840,
|
80 |
+
"deepseek-ai/DeepSeek-R1": 163840,
|
81 |
+
"NousResearch/Hermes-3-Llama-405B": 131072,
|
82 |
+
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 131072,
|
83 |
+
"deepseek-ai/DeepSeek-R1-fast": 163840,
|
84 |
+
"Qwen/QwQ-32B-fast": 131072,
|
85 |
+
"Qwen/QwQ-32B": 131072,
|
86 |
+
"nvidia/Llama-3_3-Nemotron-Super-49B-v1": 131072,
|
87 |
+
"mistralai/Mistral-Small-3.1-24B-Instruct-2503": 131072,
|
88 |
+
"google/gemma-3-27b-it": 131072,
|
89 |
+
"google/gemma-3-27b-it-fast": 131072,
|
90 |
+
"Qwen/Qwen2.5-VL-72B-Instruct": 32000,
|
91 |
+
"deepseek-ai/DeepSeek-V3-0324": 163840,
|
92 |
+
"deepseek-ai/DeepSeek-V3-0324-fast": 163840,
|
93 |
+
"black-forest-labs/flux-dev": 0,
|
94 |
+
"black-forest-labs/flux-schnell": 0,
|
95 |
+
"stability-ai/sdxl": 0
|
96 |
+
},
|
97 |
+
"novita": {
|
98 |
+
"deepseek/deepseek-prover-v2-671b": 160000,
|
99 |
+
"qwen/qwen3-235b-a22b-fp8": 128000,
|
100 |
+
"qwen/qwen3-30b-a3b-fp8": 128000,
|
101 |
+
"qwen/qwen3-32b-fp8": 128000,
|
102 |
+
"deepseek/deepseek-v3-0324": 128000,
|
103 |
+
"qwen/qwen2.5-vl-72b-instruct": 96000,
|
104 |
+
"deepseek/deepseek-v3-turbo": 64000,
|
105 |
+
"deepseek/deepseek-r1-turbo": 64000,
|
106 |
+
"meta-llama/llama-4-maverick-17b-128e-instruct-fp8": 1048576,
|
107 |
+
"google/gemma-3-27b-it": 32000,
|
108 |
+
"qwen/qwq-32b": 32768,
|
109 |
+
"Sao10K/L3-8B-Stheno-v3.2": 8192,
|
110 |
+
"gryphe/mythomax-l2-13b": 4096,
|
111 |
+
"meta-llama/llama-4-scout-17b-16e-instruct": 131072,
|
112 |
+
"deepseek/deepseek-r1-distill-llama-8b": 32000,
|
113 |
+
"deepseek/deepseek_v3": 64000,
|
114 |
+
"meta-llama/llama-3.1-8b-instruct": 16384,
|
115 |
+
"deepseek/deepseek-r1-distill-qwen-14b": 64000,
|
116 |
+
"meta-llama/llama-3.3-70b-instruct": 131072,
|
117 |
+
"qwen/qwen-2.5-72b-instruct": 32000,
|
118 |
+
"mistralai/mistral-nemo": 131072,
|
119 |
+
"deepseek/deepseek-r1-distill-qwen-32b": 64000,
|
120 |
+
"meta-llama/llama-3-8b-instruct": 8192,
|
121 |
+
"microsoft/wizardlm-2-8x22b": 65535,
|
122 |
+
"deepseek/deepseek-r1-distill-llama-70b": 32000,
|
123 |
+
"meta-llama/llama-3.1-70b-instruct": 32768,
|
124 |
+
"google/gemma-2-9b-it": 8192,
|
125 |
+
"mistralai/mistral-7b-instruct": 32768,
|
126 |
+
"meta-llama/llama-3-70b-instruct": 8192,
|
127 |
+
"deepseek/deepseek-r1": 64000,
|
128 |
+
"nousresearch/hermes-2-pro-llama-3-8b": 8192,
|
129 |
+
"sao10k/l3-70b-euryale-v2.1": 8192,
|
130 |
+
"cognitivecomputations/dolphin-mixtral-8x22b": 16000,
|
131 |
+
"jondurbin/airoboros-l2-70b": 4096,
|
132 |
+
"sophosympatheia/midnight-rose-70b": 4096,
|
133 |
+
"sao10k/l3-8b-lunaris": 8192,
|
134 |
+
"qwen/qwen3-0.6b-fp8": 32000,
|
135 |
+
"qwen/qwen3-1.7b-fp8": 32000,
|
136 |
+
"qwen/qwen3-8b-fp8": 128000,
|
137 |
+
"qwen/qwen3-4b-fp8": 128000,
|
138 |
+
"qwen/qwen3-14b-fp8": 128000,
|
139 |
+
"thudm/glm-4-9b-0414": 32000,
|
140 |
+
"thudm/glm-z1-9b-0414": 32000,
|
141 |
+
"thudm/glm-z1-32b-0414": 32000,
|
142 |
+
"thudm/glm-4-32b-0414": 32000,
|
143 |
+
"thudm/glm-z1-rumination-32b-0414": 32000,
|
144 |
+
"qwen/qwen2.5-7b-instruct": 32000,
|
145 |
+
"meta-llama/llama-3.2-1b-instruct": 131000,
|
146 |
+
"meta-llama/llama-3.2-11b-vision-instruct": 32768,
|
147 |
+
"meta-llama/llama-3.2-3b-instruct": 32768,
|
148 |
+
"meta-llama/llama-3.1-8b-instruct-bf16": 8192,
|
149 |
+
"sao10k/l31-70b-euryale-v2.2": 8192
|
150 |
+
},
|
151 |
+
"fal": {
|
152 |
+
"fal/model-name": 4096
|
153 |
+
},
|
154 |
+
"cerebras": {
|
155 |
+
"cerebras/model-name": 8192
|
156 |
+
},
|
157 |
+
"hf-inference": {
|
158 |
+
"google/gemma-2-9b-it": 8192,
|
159 |
+
"meta-llama/Meta-Llama-3-8B-Instruct": 8192
|
160 |
+
},
|
161 |
+
"hyperbolic": {
|
162 |
+
"Qwen/Qwen2.5-72B-Instruct": 131072,
|
163 |
+
"Qwen/Qwen2.5-VL-72B-Instruct": 32768,
|
164 |
+
"meta-llama/Meta-Llama-3-70B-Instruct": 8192,
|
165 |
+
"deepseek-ai/DeepSeek-V3": 131072,
|
166 |
+
"deepseek-ai/DeepSeek-V3-0324": 163840,
|
167 |
+
"meta-llama/Llama-3.3-70B-Instruct": 131072,
|
168 |
+
"Qwen/QwQ-32B-Preview": 32768,
|
169 |
+
"Qwen/Qwen2.5-Coder-32B-Instruct": 32768,
|
170 |
+
"meta-llama/Llama-3.2-3B-Instruct": 131072,
|
171 |
+
"NousResearch/Hermes-3-Llama-3.1-70B": 12288,
|
172 |
+
"meta-llama/Meta-Llama-3.1-405B-Instruct": 131000,
|
173 |
+
"meta-llama/Meta-Llama-3.1-70B-Instruct": 131072,
|
174 |
+
"meta-llama/Meta-Llama-3.1-8B-Instruct": 131072,
|
175 |
+
"mistralai/Pixtral-12B-2409": 32768,
|
176 |
+
"Qwen/Qwen2.5-VL-7B-Instruct": 32768,
|
177 |
+
"meta-llama/Meta-Llama-3.1-405B": 32768,
|
178 |
+
"meta-llama/Meta-Llama-3.1-405B-FP8": 32768,
|
179 |
+
"deepseek-ai/DeepSeek-R1": 163840,
|
180 |
+
"Qwen/QwQ-32B": 131072
|
181 |
+
},
|
182 |
+
"cohere": {
|
183 |
+
"embed-english-light-v3.0": 512,
|
184 |
+
"embed-multilingual-v2.0": 256,
|
185 |
+
"rerank-v3.5": 4096,
|
186 |
+
"embed-v4.0": 8192,
|
187 |
+
"rerank-english-v3.0": 4096,
|
188 |
+
"command-r": 128000,
|
189 |
+
"embed-english-light-v3.0-image": 0,
|
190 |
+
"embed-english-v3.0-image": 0,
|
191 |
+
"command-a-03-2025": 288000,
|
192 |
+
"command-nightly": 288000,
|
193 |
+
"command-r7b-12-2024": 128000,
|
194 |
+
"command-r-plus": 128000,
|
195 |
+
"c4ai-aya-vision-32b": 16384,
|
196 |
+
"command-r7b-arabic-02-2025": 128000,
|
197 |
+
"command-light-nightly": 4096,
|
198 |
+
"embed-english-v3.0": 512,
|
199 |
+
"embed-multilingual-light-v3.0-image": 0,
|
200 |
+
"embed-multilingual-v3.0-image": 0,
|
201 |
+
"c4ai-aya-expanse-32b": 128000,
|
202 |
+
"command": 4096,
|
203 |
+
"c4ai-aya-vision-8b": 16384
|
204 |
+
},
|
205 |
+
"together": {
|
206 |
+
"meta-llama/Llama-3.3-70B-Instruct-Turbo": 131072,
|
207 |
+
"togethercomputer/m2-bert-80M-32k-retrieval": 32768,
|
208 |
+
"google/gemma-2-9b-it": 8192,
|
209 |
+
"cartesia/sonic": 0,
|
210 |
+
"Qwen/Qwen2.5-7B-Instruct-Turbo": 32768,
|
211 |
+
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free": 8192,
|
212 |
+
"meta-llama-llama-2-70b-hf": 4096,
|
213 |
+
"BAAI/bge-base-en-v1.5": 512,
|
214 |
+
"Gryphe/MythoMax-L2-13b": 4096,
|
215 |
+
"google/gemma-2-27b-it": 8192,
|
216 |
+
"Qwen/Qwen2-VL-72B-Instruct": 32768,
|
217 |
+
"meta-llama/LlamaGuard-2-8b": 8192,
|
218 |
+
"cartesia/sonic-2": 0,
|
219 |
+
"togethercomputer/m2-bert-80M-8k-retrieval": 8192,
|
220 |
+
"meta-llama/Llama-3.3-70B-Instruct-Turbo-Free": 131072,
|
221 |
+
"arcee-ai/maestro-reasoning": 131072,
|
222 |
+
"Qwen/QwQ-32B": 131072,
|
223 |
+
"togethercomputer/MoA-1": 32768,
|
224 |
+
"mistralai/Mistral-7B-Instruct-v0.2": 32768,
|
225 |
+
"meta-llama/Meta-Llama-3-70B-Instruct-Turbo": 8192,
|
226 |
+
"google/gemma-2b-it": 8192,
|
227 |
+
"mistralai/Mistral-Small-24B-Instruct-2501": 32768,
|
228 |
+
"Gryphe/MythoMax-L2-13b-Lite": 4096,
|
229 |
+
"meta-llama/Meta-Llama-Guard-3-8B": 8192,
|
230 |
+
"scb10x/scb10x-llama3-1-typhoon2-8b-instruct": 8192,
|
231 |
+
"Qwen/Qwen3-235B-A22B-fp8-tput": 40960,
|
232 |
+
"meta-llama/Llama-3-8b-chat-hf": 8192,
|
233 |
+
"arcee-ai/caller": 32768,
|
234 |
+
"togethercomputer/MoA-1-Turbo": 32768,
|
235 |
+
"mistralai/Mistral-7B-Instruct-v0.1": 32768,
|
236 |
+
"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
|
237 |
+
"scb10x/scb10x-llama3-1-typhoon2-70b-instruct": 8192,
|
238 |
+
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": 131072,
|
239 |
+
"meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo": 131072,
|
240 |
+
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 131072,
|
241 |
+
"arcee-ai/virtuoso-medium-v2": 131072,
|
242 |
+
"arcee-ai/coder-large": 32768,
|
243 |
+
"arcee-ai/virtuoso-large": 131072,
|
244 |
+
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": 131072,
|
245 |
+
"meta-llama/Meta-Llama-3-8B-Instruct-Lite": 8192,
|
246 |
+
"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo": 131072,
|
247 |
+
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": 1048576,
|
248 |
+
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": 131072,
|
249 |
+
"mistralai/Mixtral-8x7B-v0.1": 32768,
|
250 |
+
"meta-llama/Llama-4-Scout-17B-16E-Instruct": 1048576,
|
251 |
+
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": 130815,
|
252 |
+
"deepseek-ai/DeepSeek-R1": 163840,
|
253 |
+
"arcee-ai/arcee-blitz": 32768,
|
254 |
+
"deepseek-ai/DeepSeek-V3-p-dp": 131072,
|
255 |
+
"deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": 131072,
|
256 |
+
"deepseek-ai/DeepSeek-V3": 131072,
|
257 |
+
"Qwen/Qwen2.5-Coder-32B-Instruct": 16384,
|
258 |
+
"Qwen/Qwen2-72B-Instruct": 32768,
|
259 |
+
"mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
|
260 |
+
"meta-llama/Llama-3-70b-chat-hf": 8192,
|
261 |
+
"mistralai/Mistral-7B-Instruct-v0.3": 32768,
|
262 |
+
"Salesforce/Llama-Rank-V1": 8192,
|
263 |
+
"nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": 32768,
|
264 |
+
"meta-llama/Llama-Vision-Free": 131072,
|
265 |
+
"meta-llama/Llama-Guard-3-11B-Vision-Turbo": 131072,
|
266 |
+
"meta-llama/Llama-3.2-3B-Instruct-Turbo": 131072,
|
267 |
+
"Qwen/Qwen2.5-72B-Instruct-Turbo": 131072,
|
268 |
+
"arcee_ai/arcee-spotlight": 131072,
|
269 |
+
"meta-llama/Llama-2-70b-hf": 4096,
|
270 |
+
"Qwen/Qwen2.5-VL-72B-Instruct": 32768
|
271 |
+
},
|
272 |
+
"fireworks-ai": {
|
273 |
+
"accounts/fireworks/models/qwq-32b": 131072,
|
274 |
+
"accounts/fireworks/models/qwen2-vl-72b-instruct": 32768,
|
275 |
+
"accounts/fireworks/models/deepseek-v3": 131072,
|
276 |
+
"accounts/fireworks/models/llama-v3p1-8b-instruct": 131072,
|
277 |
+
"accounts/fireworks/models/llama-v3p1-70b-instruct": 131072,
|
278 |
+
"accounts/fireworks/models/llama-v3p2-90b-vision-instruct": 131072,
|
279 |
+
"accounts/fireworks/models/llama-v3-70b-instruct": 8192,
|
280 |
+
"accounts/fireworks/models/deepseek-v3-0324": 163840,
|
281 |
+
"accounts/fireworks/models/qwen2p5-vl-32b-instruct": 128000,
|
282 |
+
"accounts/fireworks/models/llama4-maverick-instruct-basic": 1048576,
|
283 |
+
"accounts/fireworks/models/qwen3-30b-a3b": 131072,
|
284 |
+
"accounts/fireworks/models/llama4-scout-instruct-basic": 1048576,
|
285 |
+
"accounts/fireworks/models/deepseek-r1-basic": 163840,
|
286 |
+
"accounts/fireworks/models/qwen-qwq-32b-preview": 32768,
|
287 |
+
"accounts/fireworks/models/phi-3-vision-128k-instruct": 32064,
|
288 |
+
"accounts/fireworks/models/firesearch-ocr-v6": 131072,
|
289 |
+
"accounts/fireworks/models/llama-v3p3-70b-instruct": 131072,
|
290 |
+
"accounts/fireworks/models/deepseek-r1": 163840,
|
291 |
+
"accounts/yi-01-ai/models/yi-large": 32768,
|
292 |
+
"accounts/fireworks/models/llama-v3p1-405b-instruct": 131072,
|
293 |
+
"accounts/fireworks/models/llama-guard-3-8b": 131072,
|
294 |
+
"accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new": 131072,
|
295 |
+
"accounts/fireworks/models/mixtral-8x22b-instruct": 65536,
|
296 |
+
"accounts/fireworks/models/qwen2p5-72b-instruct": 32768,
|
297 |
+
"accounts/perplexity/models/r1-1776": 163840
|
298 |
+
}
|
299 |
+
}
|
src/lib/server/providers/cohere.ts
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import type { MaxTokensCache } from "./index.js";
|
2 |
+
|
3 |
+
const COHERE_API_URL = "https://api.cohere.ai/v1/models";

/**
 * Fetches the Cohere model list and maps each model name to its context length.
 *
 * @param apiKey - Cohere API key, sent as a Bearer token. When missing, no request is made.
 * @returns A `{ [modelName]: contextLength }` map. Returns an empty object when the key is
 *   missing, the request fails, or the response has no `models` array. Errors are logged,
 *   never thrown.
 */
export async function fetchCohereData(apiKey: string | undefined): Promise<MaxTokensCache["cohere"]> {
	if (!apiKey) {
		console.warn("Cohere API key not provided. Skipping Cohere fetch.");
		return {};
	}
	try {
		const response = await fetch(COHERE_API_URL, {
			headers: {
				Authorization: `Bearer ${apiKey}`,
			},
		});
		if (!response.ok) {
			throw new Error(`Cohere API request failed: ${response.status} ${response.statusText}`);
		}
		// The payload is untrusted JSON: keep it `unknown` and narrow before reading fields.
		const data: unknown = await response.json();
		const modelsData: MaxTokensCache["cohere"] = {};

		const models = typeof data === "object" && data !== null ? (data as { models?: unknown }).models : undefined;
		if (!Array.isArray(models)) return modelsData;

		for (const model of models as unknown[]) {
			// Skip malformed entries instead of letting one bad item discard the whole result.
			if (typeof model !== "object" || model === null) continue;
			const { name, context_length } = model as { name?: unknown; context_length?: unknown };
			if (typeof name === "string" && name !== "" && typeof context_length === "number") {
				modelsData[name] = context_length;
			}
		}
		return modelsData;
	} catch (error) {
		console.error("Error fetching Cohere data:", error);
		return {};
	}
}
|
src/lib/server/providers/fireworks.ts
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import type { MaxTokensCache } from "./index.js";
|
2 |
+
|
3 |
+
const FIREWORKS_API_URL = "https://api.fireworks.ai/inference/v1/models"; // Assumed

/**
 * Fetches the Fireworks AI model list and maps each model id to its context length.
 *
 * The response is expected to be OpenAI-shaped (`{ data: [ { id, ... } ] }`). The context length is
 * read from the first of `context_length`, `context_window` or `config.max_tokens` that is present.
 *
 * @param apiKey - Fireworks AI API key, sent as a Bearer token. When missing, no request is made.
 * @returns A `{ [modelId]: contextLength }` map. Returns an empty object when the key is missing,
 *   the request fails, or the response shape is unexpected. Errors are logged, never thrown.
 */
export async function fetchFireworksData(apiKey: string | undefined): Promise<MaxTokensCache["fireworks-ai"]> {
	if (!apiKey) {
		console.warn("Fireworks AI API key not provided. Skipping Fireworks AI fetch.");
		return {};
	}
	try {
		const response = await fetch(FIREWORKS_API_URL, {
			headers: {
				Authorization: `Bearer ${apiKey}`,
			},
		});
		if (!response.ok) {
			throw new Error(`Fireworks AI API request failed: ${response.status} ${response.statusText}`);
		}
		// The payload is untrusted JSON: keep it `unknown` and narrow before reading fields.
		const data: unknown = await response.json();
		const modelsData: MaxTokensCache["fireworks-ai"] = {};

		const models = typeof data === "object" && data !== null ? (data as { data?: unknown }).data : undefined;
		if (!Array.isArray(models)) {
			console.warn("Unexpected response structure from Fireworks AI API:", data);
			return modelsData;
		}

		for (const model of models as unknown[]) {
			// Skip malformed entries instead of letting one bad item discard the whole result.
			if (typeof model !== "object" || model === null) continue;
			const { id, context_length, context_window, config } = model as {
				id?: unknown;
				context_length?: unknown;
				context_window?: unknown;
				config?: { max_tokens?: unknown } | null;
			};
			// Check the common context-length field names in order of preference.
			const contextLength = context_length ?? context_window ?? config?.max_tokens ?? null;
			if (typeof id === "string" && id !== "" && typeof contextLength === "number") {
				modelsData[id] = contextLength;
			}
		}
		return modelsData;
	} catch (error) {
		console.error("Error fetching Fireworks AI data:", error);
		return {}; // Return empty on error
	}
}
|
src/lib/server/providers/hyperbolic.ts
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import type { MaxTokensCache } from "./index.js";
|
2 |
+
|
3 |
+
const HYPERBOLIC_API_URL = "https://api.hyperbolic.xyz/v1/models"; // Assumed

/**
 * Fetches the Hyperbolic model list and maps each model id to its context length.
 *
 * The response is expected to be OpenAI-shaped (`{ data: [ { id, ... } ] }`). The context length is
 * read from the first of `context_length`, `context_window` or `config.max_tokens` that is present.
 *
 * @param apiKey - Hyperbolic API key, sent as a Bearer token. When missing, no request is made.
 * @returns A `{ [modelId]: contextLength }` map. Returns an empty object when the key is missing,
 *   the request fails, or the response shape is unexpected. Errors are logged, never thrown.
 */
export async function fetchHyperbolicData(apiKey: string | undefined): Promise<MaxTokensCache["hyperbolic"]> {
	if (!apiKey) {
		console.warn("Hyperbolic API key not provided. Skipping Hyperbolic fetch.");
		return {};
	}
	try {
		const response = await fetch(HYPERBOLIC_API_URL, {
			headers: {
				Authorization: `Bearer ${apiKey}`,
			},
		});
		if (!response.ok) {
			throw new Error(`Hyperbolic API request failed: ${response.status} ${response.statusText}`);
		}
		// The payload is untrusted JSON: keep it `unknown` and narrow before reading fields.
		const data: unknown = await response.json();
		const modelsData: MaxTokensCache["hyperbolic"] = {};

		const models = typeof data === "object" && data !== null ? (data as { data?: unknown }).data : undefined;
		if (!Array.isArray(models)) {
			console.warn("Unexpected response structure from Hyperbolic API:", data);
			return modelsData;
		}

		for (const model of models as unknown[]) {
			// Skip malformed entries instead of letting one bad item discard the whole result.
			if (typeof model !== "object" || model === null) continue;
			const { id, context_length, context_window, config } = model as {
				id?: unknown;
				context_length?: unknown;
				context_window?: unknown;
				config?: { max_tokens?: unknown } | null;
			};
			// Check the common context-length field names in order of preference.
			const contextLength = context_length ?? context_window ?? config?.max_tokens ?? null;
			if (typeof id === "string" && id !== "" && typeof contextLength === "number") {
				modelsData[id] = contextLength;
			}
		}
		return modelsData;
	} catch (error) {
		console.error("Error fetching Hyperbolic data:", error);
		return {}; // Return empty on error
	}
}
|
src/lib/server/providers/index.ts
ADDED
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import fs from "fs/promises";
|
2 |
+
import path from "path";
|
3 |
+
import { fetchCohereData } from "./cohere.js";
|
4 |
+
import { fetchTogetherData } from "./together.js";
|
5 |
+
import { fetchFireworksData } from "./fireworks.js";
|
6 |
+
import { fetchHyperbolicData } from "./hyperbolic.js";
|
7 |
+
import { fetchReplicateData } from "./replicate.js";
|
8 |
+
import { fetchNebiusData } from "./nebius.js";
|
9 |
+
import { fetchNovitaData } from "./novita.js";
|
10 |
+
import { fetchSambanovaData } from "./sambanova.js";
|
11 |
+
|
12 |
+
// --- Constants ---
|
13 |
+
// Location of the JSON cache file; the relative path is resolved to an absolute one.
// NOTE(review): this commit adds its data file at src/lib/data/context_length.json — confirm the
// server-side cache is really meant to live under src/lib/server/data instead.
const CACHE_FILE_PATH = path.resolve("src/lib/server/data/context_length.json");
|
14 |
+
|
15 |
+
// --- Types ---
|
16 |
+
/** Context-length lookup table: provider name, then model id, mapped to a token count. */
export interface MaxTokensCache {
	[provider: string]: Record<string, number>;
}
|
21 |
+
|
22 |
+
/**
 * API keys object passed to fetchAllProviderData.
 * Every key is optional; one entry per supported provider.
 */
export interface ApiKeys {
	COHERE_API_KEY?: string;
	FIREWORKS_API_KEY?: string;
	HYPERBOLIC_API_KEY?: string;
	NEBIUS_API_KEY?: string;
	NOVITA_API_KEY?: string;
	REPLICATE_API_KEY?: string;
	SAMBANOVA_API_KEY?: string;
	TOGETHER_API_KEY?: string;
}
|
33 |
+
|
34 |
+
// --- Cache Handling ---
|
35 |
+
// In-memory cache state used by readCache and updateCache below.
|
36 |
+
/** Parsed cache contents once loaded; `null` until the first read (or after a failed update). */
let memoryCache: MaxTokensCache | null = null;
/** The in-flight disk read, if any, so concurrent callers share a single read. */
let cacheReadPromise: Promise<MaxTokensCache> | null = null;

/**
 * Returns the context-length cache, loading it from `CACHE_FILE_PATH` on first use.
 *
 * A populated `memoryCache` is returned directly; otherwise an already-running read is reused.
 * Any read or parse failure is logged (a missing file as a warning, anything else as an error)
 * and results in an empty cache rather than a rejection.
 */
async function readCache(): Promise<MaxTokensCache> {
	if (memoryCache) return memoryCache;
	if (cacheReadPromise) return cacheReadPromise;

	const loadFromDisk = async (): Promise<MaxTokensCache> => {
		try {
			const raw = await fs.readFile(CACHE_FILE_PATH, "utf-8");
			const parsed = JSON.parse(raw) as MaxTokensCache;
			memoryCache = parsed;
			return parsed;
		} catch (error: unknown) {
			const fileMissing =
				typeof error === "object" && error !== null && "code" in error && error.code === "ENOENT";
			if (fileMissing) {
				console.warn(`Cache file not found at ${CACHE_FILE_PATH}, starting with empty cache.`);
			} else {
				console.error("Error reading context length cache file:", error);
			}
			memoryCache = {};
			return {};
		} finally {
			// The read has settled; later callers go through `memoryCache` instead.
			cacheReadPromise = null;
		}
	};

	cacheReadPromise = loadFromDisk();
	return cacheReadPromise;
}
|
66 |
+
|
67 |
+
/** True when a global `window` exists, i.e. the module is being evaluated in a browser. */
const isBrowser = typeof window !== "undefined";

/**
 * `console.log` that stays silent in the browser.
 *
 * @param txt Values forwarded unchanged to `console.log`.
 */
function serverLog(...txt: unknown[]): void {
	if (!isBrowser) console.log(...txt);
}

/**
 * `console.error` that stays silent in the browser.
 *
 * @param txt Values forwarded unchanged to `console.error`.
 */
function serverError(...txt: unknown[]): void {
	if (!isBrowser) console.error(...txt);
}
|
78 |
+
|
79 |
+
// Tail of the queue of pending cache writes. Updates are chained so that two
// concurrent calls never share the temp file or overwrite each other's entry.
let cacheWriteChain: Promise<void> = Promise.resolve();

/**
 * Records `maxTokens` for `provider`/`modelId` in the cache file and refreshes
 * the in-memory copy.
 *
 * Updates run one at a time in call order. Failures are logged and reset the
 * in-memory cache; the returned promise never rejects.
 *
 * @param provider Provider name used as the top-level cache key.
 * @param modelId Model identifier within the provider.
 * @param maxTokens Context length to store.
 */
async function updateCache(provider: string, modelId: string, maxTokens: number): Promise<void> {
	const pending = cacheWriteChain.then(() => writeCacheEntry(provider, modelId, maxTokens));
	cacheWriteChain = pending;
	return pending;
}

/** Performs one read-modify-write cycle of the cache file; never rejects. */
async function writeCacheEntry(provider: string, modelId: string, maxTokens: number): Promise<void> {
	try {
		let cache: MaxTokensCache;
		try {
			const parsed: unknown = JSON.parse(await fs.readFile(CACHE_FILE_PATH, "utf-8"));
			if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
				throw new Error("Cache file is not a JSON object");
			}
			cache = parsed as MaxTokensCache;
		} catch (readError: unknown) {
			if (typeof readError === "object" && readError !== null && "code" in readError && readError.code === "ENOENT") {
				cache = {};
			} else {
				throw readError;
			}
		}

		const providerEntry = cache[provider] ?? {};
		providerEntry[modelId] = maxTokens;
		cache[provider] = providerEntry;

		// The cache directory may not exist yet on a fresh checkout.
		await fs.mkdir(path.dirname(CACHE_FILE_PATH), { recursive: true });
		// Write to a temp file and rename so readers never see a half-written file.
		const tempFilePath = CACHE_FILE_PATH + ".tmp";
		await fs.writeFile(tempFilePath, JSON.stringify(cache, null, "\t"), "utf-8");
		await fs.rename(tempFilePath, CACHE_FILE_PATH);

		memoryCache = cache;
		serverLog(`Cache updated for ${provider} - ${modelId}: ${maxTokens}`);
	} catch (error) {
		serverError(`Error updating context length cache for ${provider} - ${modelId}:`, error);
		// Drop the in-memory copy so the next read goes back to disk.
		memoryCache = null;
	}
}
|
106 |
+
|
107 |
+
// --- Main Exported Function ---
|
108 |
+
// The provider API key is supplied by the caller as the third argument.
|
109 |
+
type ProviderFetcher = (apiKey: string | undefined) => Promise<MaxTokensCache[string]>;

// Providers that support a live lookup, keyed by provider name.
const LIVE_FETCHERS: ReadonlyMap<string, ProviderFetcher> = new Map<string, ProviderFetcher>([
	["cohere", fetchCohereData],
	["together", fetchTogetherData],
	["fireworks-ai", fetchFireworksData],
	["hyperbolic", fetchHyperbolicData],
	["replicate", fetchReplicateData],
	["nebius", fetchNebiusData],
	["novita", fetchNovitaData],
	["sambanova", fetchSambanovaData],
]);

/**
 * Resolves the maximum token count for a model.
 *
 * The cache is consulted first. On a miss the provider's model list is fetched
 * live; a value found that way is returned immediately and written to the cache
 * in the background.
 *
 * @param provider Provider name (e.g. "cohere", "fireworks-ai").
 * @param modelId Model identifier as reported by the provider.
 * @param apiKey API key for the provider; passed through to its fetcher.
 * @returns The token limit, or null when the provider is unsupported, the model
 *          is not listed, or the live fetch fails.
 */
export async function getMaxTokens(
	provider: string,
	modelId: string,
	apiKey: string | undefined
): Promise<number | null> {
	const cache = await readCache();
	const cachedValue = cache[provider]?.[modelId];
	if (cachedValue !== undefined) {
		return cachedValue;
	}

	serverLog(`Cache miss for ${provider} - ${modelId}. Attempting live fetch...`);

	const fetchProviderData = LIVE_FETCHERS.get(provider);
	if (!fetchProviderData) {
		serverLog(`Live fetch not supported or implemented for provider: ${provider}`);
		return null;
	}

	try {
		const fetchedProviderData = await fetchProviderData(apiKey);
		const liveData = fetchedProviderData?.[modelId] ?? null;

		if (liveData === null) {
			serverLog(`Live fetch for ${provider} did not return data for model ${modelId}.`);
			return null;
		}

		serverLog(`Live fetch successful for ${provider} - ${modelId}: ${liveData}`);
		// Fire-and-forget: the caller should not wait for the disk write.
		void updateCache(provider, modelId, liveData).catch(err => {
			serverError(`Async cache update failed for ${provider} - ${modelId}:`, err);
		});
		return liveData;
	} catch (error) {
		serverError(`Error during live fetch for ${provider} - ${modelId}:`, error);
		return null;
	}
}
|
181 |
+
|
182 |
+
// --- Helper for Build Script ---
|
183 |
+
// API keys are supplied by the caller as a single object.
|
184 |
+
/**
 * Fetches the model → context-length table of every known provider in parallel.
 *
 * A provider that fails or returns no models is logged and left out of the
 * result; it does not affect the other providers.
 *
 * @param apiKeys Keys for the individual providers; missing keys are passed on as undefined.
 * @returns A table containing only the providers that returned at least one model.
 */
export async function fetchAllProviderData(apiKeys: ApiKeys): Promise<MaxTokensCache> {
	serverLog("Fetching data for all providers...");
	const results: MaxTokensCache = {};

	const providerFetchers: ReadonlyArray<{ name: string; fetcher: () => Promise<MaxTokensCache[string]> }> = [
		{ name: "cohere", fetcher: () => fetchCohereData(apiKeys.COHERE_API_KEY) },
		{ name: "together", fetcher: () => fetchTogetherData(apiKeys.TOGETHER_API_KEY) },
		{ name: "fireworks-ai", fetcher: () => fetchFireworksData(apiKeys.FIREWORKS_API_KEY) },
		{ name: "hyperbolic", fetcher: () => fetchHyperbolicData(apiKeys.HYPERBOLIC_API_KEY) },
		{ name: "replicate", fetcher: () => fetchReplicateData(apiKeys.REPLICATE_API_KEY) },
		{ name: "nebius", fetcher: () => fetchNebiusData(apiKeys.NEBIUS_API_KEY) },
		{ name: "novita", fetcher: () => fetchNovitaData(apiKeys.NOVITA_API_KEY) },
		{ name: "sambanova", fetcher: () => fetchSambanovaData(apiKeys.SAMBANOVA_API_KEY) },
	];

	// Keep each provider name next to its outcome so no index lookup is needed afterwards.
	const outcomes = await Promise.all(
		providerFetchers.map(({ name, fetcher }) =>
			fetcher().then(
				value => ({ name, ok: true as const, value }),
				(reason: unknown) => ({ name, ok: false as const, reason })
			)
		)
	);

	for (const outcome of outcomes) {
		if (!outcome.ok) {
			serverError(`Error fetching ${outcome.name} data:`, outcome.reason);
		} else if (outcome.value) {
			if (Object.keys(outcome.value).length > 0) {
				results[outcome.name] = outcome.value;
				serverLog(`Successfully fetched data for ${outcome.name}`);
			} else {
				serverLog(`No data returned for ${outcome.name}.`);
			}
		}
	}

	serverLog("Finished fetching provider data.");
	return results;
}
|
src/lib/server/providers/nebius.ts
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import type { MaxTokensCache } from "./index.js";
|
2 |
+
|
3 |
+
/** Fields read from one entry of the Nebius model list. */
interface NebiusModel {
	id: string;
	config?: {
		max_tokens?: number;
	};
	context_length?: number;
}

/** Envelope of the Nebius model-list response. */
interface NebiusResponse {
	data?: NebiusModel[];
}

const NEBIUS_API_URL = "https://api.studio.nebius.com/v1/models?verbose=true";

/**
 * Fetches the Nebius model list and maps each model id to its context length.
 *
 * `context_length` is preferred; `config.max_tokens` is the fallback. Models
 * with neither numeric field are omitted.
 *
 * @param apiKey Nebius API key; when missing, no request is made.
 * @returns Model id → context length. Empty when the key is missing, the
 *          request fails, or the response has an unexpected shape.
 */
export async function fetchNebiusData(apiKey: string | undefined): Promise<MaxTokensCache["nebius"]> {
	if (!apiKey) {
		console.warn("Nebius API key not provided. Skipping Nebius fetch.");
		return {};
	}

	try {
		const response = await fetch(NEBIUS_API_URL, {
			headers: { Authorization: `Bearer ${apiKey}` },
		});
		if (!response.ok) {
			throw new Error(`Nebius API request failed: ${response.status} ${response.statusText}`);
		}

		const payload = (await response.json()) as NebiusResponse | null;
		const modelsData: MaxTokensCache["nebius"] = {};

		const models = payload?.data;
		if (!Array.isArray(models)) {
			console.warn("Unexpected response structure from Nebius API:", payload);
			return modelsData;
		}

		for (const model of models) {
			const contextLength = model.context_length ?? model.config?.max_tokens ?? null;
			if (model.id && typeof contextLength === "number") {
				modelsData[model.id] = contextLength;
			}
		}
		return modelsData;
	} catch (error) {
		console.error("Error fetching Nebius data:", error);
		return {};
	}
}
|
src/lib/server/providers/novita.ts
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import type { MaxTokensCache } from "./index.js";
|
2 |
+
|
3 |
+
const NOVITA_API_URL = "https://api.novita.ai/v3/openai/models";

/** Fields read from one entry of the Novita model list. */
interface NovitaModel {
	id: string;
	object: string;
	context_size: number;
}

/** Envelope of the Novita model-list response. */
interface NovitaResponse {
	data: NovitaModel[];
}

/**
 * Fetches the Novita model list and maps each model id to its `context_size`.
 *
 * @param apiKey Novita API key; when missing, no request is made.
 * @returns Model id → context size. Empty when the key is missing, the request
 *          fails, or the response has an unexpected shape.
 */
export async function fetchNovitaData(apiKey: string | undefined): Promise<MaxTokensCache["novita"]> {
	if (!apiKey) {
		console.warn("Novita API key not provided. Skipping Novita fetch.");
		return {};
	}

	try {
		const response = await fetch(NOVITA_API_URL, {
			headers: { Authorization: `Bearer ${apiKey}` },
		});
		if (!response.ok) {
			throw new Error(`Novita API request failed: ${response.status} ${response.statusText}`);
		}

		// The declared shape is what the API is expected to send; it is re-checked below.
		const payload = (await response.json()) as Partial<NovitaResponse> | null;
		const modelsData: MaxTokensCache["novita"] = {};

		const models = payload?.data;
		if (!Array.isArray(models)) {
			console.warn("Unexpected response structure from Novita API:", payload);
			return modelsData;
		}

		for (const model of models) {
			if (model.id && typeof model.context_size === "number") {
				modelsData[model.id] = model.context_size;
			}
		}
		return modelsData;
	} catch (error) {
		console.error("Error fetching Novita data:", error);
		return {};
	}
}
|
src/lib/server/providers/replicate.ts
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import type { MaxTokensCache } from "./index.js";
|
2 |
+
|
3 |
+
const REPLICATE_API_URL = "https://api.replicate.com/v1/models";

// NOTE(review): these are the fields this function reads; confirm against the
// Replicate API that model entries actually expose them.
interface ReplicateModel {
	id?: string;
	context_length?: number;
	config?: {
		max_tokens?: number;
	};
}

/** Envelope of the Replicate model-list response. */
interface ReplicateResponse {
	results?: ReplicateModel[];
}

/**
 * Fetches the Replicate model list and maps each model id to its context length.
 *
 * `context_length` is preferred; `config.max_tokens` is the fallback. Only the
 * `results` array of this single response is read.
 *
 * @param apiKey Replicate API key; when missing, no request is made.
 * @returns Model id → context length. Empty when the key is missing, the
 *          request fails, or the response has an unexpected shape.
 */
export async function fetchReplicateData(apiKey: string | undefined): Promise<MaxTokensCache["replicate"]> {
	if (!apiKey) {
		console.warn("Replicate API key not provided. Skipping Replicate fetch.");
		return {};
	}

	try {
		const response = await fetch(REPLICATE_API_URL, {
			headers: { Authorization: `Token ${apiKey}` },
		});
		if (!response.ok) {
			throw new Error(`Replicate API request failed: ${response.status} ${response.statusText}`);
		}

		const payload = (await response.json()) as ReplicateResponse | null;
		const modelsData: MaxTokensCache["replicate"] = {};

		const models = payload?.results;
		if (!Array.isArray(models)) {
			console.warn("Unexpected response structure from Replicate API:", payload);
			return modelsData;
		}

		for (const model of models) {
			const contextLength = model.context_length ?? model.config?.max_tokens ?? null;
			if (model.id && typeof contextLength === "number") {
				modelsData[model.id] = contextLength;
			}
		}
		return modelsData;
	} catch (error) {
		console.error("Error fetching Replicate data:", error);
		return {};
	}
}
|
src/lib/server/providers/sambanova.ts
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import type { MaxTokensCache } from "./index.js";
|
2 |
+
|
3 |
+
const SAMBANOVA_API_URL = "https://api.sambanova.ai/v1/models";

/** One entry of the SambaNova model list; only `id` and `context_length` are read here. */
interface SambanovaModel {
	id: string;
	object: string;
	context_length: number;
	max_completion_tokens?: number;
	pricing?: {
		prompt: string;
		completion: string;
	};
}

/** Envelope of the SambaNova model-list response. */
interface SambanovaResponse {
	data: SambanovaModel[];
	object: string;
}

/**
 * Fetches the SambaNova model list and maps each model id to its `context_length`.
 *
 * @param apiKey SambaNova API key; when missing, no request is made.
 * @returns Model id → context length. Empty when the key is missing, the
 *          request fails, or the response has an unexpected shape.
 */
export async function fetchSambanovaData(apiKey: string | undefined): Promise<MaxTokensCache["sambanova"]> {
	if (!apiKey) {
		console.warn("SambaNova API key not provided. Skipping SambaNova fetch.");
		return {};
	}

	try {
		const response = await fetch(SAMBANOVA_API_URL, {
			headers: { Authorization: `Bearer ${apiKey}` },
		});
		if (!response.ok) {
			throw new Error(`SambaNova API request failed: ${response.status} ${response.statusText}`);
		}

		// The declared shape is what the API is expected to send; it is re-checked below.
		const payload = (await response.json()) as Partial<SambanovaResponse> | null;
		const modelsData: MaxTokensCache["sambanova"] = {};

		const models = payload?.data;
		if (!Array.isArray(models)) {
			console.warn("Unexpected response structure from SambaNova API:", payload);
			return modelsData;
		}

		for (const model of models) {
			if (model.id && typeof model.context_length === "number") {
				modelsData[model.id] = model.context_length;
			}
		}
		return modelsData;
	} catch (error) {
		console.error("Error fetching SambaNova data:", error);
		return {};
	}
}
|
src/lib/server/providers/together.ts
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import type { MaxTokensCache } from "./index.js";
|
2 |
+
|
3 |
+
const TOGETHER_API_URL = "https://api.together.xyz/v1/models";

/** Fields read from one entry of the Together AI model list. */
interface TogetherModel {
	id?: string;
	context_length?: number;
	config?: {
		max_tokens?: number;
	};
}

/**
 * Fetches the Together AI model list and maps each model id to its context length.
 *
 * The response is expected to be a bare JSON array of models. `context_length`
 * is preferred; `config.max_tokens` is the fallback.
 *
 * @param apiKey Together AI API key; when missing, no request is made.
 * @returns Model id → context length. Empty when the key is missing, the
 *          request fails, or the response is not an array.
 */
export async function fetchTogetherData(apiKey: string | undefined): Promise<MaxTokensCache["together"]> {
	if (!apiKey) {
		console.warn("Together AI API key not provided. Skipping Together AI fetch.");
		return {};
	}

	try {
		const response = await fetch(TOGETHER_API_URL, {
			headers: { Authorization: `Bearer ${apiKey}` },
		});
		if (!response.ok) {
			throw new Error(`Together AI API request failed: ${response.status} ${response.statusText}`);
		}

		const payload: unknown = await response.json();
		const modelsData: MaxTokensCache["together"] = {};

		if (!Array.isArray(payload)) {
			// Same diagnostic the other provider fetchers emit for an unexpected payload.
			console.warn("Unexpected response structure from Together AI API:", payload);
			return modelsData;
		}

		for (const model of payload as TogetherModel[]) {
			const contextLength = model.context_length ?? model.config?.max_tokens ?? null;
			if (model.id && typeof contextLength === "number") {
				modelsData[model.id] = contextLength;
			}
		}
		return modelsData;
	} catch (error) {
		console.error("Error fetching Together AI data:", error);
		return {};
	}
}
|
src/lib/state/generation-stats.svelte.ts
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { getTokens } from "$lib/components/inference-playground/utils.js";
|
2 |
+
import { watch } from "runed";
|
3 |
+
import { session } from "./session.svelte";
|
4 |
+
|
5 |
+
/** Per-conversation generation metrics. */
export interface GenerationStats {
	latency: number;
	generatedTokensCount: number;
}

/**
 * Builds the reactive list of generation stats, one entry per conversation
 * index, extended with `set` and `init` helpers.
 */
function createGenerationStats() {
	const stats = $state([] as Array<GenerationStats>);

	// Recomputes the token count of one conversation and stores it at index `i`.
	const refreshTokens = async (conversation: Parameters<typeof getTokens>[0], i: number) => {
		const generatedTokensCount = await getTokens(conversation);
		// Read the current entry only after the await, so a latency written while
		// the token count was being computed is not overwritten by a stale copy.
		stats[i] = { latency: 0, ...stats[i], generatedTokensCount };
	};

	/** Starts watching the session project and refreshes token counts whenever it changes. */
	const init = () => {
		watch(
			() => $state.snapshot(session.project),
			() => {
				session.project.conversations.forEach((conversation, i) => {
					void refreshTokens(conversation, i).catch((error: unknown) => {
						console.error(`Failed to update generated token count for conversation ${i}:`, error);
					});
				});
			}
		);
	};

	/**
	 * Replaces every entry with the contents of `s`.
	 *
	 * The array is mutated in place: the object returned below is this same
	 * array, so reassigning the local binding would never reach its consumers.
	 */
	const set = (s: Array<GenerationStats>) => {
		stats.splice(0, stats.length, ...s);
	};

	return Object.assign(stats, { set, init });
}

export const generationStats = createGenerationStats();
|
src/lib/types.ts
CHANGED
@@ -27,6 +27,7 @@ export type ConversationWithHFModel = Conversation & {
|
|
27 |
export const isConversationWithHFModel = typia.createIs<ConversationWithHFModel>();
|
28 |
export const isConversationWithCustomModel = typia.createIs<ConversationWithCustomModel>();
|
29 |
|
|
|
30 |
export const isCustomModel = typia.createIs<CustomModel>();
|
31 |
|
32 |
export type Project = {
|
|
|
27 |
export const isConversationWithHFModel = typia.createIs<ConversationWithHFModel>();
|
28 |
export const isConversationWithCustomModel = typia.createIs<ConversationWithCustomModel>();
|
29 |
|
30 |
+
export const isHFModel = typia.createIs<Model>();
|
31 |
export const isCustomModel = typia.createIs<CustomModel>();
|
32 |
|
33 |
export type Project = {
|
src/lib/utils/is.ts
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import { SvelteSet } from "svelte/reactivity";
|
|
|
2 |
|
3 |
export function isHtmlElement(element: unknown): element is HTMLElement {
|
4 |
return element instanceof HTMLElement;
|
@@ -35,3 +36,5 @@ export function isTouch(event: PointerEvent): boolean {
|
|
35 |
export function isPromise(value: unknown): value is Promise<unknown> {
|
36 |
return value instanceof Promise;
|
37 |
}
|
|
|
|
|
|
1 |
import { SvelteSet } from "svelte/reactivity";
|
2 |
+
import typia from "typia";
|
3 |
|
4 |
export function isHtmlElement(element: unknown): element is HTMLElement {
|
5 |
return element instanceof HTMLElement;
|
|
|
36 |
export function isPromise(value: unknown): value is Promise<unknown> {
|
37 |
return value instanceof Promise;
|
38 |
}
|
39 |
+
|
40 |
+
export const isNumber = typia.createIs<number>();
|
src/lib/utils/object.ts
CHANGED
@@ -32,3 +32,12 @@ export function pick<T extends Record<string, unknown>, K extends keyof T>(obj:
|
|
32 |
}
|
33 |
return result;
|
34 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
}
|
33 |
return result;
|
34 |
}
|
35 |
+
|
36 |
+
/**
 * Try to get a value from an object, or return undefined.
 * The key does not need to match the type of the object, so the
 * returned type is a union of all values, and undefined.
 *
 * Only the object's own properties are considered: keys that exist solely on
 * the prototype chain (e.g. "toString", "constructor") yield undefined.
 *
 * @param obj Object to read from.
 * @param key Property name to look up.
 * @returns The own property value, or undefined when `obj` has no such own property.
 */
export function tryGet<T extends Record<string, unknown>>(obj: T, key: string): T[keyof T] | undefined {
	if (!Object.prototype.hasOwnProperty.call(obj, key)) {
		return undefined;
	}
	return obj[key as keyof T];
}
|
src/routes/+layout.svelte
CHANGED
@@ -6,6 +6,7 @@
|
|
6 |
import ShareModal from "$lib/components/share-modal.svelte";
|
7 |
import "../app.css";
|
8 |
import { session } from "$lib/state/session.svelte";
|
|
|
9 |
|
10 |
interface Props {
|
11 |
children?: import("svelte").Snippet;
|
@@ -13,6 +14,7 @@
|
|
13 |
|
14 |
let { children }: Props = $props();
|
15 |
session.init();
|
|
|
16 |
</script>
|
17 |
|
18 |
{@render children?.()}
|
|
|
6 |
import ShareModal from "$lib/components/share-modal.svelte";
|
7 |
import "../app.css";
|
8 |
import { session } from "$lib/state/session.svelte";
|
9 |
+
import { generationStats } from "$lib/state/generation-stats.svelte";
|
10 |
|
11 |
interface Props {
|
12 |
children?: import("svelte").Snippet;
|
|
|
14 |
|
15 |
let { children }: Props = $props();
|
16 |
session.init();
|
17 |
+
generationStats.init();
|
18 |
</script>
|
19 |
|
20 |
{@render children?.()}
|