Add support for Lepton (#1866)
### What problem does this PR solve?

Add support for Lepton.

#1853
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
Co-authored-by: Zhedong Cen <[email protected]>
conf/llm_factories.json (CHANGED)

```diff
@@ -2326,6 +2326,104 @@
                     "model_type": "rerank"
                 }
             ]
+        },
+        {
+            "name": "Lepton",
+            "logo": "",
+            "tags": "LLM",
+            "status": "1",
+            "llm": [
+                {
+                    "llm_name": "dolphin-mixtral-8x7b",
+                    "tags": "LLM,CHAT,32k",
+                    "max_tokens": 32768,
+                    "model_type": "chat"
+                },
+                {
+                    "llm_name": "gemma-7b",
+                    "tags": "LLM,CHAT,8k",
+                    "max_tokens": 8192,
+                    "model_type": "chat"
+                },
+                {
+                    "llm_name": "llama3-1-8b",
+                    "tags": "LLM,CHAT,4k",
+                    "max_tokens": 4096,
+                    "model_type": "chat"
+                },
+                {
+                    "llm_name": "llama3-8b",
+                    "tags": "LLM,CHAT,8K",
+                    "max_tokens": 8192,
+                    "model_type": "chat"
+                },
+                {
+                    "llm_name": "llama2-13b",
+                    "tags": "LLM,CHAT,4K",
+                    "max_tokens": 4096,
+                    "model_type": "chat"
+                },
+                {
+                    "llm_name": "llama3-1-70b",
+                    "tags": "LLM,CHAT,8k",
+                    "max_tokens": 8192,
+                    "model_type": "chat"
+                },
+                {
+                    "llm_name": "llama3-70b",
+                    "tags": "LLM,CHAT,8k",
+                    "max_tokens": 8192,
+                    "model_type": "chat"
+                },
+                {
+                    "llm_name": "llama3-1-405b",
+                    "tags": "LLM,CHAT,8k",
+                    "max_tokens": 8192,
+                    "model_type": "chat"
+                },
+                {
+                    "llm_name": "mistral-7b",
+                    "tags": "LLM,CHAT,8K",
+                    "max_tokens": 8192,
+                    "model_type": "chat"
+                },
+                {
+                    "llm_name": "mistral-8x7b",
+                    "tags": "LLM,CHAT,8K",
+                    "max_tokens": 8192,
+                    "model_type": "chat"
+                },
+                {
+                    "llm_name": "nous-hermes-llama2",
+                    "tags": "LLM,CHAT,4k",
+                    "max_tokens": 4096,
+                    "model_type": "chat"
+                },
+                {
+                    "llm_name": "openchat-3-5",
+                    "tags": "LLM,CHAT,4k",
+                    "max_tokens": 4096,
+                    "model_type": "chat"
+                },
+                {
+                    "llm_name": "toppy-m-7b",
+                    "tags": "LLM,CHAT,4k",
+                    "max_tokens": 4096,
+                    "model_type": "chat"
+                },
+                {
+                    "llm_name": "wizardlm-2-7b",
+                    "tags": "LLM,CHAT,32k",
+                    "max_tokens": 32768,
+                    "model_type": "chat"
+                },
+                {
+                    "llm_name": "wizardlm-2-8x22b",
+                    "tags": "LLM,CHAT,64K",
+                    "max_tokens": 65536,
+                    "model_type": "chat"
+                }
+            ]
         }
     ]
 }
```
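In these entries the trailing size in `tags` is the model's context window, and `max_tokens` is the same figure in tokens (4k ↔ 4096, 8k ↔ 8192, 32k ↔ 32768, 64K ↔ 65536; the tag's case varies in the config). A small, purely illustrative Python check of that correspondence, using a few entries copied from the diff above:

```python
# Illustrative sketch (not part of the PR): confirm that the context
# tag in "tags" agrees with "max_tokens" for a few of the Lepton
# entries added above. Data is copied verbatim from the diff.
entries = [
    ("dolphin-mixtral-8x7b", "32k", 32768),
    ("llama3-8b", "8K", 8192),
    ("llama2-13b", "4K", 4096),
    ("wizardlm-2-8x22b", "64K", 65536),
]

for name, context_tag, max_tokens in entries:
    # "8k" means 8 * 1024 tokens, case-insensitive.
    assert int(context_tag.lower().rstrip("k")) * 1024 == max_tokens, name
```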
rag/llm/__init__.py (CHANGED)

```diff
@@ -83,7 +83,8 @@ ChatModel = {
     "NVIDIA": NvidiaChat,
     "LM-Studio": LmStudioChat,
     "OpenAI-API-Compatible": OpenAI_APIChat,
-    "cohere": CoHereChat
+    "cohere": CoHereChat,
+    "LeptonAI": LeptonAIChat
 }
```
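For orientation, here is a minimal, hypothetical call site showing how the new registry key resolves to the class. The key `"LeptonAI"` and the constructor signature come from this PR; the API key and model choice are placeholders. Note that the factory appears as `"Lepton"` in `conf/llm_factories.json` but as `"LeptonAI"` in this registry and in the frontend icon map.

```python
# Hypothetical usage sketch (illustrative only): resolve the factory
# key added above to its chat class and instantiate it for one of the
# Lepton models declared in conf/llm_factories.json.
from rag.llm import ChatModel

mdl = ChatModel["LeptonAI"](
    "YOUR_LEPTON_API_KEY",  # placeholder credential
    "llama3-8b",            # model name from the factory config
)
# With base_url omitted, LeptonAIChat derives the per-model endpoint
# itself (see rag/llm/chat_model.py below).
```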
rag/llm/chat_model.py (CHANGED)

```diff
@@ -71,7 +71,7 @@ class Base(ABC):
                     total_tokens
                     + num_tokens_from_string(resp.choices[0].delta.content)
                 )
-                if not hasattr(resp, "usage")
+                if not hasattr(resp, "usage") or not resp.usage
                 else resp.usage["total_tokens"]
             )
             if resp.choices[0].finish_reason == "length":
@@ -981,3 +981,10 @@ class CoHereChat(Base):
             yield ans + "\n**ERROR**: " + str(e)
 
         yield total_tokens
+
+
+class LeptonAIChat(Base):
+    def __init__(self, key, model_name, base_url=None):
+        if not base_url:
+            base_url = os.path.join("https://"+model_name+".lepton.run","api","v1")
+        super().__init__(key, model_name, base_url)
```
web/src/assets/svg/llm/lepton.svg (ADDED)
web/src/pages/user-setting/setting-model/constant.ts (CHANGED)

```diff
@@ -23,7 +23,8 @@ export const IconMap = {
   NVIDIA:'nvidia',
   'LM-Studio':'lm-studio',
   'OpenAI-API-Compatible':'openai-api',
-  'cohere':'cohere'
+  'cohere':'cohere',
+  'LeptonAI':'lepton'
 };
 
 export const BedrockRegionList = [
```
|