Feat: add gpustack model provider (#4469)
Browse files
### What problem does this PR solve?
Add GPUStack as a new model provider.
[GPUStack](https://github.com/gpustack/gpustack) is an open-source GPU
cluster manager for running LLMs. Currently, locally deployed models in
GPUStack cannot integrate well with RAGFlow. GPUStack provides both
OpenAI compatible APIs (Models / Chat Completions / Embeddings /
Speech2Text / TTS) and other APIs like Rerank. We would like to use
GPUStack as a model provider in ragflow.
[GPUStack Docs](https://docs.gpustack.ai/latest/quickstart/)
Related issue: https://github.com/infiniflow/ragflow/issues/4064.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
### Testing Instructions
1. Install GPUStack and deploy the `llama-3.2-1b-instruct` LLM, the `bge-m3`
text embedding model, the `bge-reranker-v2-m3` rerank model, the
`faster-whisper-medium` Speech-to-Text model, and the `cosyvoice-300m-sft`
TTS model in GPUStack.
2. Add GPUStack as a model provider in the RAGFlow settings.
3. Test each model type in RAGFlow.
- api/apps/llm_app.py +2 -2
- conf/llm_factories.json +7 -0
- rag/llm/__init__.py +10 -0
- rag/llm/chat_model.py +8 -0
- rag/llm/embedding_model.py +12 -1
- rag/llm/rerank_model.py +52 -0
- rag/llm/sequence2txt_model.py +12 -0
- rag/llm/tts_model.py +32 -0
- web/src/assets/svg/llm/gpustack.svg +14 -0
- web/src/constants/setting.ts +1 -0
- web/src/pages/user-setting/constants.tsx +1 -0
- web/src/pages/user-setting/setting-model/ollama-modal/index.tsx +8 -0
|
@@ -329,7 +329,7 @@ def my_llms():
|
|
| 329 |
@manager.route('/list', methods=['GET']) # noqa: F821
|
| 330 |
@login_required
|
| 331 |
def list_app():
|
| 332 |
-
|
| 333 |
weighted = ["Youdao", "FastEmbed", "BAAI"] if settings.LIGHTEN != 0 else []
|
| 334 |
model_type = request.args.get("model_type")
|
| 335 |
try:
|
|
@@ -339,7 +339,7 @@ def list_app():
|
|
| 339 |
llms = [m.to_dict()
|
| 340 |
for m in llms if m.status == StatusEnum.VALID.value and m.fid not in weighted]
|
| 341 |
for m in llms:
|
| 342 |
-
m["available"] = m["fid"] in facts or m["llm_name"].lower() == "flag-embedding" or m["fid"] in
|
| 343 |
|
| 344 |
llm_set = set([m["llm_name"] + "@" + m["fid"] for m in llms])
|
| 345 |
for o in objs:
|
|
|
|
| 329 |
@manager.route('/list', methods=['GET']) # noqa: F821
|
| 330 |
@login_required
|
| 331 |
def list_app():
|
| 332 |
+
self_deployed = ["Youdao", "FastEmbed", "BAAI", "Ollama", "Xinference", "LocalAI", "LM-Studio", "GPUStack"]
|
| 333 |
weighted = ["Youdao", "FastEmbed", "BAAI"] if settings.LIGHTEN != 0 else []
|
| 334 |
model_type = request.args.get("model_type")
|
| 335 |
try:
|
|
|
|
| 339 |
llms = [m.to_dict()
|
| 340 |
for m in llms if m.status == StatusEnum.VALID.value and m.fid not in weighted]
|
| 341 |
for m in llms:
|
| 342 |
+
m["available"] = m["fid"] in facts or m["llm_name"].lower() == "flag-embedding" or m["fid"] in self_deployed
|
| 343 |
|
| 344 |
llm_set = set([m["llm_name"] + "@" + m["fid"] for m in llms])
|
| 345 |
for o in objs:
|
|
@@ -2543,6 +2543,13 @@
|
|
| 2543 |
"tags": "TEXT EMBEDDING",
|
| 2544 |
"status": "1",
|
| 2545 |
"llm": []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2546 |
}
|
| 2547 |
]
|
| 2548 |
}
|
|
|
|
| 2543 |
"tags": "TEXT EMBEDDING",
|
| 2544 |
"status": "1",
|
| 2545 |
"llm": []
|
| 2546 |
+
},
|
| 2547 |
+
{
|
| 2548 |
+
"name": "GPUStack",
|
| 2549 |
+
"logo": "",
|
| 2550 |
+
"tags": "LLM,TEXT EMBEDDING,TTS,SPEECH2TEXT,TEXT RE-RANK",
|
| 2551 |
+
"status": "1",
|
| 2552 |
+
"llm": []
|
| 2553 |
}
|
| 2554 |
]
|
| 2555 |
}
|
|
@@ -42,6 +42,7 @@ from .embedding_model import (
|
|
| 42 |
VoyageEmbed,
|
| 43 |
HuggingFaceEmbed,
|
| 44 |
VolcEngineEmbed,
|
|
|
|
| 45 |
)
|
| 46 |
from .chat_model import (
|
| 47 |
GptTurbo,
|
|
@@ -80,6 +81,7 @@ from .chat_model import (
|
|
| 80 |
AnthropicChat,
|
| 81 |
GoogleChat,
|
| 82 |
HuggingFaceChat,
|
|
|
|
| 83 |
)
|
| 84 |
|
| 85 |
from .cv_model import (
|
|
@@ -116,6 +118,7 @@ from .rerank_model import (
|
|
| 116 |
BaiduYiyanRerank,
|
| 117 |
VoyageRerank,
|
| 118 |
QWenRerank,
|
|
|
|
| 119 |
)
|
| 120 |
from .sequence2txt_model import (
|
| 121 |
GPTSeq2txt,
|
|
@@ -123,6 +126,7 @@ from .sequence2txt_model import (
|
|
| 123 |
AzureSeq2txt,
|
| 124 |
XinferenceSeq2txt,
|
| 125 |
TencentCloudSeq2txt,
|
|
|
|
| 126 |
)
|
| 127 |
from .tts_model import (
|
| 128 |
FishAudioTTS,
|
|
@@ -130,6 +134,7 @@ from .tts_model import (
|
|
| 130 |
OpenAITTS,
|
| 131 |
SparkTTS,
|
| 132 |
XinferenceTTS,
|
|
|
|
| 133 |
)
|
| 134 |
|
| 135 |
EmbeddingModel = {
|
|
@@ -161,6 +166,7 @@ EmbeddingModel = {
|
|
| 161 |
"Voyage AI": VoyageEmbed,
|
| 162 |
"HuggingFace": HuggingFaceEmbed,
|
| 163 |
"VolcEngine": VolcEngineEmbed,
|
|
|
|
| 164 |
}
|
| 165 |
|
| 166 |
CvModel = {
|
|
@@ -220,6 +226,7 @@ ChatModel = {
|
|
| 220 |
"Anthropic": AnthropicChat,
|
| 221 |
"Google Cloud": GoogleChat,
|
| 222 |
"HuggingFace": HuggingFaceChat,
|
|
|
|
| 223 |
}
|
| 224 |
|
| 225 |
RerankModel = {
|
|
@@ -237,6 +244,7 @@ RerankModel = {
|
|
| 237 |
"BaiduYiyan": BaiduYiyanRerank,
|
| 238 |
"Voyage AI": VoyageRerank,
|
| 239 |
"Tongyi-Qianwen": QWenRerank,
|
|
|
|
| 240 |
}
|
| 241 |
|
| 242 |
Seq2txtModel = {
|
|
@@ -245,6 +253,7 @@ Seq2txtModel = {
|
|
| 245 |
"Azure-OpenAI": AzureSeq2txt,
|
| 246 |
"Xinference": XinferenceSeq2txt,
|
| 247 |
"Tencent Cloud": TencentCloudSeq2txt,
|
|
|
|
| 248 |
}
|
| 249 |
|
| 250 |
TTSModel = {
|
|
@@ -253,4 +262,5 @@ TTSModel = {
|
|
| 253 |
"OpenAI": OpenAITTS,
|
| 254 |
"XunFei Spark": SparkTTS,
|
| 255 |
"Xinference": XinferenceTTS,
|
|
|
|
| 256 |
}
|
|
|
|
| 42 |
VoyageEmbed,
|
| 43 |
HuggingFaceEmbed,
|
| 44 |
VolcEngineEmbed,
|
| 45 |
+
GPUStackEmbed,
|
| 46 |
)
|
| 47 |
from .chat_model import (
|
| 48 |
GptTurbo,
|
|
|
|
| 81 |
AnthropicChat,
|
| 82 |
GoogleChat,
|
| 83 |
HuggingFaceChat,
|
| 84 |
+
GPUStackChat,
|
| 85 |
)
|
| 86 |
|
| 87 |
from .cv_model import (
|
|
|
|
| 118 |
BaiduYiyanRerank,
|
| 119 |
VoyageRerank,
|
| 120 |
QWenRerank,
|
| 121 |
+
GPUStackRerank,
|
| 122 |
)
|
| 123 |
from .sequence2txt_model import (
|
| 124 |
GPTSeq2txt,
|
|
|
|
| 126 |
AzureSeq2txt,
|
| 127 |
XinferenceSeq2txt,
|
| 128 |
TencentCloudSeq2txt,
|
| 129 |
+
GPUStackSeq2txt,
|
| 130 |
)
|
| 131 |
from .tts_model import (
|
| 132 |
FishAudioTTS,
|
|
|
|
| 134 |
OpenAITTS,
|
| 135 |
SparkTTS,
|
| 136 |
XinferenceTTS,
|
| 137 |
+
GPUStackTTS,
|
| 138 |
)
|
| 139 |
|
| 140 |
EmbeddingModel = {
|
|
|
|
| 166 |
"Voyage AI": VoyageEmbed,
|
| 167 |
"HuggingFace": HuggingFaceEmbed,
|
| 168 |
"VolcEngine": VolcEngineEmbed,
|
| 169 |
+
"GPUStack": GPUStackEmbed,
|
| 170 |
}
|
| 171 |
|
| 172 |
CvModel = {
|
|
|
|
| 226 |
"Anthropic": AnthropicChat,
|
| 227 |
"Google Cloud": GoogleChat,
|
| 228 |
"HuggingFace": HuggingFaceChat,
|
| 229 |
+
"GPUStack": GPUStackChat,
|
| 230 |
}
|
| 231 |
|
| 232 |
RerankModel = {
|
|
|
|
| 244 |
"BaiduYiyan": BaiduYiyanRerank,
|
| 245 |
"Voyage AI": VoyageRerank,
|
| 246 |
"Tongyi-Qianwen": QWenRerank,
|
| 247 |
+
"GPUStack": GPUStackRerank,
|
| 248 |
}
|
| 249 |
|
| 250 |
Seq2txtModel = {
|
|
|
|
| 253 |
"Azure-OpenAI": AzureSeq2txt,
|
| 254 |
"Xinference": XinferenceSeq2txt,
|
| 255 |
"Tencent Cloud": TencentCloudSeq2txt,
|
| 256 |
+
"GPUStack": GPUStackSeq2txt,
|
| 257 |
}
|
| 258 |
|
| 259 |
TTSModel = {
|
|
|
|
| 262 |
"OpenAI": OpenAITTS,
|
| 263 |
"XunFei Spark": SparkTTS,
|
| 264 |
"Xinference": XinferenceTTS,
|
| 265 |
+
"GPUStack": GPUStackTTS,
|
| 266 |
}
|
|
@@ -1514,3 +1514,11 @@ class GoogleChat(Base):
|
|
| 1514 |
yield ans + "\n**ERROR**: " + str(e)
|
| 1515 |
|
| 1516 |
yield response._chunks[-1].usage_metadata.total_token_count
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1514 |
yield ans + "\n**ERROR**: " + str(e)
|
| 1515 |
|
| 1516 |
yield response._chunks[-1].usage_metadata.total_token_count
|
| 1517 |
+
|
| 1518 |
+
class GPUStackChat(Base):
    """Chat model that talks to a GPUStack server via its ``v1-openai`` path."""

    def __init__(self, key=None, model_name="", base_url=""):
        """Point the base chat client at the GPUStack ``v1-openai`` endpoint.

        Args:
            key: API key forwarded to the base class.
            model_name: Name of the model deployed in GPUStack.
            base_url: Root URL of the GPUStack server, with or without a
                trailing ``/v1-openai`` segment.

        Raises:
            ValueError: If ``base_url`` is empty or ``None``.
        """
        if not base_url:
            raise ValueError("Local llm url cannot be None")
        # Drop trailing slashes first so "http://host/v1-openai/" is detected
        # as already carrying the suffix instead of getting it appended twice.
        base_url = base_url.rstrip("/")
        if base_url.split("/")[-1] != "v1-openai":
            # URLs always use "/"; os.path.join would emit "\\" on Windows.
            base_url = f"{base_url}/v1-openai"
        super().__init__(key, model_name, base_url)
|
|
@@ -30,7 +30,7 @@ import asyncio
|
|
| 30 |
from api import settings
|
| 31 |
from api.utils.file_utils import get_home_cache_dir
|
| 32 |
from rag.utils import num_tokens_from_string, truncate
|
| 33 |
-
import google.generativeai as genai
|
| 34 |
import json
|
| 35 |
|
| 36 |
|
|
@@ -799,3 +799,14 @@ class VolcEngineEmbed(OpenAIEmbed):
|
|
| 799 |
ark_api_key = json.loads(key).get('ark_api_key', '')
|
| 800 |
model_name = json.loads(key).get('ep_id', '') + json.loads(key).get('endpoint_id', '')
|
| 801 |
super().__init__(ark_api_key,model_name,base_url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
from api import settings
|
| 31 |
from api.utils.file_utils import get_home_cache_dir
|
| 32 |
from rag.utils import num_tokens_from_string, truncate
|
| 33 |
+
import google.generativeai as genai
|
| 34 |
import json
|
| 35 |
|
| 36 |
|
|
|
|
| 799 |
ark_api_key = json.loads(key).get('ark_api_key', '')
|
| 800 |
model_name = json.loads(key).get('ep_id', '') + json.loads(key).get('endpoint_id', '')
|
| 801 |
super().__init__(ark_api_key,model_name,base_url)
|
| 802 |
+
|
| 803 |
+
class GPUStackEmbed(OpenAIEmbed):
    """Embedding model served by GPUStack through its ``v1-openai`` path."""

    def __init__(self, key, model_name, base_url):
        """Create an OpenAI client that targets the GPUStack server.

        Args:
            key: API key passed to the OpenAI client.
            model_name: Name of the embedding model deployed in GPUStack.
            base_url: Root URL of the GPUStack server, with or without a
                trailing ``/v1-openai`` segment.

        Raises:
            ValueError: If ``base_url`` is empty or ``None``.
        """
        if not base_url:
            raise ValueError("url cannot be None")
        # Drop trailing slashes first so "http://host/v1-openai/" is detected
        # as already carrying the suffix instead of getting it appended twice.
        base_url = base_url.rstrip("/")
        if base_url.split("/")[-1] != "v1-openai":
            # URLs always use "/"; os.path.join would emit "\\" on Windows.
            base_url = f"{base_url}/v1-openai"

        # The API key is a secret: it must not be printed or logged here.
        self.client = OpenAI(api_key=key, base_url=base_url)
        self.model_name = model_name
|
|
@@ -18,10 +18,12 @@ import threading
|
|
| 18 |
from urllib.parse import urljoin
|
| 19 |
|
| 20 |
import requests
|
|
|
|
| 21 |
from huggingface_hub import snapshot_download
|
| 22 |
import os
|
| 23 |
from abc import ABC
|
| 24 |
import numpy as np
|
|
|
|
| 25 |
|
| 26 |
from api import settings
|
| 27 |
from api.utils.file_utils import get_home_cache_dir
|
|
@@ -457,3 +459,53 @@ class QWenRerank(Base):
|
|
| 457 |
return rank, resp.usage.total_tokens
|
| 458 |
else:
|
| 459 |
raise ValueError(f"Error calling QWenRerank model {self.model_name}: {resp.status_code} - {resp.text}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
from urllib.parse import urljoin
|
| 19 |
|
| 20 |
import requests
|
| 21 |
+
import httpx
|
| 22 |
from huggingface_hub import snapshot_download
|
| 23 |
import os
|
| 24 |
from abc import ABC
|
| 25 |
import numpy as np
|
| 26 |
+
from yarl import URL
|
| 27 |
|
| 28 |
from api import settings
|
| 29 |
from api.utils.file_utils import get_home_cache_dir
|
|
|
|
| 459 |
return rank, resp.usage.total_tokens
|
| 460 |
else:
|
| 461 |
raise ValueError(f"Error calling QWenRerank model {self.model_name}: {resp.status_code} - {resp.text}")
|
| 462 |
+
|
| 463 |
+
class GPUStackRerank(Base):
    """Rerank model that calls a GPUStack server's ``/v1/rerank`` endpoint."""

    def __init__(
            self, key, model_name, base_url
    ):
        """Store the rerank endpoint URL and the request headers.

        Args:
            key: API key sent as a Bearer token.
            model_name: Name of the rerank model deployed in GPUStack.
            base_url: Root URL of the GPUStack server.

        Raises:
            ValueError: If ``base_url`` is empty or ``None``.
        """
        if not base_url:
            raise ValueError("url cannot be None")

        self.model_name = model_name
        self.base_url = str(URL(base_url) / "v1" / "rerank")
        self.headers = {
            "accept": "application/json",
            "content-type": "application/json",
            "authorization": f"Bearer {key}",
        }

    def similarity(self, query: str, texts: list):
        """Score every text against the query.

        Args:
            query: The query string.
            texts: Candidate documents to rank.

        Returns:
            A tuple ``(rank, token_count)``. ``rank`` is a float array with
            one relevance score per entry of ``texts`` (zeros for entries the
            server did not report). ``token_count`` is the summed token count
            of ``texts``, or 0 when the response carries no ``results`` key.

        Raises:
            ValueError: If the server answers with an HTTP error status.
        """
        payload = {
            "model": self.model_name,
            "query": query,
            "documents": texts,
            "top_n": len(texts),
        }

        try:
            response = requests.post(
                self.base_url, json=payload, headers=self.headers
            )
            response.raise_for_status()
        # requests raises requests.exceptions.HTTPError from raise_for_status();
        # the httpx type alone never matched, so the ValueError below was
        # unreachable. httpx.HTTPStatusError is kept so the handler stays a
        # superset of the previous one.
        except (requests.exceptions.HTTPError, httpx.HTTPStatusError) as e:
            raise ValueError(
                f"Error calling GPUStackRerank model {self.model_name}: {e.response.status_code} - {e.response.text}"
            ) from e

        response_json = response.json()

        rank = np.zeros(len(texts), dtype=float)
        if "results" not in response_json:
            return rank, 0

        token_count = sum(num_tokens_from_string(t) for t in texts)

        # Each result carries the position of the document in the request.
        for result in response_json["results"]:
            rank[result["index"]] = result["relevance_score"]

        return rank, token_count
|
| 511 |
+
|
|
@@ -13,6 +13,7 @@
|
|
| 13 |
# See the License for the specific language governing permissions and
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
|
|
|
| 16 |
import requests
|
| 17 |
from openai.lib.azure import AzureOpenAI
|
| 18 |
import io
|
|
@@ -191,3 +192,14 @@ class TencentCloudSeq2txt(Base):
|
|
| 191 |
return "**ERROR**: " + str(e), 0
|
| 192 |
except Exception as e:
|
| 193 |
return "**ERROR**: " + str(e), 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
# See the License for the specific language governing permissions and
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
| 16 |
+
import os
|
| 17 |
import requests
|
| 18 |
from openai.lib.azure import AzureOpenAI
|
| 19 |
import io
|
|
|
|
| 192 |
return "**ERROR**: " + str(e), 0
|
| 193 |
except Exception as e:
|
| 194 |
return "**ERROR**: " + str(e), 0
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
class GPUStackSeq2txt(Base):
    """Speech-to-text model served by GPUStack through its ``v1-openai`` path."""

    def __init__(self, key, model_name, base_url):
        """Record the endpoint, model name and API key.

        Args:
            key: API key for the GPUStack server.
            model_name: Name of the speech-to-text model deployed in GPUStack.
            base_url: Root URL of the GPUStack server, with or without a
                trailing ``/v1-openai`` segment.

        Raises:
            ValueError: If ``base_url`` is empty or ``None``.
        """
        if not base_url:
            raise ValueError("url cannot be None")
        # Drop trailing slashes first so "http://host/v1-openai/" is detected
        # as already carrying the suffix instead of getting it appended twice.
        base_url = base_url.rstrip("/")
        if base_url.split("/")[-1] != "v1-openai":
            # URLs always use "/"; os.path.join would emit "\\" on Windows.
            base_url = f"{base_url}/v1-openai"
        # NOTE(review): no client object is created here; confirm that the
        # inherited methods of Base work with only these three attributes.
        self.base_url = base_url
        self.model_name = model_name
        self.key = key
|
|
@@ -355,3 +355,35 @@ class OllamaTTS(Base):
|
|
| 355 |
for chunk in response.iter_content():
|
| 356 |
if chunk:
|
| 357 |
yield chunk
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
for chunk in response.iter_content():
|
| 356 |
if chunk:
|
| 357 |
yield chunk
|
| 358 |
+
|
| 359 |
+
class GPUStackTTS:
    """Text-to-speech client for a GPUStack server's ``v1-openai`` audio API."""

    def __init__(self, key, model_name, **kwargs):
        """Store the server root URL, credentials and request headers.

        Args:
            key: API key sent as a Bearer token.
            model_name: Name of the TTS model deployed in GPUStack.
            **kwargs: Must contain ``base_url``, the root URL of the GPUStack
                server, with or without a trailing ``/v1-openai`` segment.

        Raises:
            ValueError: If ``base_url`` is missing or empty.
        """
        base_url = kwargs.get("base_url", None)
        if not base_url:
            # Without this check the request would go to "None/v1-openai/...".
            raise ValueError("url cannot be None")
        # Keep only the server root: tts() appends "/v1-openai/..." itself, so
        # a URL that already ends with that segment must not get it twice.
        base_url = base_url.rstrip("/")
        suffix = "/v1-openai"
        if base_url.endswith(suffix):
            base_url = base_url[: -len(suffix)]

        self.base_url = base_url
        self.api_key = key
        self.model_name = model_name
        self.headers = {
            "accept": "application/json",
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}"
        }

    def tts(self, text, voice="Chinese Female", stream=True):
        """Synthesize speech for ``text`` and yield the audio in chunks.

        Args:
            text: The text to synthesize.
            voice: Voice name sent in the request payload.
            stream: Passed to ``requests.post`` as its ``stream`` argument.

        Yields:
            Non-empty chunks of the response body, up to 1024 bytes each.

        Raises:
            Exception: If the server does not answer with HTTP 200.
        """
        payload = {
            "model": self.model_name,
            "input": text,
            "voice": voice
        }

        response = requests.post(
            f"{self.base_url}/v1-openai/audio/speech",
            headers=self.headers,
            json=payload,
            stream=stream
        )

        if response.status_code != 200:
            raise Exception(f"**Error**: {response.status_code}, {response.text}")

        for chunk in response.iter_content(chunk_size=1024):
            if chunk:
                yield chunk
|
|
|
|
@@ -72,6 +72,7 @@ export const IconMap = {
|
|
| 72 |
'nomic-ai': 'nomic-ai',
|
| 73 |
jinaai: 'jina',
|
| 74 |
'sentence-transformers': 'sentence-transformers',
|
|
|
|
| 75 |
};
|
| 76 |
|
| 77 |
export const TimezoneList = [
|
|
|
|
| 72 |
'nomic-ai': 'nomic-ai',
|
| 73 |
jinaai: 'jina',
|
| 74 |
'sentence-transformers': 'sentence-transformers',
|
| 75 |
+
GPUStack: 'gpustack',
|
| 76 |
};
|
| 77 |
|
| 78 |
export const TimezoneList = [
|
|
@@ -31,6 +31,7 @@ export const LocalLlmFactories = [
|
|
| 31 |
'Replicate',
|
| 32 |
'OpenRouter',
|
| 33 |
'HuggingFace',
|
|
|
|
| 34 |
];
|
| 35 |
|
| 36 |
export enum TenantRole {
|
|
|
|
| 31 |
'Replicate',
|
| 32 |
'OpenRouter',
|
| 33 |
'HuggingFace',
|
| 34 |
+
'GPUStack',
|
| 35 |
];
|
| 36 |
|
| 37 |
export enum TenantRole {
|
|
@@ -29,6 +29,7 @@ const llmFactoryToUrlMap = {
|
|
| 29 |
OpenRouter: 'https://openrouter.ai/docs',
|
| 30 |
HuggingFace:
|
| 31 |
'https://huggingface.co/docs/text-embeddings-inference/quick_tour',
|
|
|
|
| 32 |
};
|
| 33 |
type LlmFactory = keyof typeof llmFactoryToUrlMap;
|
| 34 |
|
|
@@ -76,6 +77,13 @@ const OllamaModal = ({
|
|
| 76 |
{ value: 'speech2text', label: 'sequence2text' },
|
| 77 |
{ value: 'tts', label: 'tts' },
|
| 78 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
Default: [
|
| 80 |
{ value: 'chat', label: 'chat' },
|
| 81 |
{ value: 'embedding', label: 'embedding' },
|
|
|
|
| 29 |
OpenRouter: 'https://openrouter.ai/docs',
|
| 30 |
HuggingFace:
|
| 31 |
'https://huggingface.co/docs/text-embeddings-inference/quick_tour',
|
| 32 |
+
GPUStack: 'https://docs.gpustack.ai/latest/quickstart',
|
| 33 |
};
|
| 34 |
type LlmFactory = keyof typeof llmFactoryToUrlMap;
|
| 35 |
|
|
|
|
| 77 |
{ value: 'speech2text', label: 'sequence2text' },
|
| 78 |
{ value: 'tts', label: 'tts' },
|
| 79 |
],
|
| 80 |
+
GPUStack: [
|
| 81 |
+
{ value: 'chat', label: 'chat' },
|
| 82 |
+
{ value: 'embedding', label: 'embedding' },
|
| 83 |
+
{ value: 'rerank', label: 'rerank' },
|
| 84 |
+
{ value: 'speech2text', label: 'sequence2text' },
|
| 85 |
+
{ value: 'tts', label: 'tts' },
|
| 86 |
+
],
|
| 87 |
Default: [
|
| 88 |
{ value: 'chat', label: 'chat' },
|
| 89 |
{ value: 'embedding', label: 'embedding' },
|