|
|
|
|
|
|
|
|
|
from __future__ import annotations |
|
|
|
from dataclasses import dataclass |
|
from typing import List, Optional |
|
|
|
|
|
@dataclass(slots=True, frozen=True) |
|
class ModelInfo: |
|
""" |
|
Metadata for a single model entry. |
|
|
|
Attributes |
|
---------- |
|
name : Human‑readable label shown in the UI. |
|
id : Fully‑qualified model path, e.g. "openai/gpt‑4". |
|
description : Short capability blurb. |
|
default_provider: Which provider to send inference requests to if the |
|
caller does **not** override it. Supported values: |
|
"auto" | "groq" | "openai" | "gemini" | "fireworks". |
|
The special value "auto" lets HF Inference decide. |
|
""" |
|
name: str |
|
id: str |
|
description: str |
|
default_provider: str = "auto" |
|
|
|
|
|
|
|
|
|
|
|
# Registry of every model the UI exposes. Entries without an explicit
# ``default_provider`` inherit "auto", which lets HF Inference choose the
# provider; the remaining entries are pinned to a specific provider.
#
# NOTE(review): several ``name`` labels contain U+2011 (non-breaking
# hyphen) while the matching ``id`` strings use ASCII "-"; a name-based
# lookup must reproduce the exact character — confirm this mix is
# intentional.
AVAILABLE_MODELS: List[ModelInfo] = [


    # Qwen text/code generation models.
    ModelInfo(

        name="Qwen/Qwen3‑32B",

        id="Qwen/Qwen3-32B",

        description="Qwen3‑32B model for high‑capacity code and text generation",

    ),

    ModelInfo(

        name="Qwen3‑235B‑A22B",

        id="Qwen/Qwen3-235B-A22B",

        description="Qwen3‑235B‑A22B model for code generation and general tasks",

    ),


    # Moonshot Kimi-K2 — explicitly routed to Groq.
    ModelInfo(

        name="Moonshot Kimi‑K2",

        id="moonshotai/Kimi-K2-Instruct",

        description="Moonshot AI Kimi‑K2‑Instruct (code, chat)",

        default_provider="groq",

    ),


    # DeepSeek code-generation models.
    ModelInfo(

        name="DeepSeek V3",

        id="deepseek-ai/DeepSeek-V3-0324",

        description="DeepSeek V3 model for code generation",

    ),

    ModelInfo(

        name="DeepSeek R1",

        id="deepseek-ai/DeepSeek-R1-0528",

        description="DeepSeek R1 model for code generation",

    ),


    # Multimodal models (image + text).
    ModelInfo(

        name="ERNIE‑4.5‑VL",

        id="baidu/ERNIE-4.5-VL-424B-A47B-Base-PT",

        description="ERNIE‑4.5‑VL multimodal model (image + text)",

    ),

    ModelInfo(

        name="GLM‑4.1V‑9B‑Thinking",

        id="THUDM/GLM-4.1V-9B-Thinking",

        description="GLM‑4.1V‑9B multimodal reasoning model",

    ),


    # Lightweight / long-context general models.
    ModelInfo(

        name="SmolLM3‑3B",

        id="HuggingFaceTB/SmolLM3-3B",

        description="SmolLM3‑3B fast, low‑latency model",

    ),

    ModelInfo(

        name="MiniMax M1",

        id="MiniMaxAI/MiniMax-M1-80k",

        description="MiniMax M1 80k‑context general model",

    ),


    # Entries pinned to a specific HF Inference Provider.
    ModelInfo(

        name="OpenAI GPT‑4",

        id="openai/gpt-4",

        description="OpenAI GPT‑4 accessed through HF Inference Providers",

        default_provider="openai",

    ),

    ModelInfo(

        name="Gemini Pro",

        id="gemini/pro",

        description="Google Gemini Pro via HF Inference Providers",

        default_provider="gemini",

    ),

    ModelInfo(

        name="Fireworks V1",

        id="fireworks-ai/fireworks-v1",

        description="Fireworks AI flagship model",

        default_provider="fireworks",

    ),

]
|
|
|
|
|
|
|
|
|
|
|
def find_model(identifier: str) -> Optional[ModelInfo]:
    """
    Look up a model by exact ``id`` or by case-insensitive ``name``.

    Parameters
    ----------
    identifier : str
        Either the fully-qualified model id (e.g. ``"openai/gpt-4"``),
        matched exactly, or the human-readable label (e.g.
        ``"OpenAI GPT-4"``), matched without regard to case.

    Returns
    -------
    Optional[ModelInfo]
        The first matching registry entry, or ``None`` when nothing
        matches.
    """
    wanted_label = identifier.lower()
    return next(
        (
            model
            for model in AVAILABLE_MODELS
            if model.id == identifier or model.name.lower() == wanted_label
        ),
        None,
    )
|
|