mgbam committed
Commit dcf9dad · verified · 1 parent: e7d5ce8

Create models.py

Files changed (1):
  models.py +155 -0
models.py ADDED
@@ -0,0 +1,155 @@
+ # models.py
+ from dataclasses import dataclass
+ from typing import List, Optional
+
+
+ @dataclass
+ class ModelInfo:
+     """
+     Represents metadata for an inference model.
+
+     Attributes:
+         name: Human-readable name of the model.
+         id: Unique model identifier (HF/externally routed).
+         description: Short description of the model's capabilities.
+         default_provider: Preferred inference provider
+             ("auto", "groq", "openai", "gemini", "fireworks").
+     """
+     name: str
+     id: str
+     description: str
+     default_provider: str = "auto"
+
+
+ # Registry of supported models
+ AVAILABLE_MODELS: List[ModelInfo] = [
+     ModelInfo(
+         name="Moonshot Kimi-K2",
+         id="moonshotai/Kimi-K2-Instruct",
+         description="Moonshot AI Kimi-K2-Instruct model for code generation and general tasks",
+         default_provider="groq",
+     ),
+     ModelInfo(
+         name="DeepSeek V3",
+         id="deepseek-ai/DeepSeek-V3-0324",
+         description="DeepSeek V3 model for code generation",
+     ),
+     ModelInfo(
+         name="DeepSeek R1",
+         id="deepseek-ai/DeepSeek-R1-0528",
+         description="DeepSeek R1 model for code generation",
+     ),
+     ModelInfo(
+         name="ERNIE-4.5-VL",
+         id="baidu/ERNIE-4.5-VL-424B-A47B-Base-PT",
+         description="ERNIE-4.5-VL model for multimodal code generation with image support",
+     ),
+     ModelInfo(
+         name="MiniMax M1",
+         id="MiniMaxAI/MiniMax-M1-80k",
+         description="MiniMax M1 model for code generation and general tasks",
+     ),
+     ModelInfo(
+         name="Qwen3-235B-A22B",
+         id="Qwen/Qwen3-235B-A22B",
+         description="Qwen3-235B-A22B model for code generation and general tasks",
+     ),
+     ModelInfo(
+         name="SmolLM3-3B",
+         id="HuggingFaceTB/SmolLM3-3B",
+         description="SmolLM3-3B model for code generation and general tasks",
+     ),
+     ModelInfo(
+         name="GLM-4.1V-9B-Thinking",
+         id="THUDM/GLM-4.1V-9B-Thinking",
+         description="GLM-4.1V-9B-Thinking model for multimodal code generation with image support",
+     ),
+     ModelInfo(
+         name="OpenAI GPT-4",
+         id="openai/gpt-4",
+         description="OpenAI GPT-4 model via HF Inference Providers",
+         default_provider="openai",
+     ),
+     ModelInfo(
+         name="Gemini Pro",
+         id="gemini/pro",
+         description="Google Gemini Pro model via HF Inference Providers",
+         default_provider="gemini",
+     ),
+     ModelInfo(
+         name="Fireworks AI",
+         id="fireworks-ai/fireworks-v1",
+         description="Fireworks AI model via HF Inference Providers",
+         default_provider="fireworks",
+     ),
+ ]
+
+
+ def find_model(identifier: str) -> Optional[ModelInfo]:
+     """
+     Look up a model by its human-readable name or identifier.
+
+     Args:
+         identifier: ModelInfo.name (case-insensitive) or ModelInfo.id.
+
+     Returns:
+         The matching ModelInfo, or None if not found.
+     """
+     identifier_lower = identifier.lower()
+     for model in AVAILABLE_MODELS:
+         if model.id == identifier or model.name.lower() == identifier_lower:
+             return model
+     return None
+
+
+ # inference.py
+ from typing import Dict, Iterator, List, Optional
+
+ from hf_client import get_inference_client
+
+
+ def chat_completion(
+     model_id: str,
+     messages: List[Dict[str, str]],
+     provider: Optional[str] = None,
+     max_tokens: int = 4096,
+ ) -> str:
+     """
+     Send a chat completion request to the appropriate inference provider.
+
+     Args:
+         model_id: The model identifier to use.
+         messages: A list of OpenAI-style {'role', 'content'} messages.
+         provider: Optional provider override; uses the model default if None.
+         max_tokens: Maximum number of tokens to generate.
+
+     Returns:
+         The assistant's response content.
+     """
+     # Initialize the client (provider resolution happens inside)
+     client = get_inference_client(model_id, provider or "auto")
+     response = client.chat.completions.create(
+         model=model_id,
+         messages=messages,
+         max_tokens=max_tokens,
+     )
+     # Extract and return the first choice's content
+     return response.choices[0].message.content
+
+
+ def stream_chat_completion(
+     model_id: str,
+     messages: List[Dict[str, str]],
+     provider: Optional[str] = None,
+     max_tokens: int = 4096,
+ ) -> Iterator[str]:
+     """
+     Generator for streaming chat completions.
+
+     Yields partial message chunks as strings.
+     """
+     client = get_inference_client(model_id, provider or "auto")
+     stream = client.chat.completions.create(
+         model=model_id,
+         messages=messages,
+         max_tokens=max_tokens,
+         stream=True,
+     )
+     for chunk in stream:
+         delta = getattr(chunk.choices[0].delta, "content", None)
+         if delta:
+             yield delta
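
For reference, a minimal usage sketch of the helpers this commit adds. It is illustrative only: it assumes the committed models.py is importable as-is (the "# inference.py" block has not been split into its own module), and that hf_client.get_inference_client returns an OpenAI-compatible client with working provider credentials, as the calls above imply.

# example_usage.py (hypothetical, not part of this commit)
from models import find_model, chat_completion, stream_chat_completion

# Look up a registry entry by human-readable name (case-insensitive)
model = find_model("Moonshot Kimi-K2")
assert model is not None, "model not found in AVAILABLE_MODELS"

messages = [
    {"role": "system", "content": "You are a helpful coding assistant."},
    {"role": "user", "content": "Write a Python function that reverses a string."},
]

# One-shot completion, honoring the model's preferred provider ("groq" here)
reply = chat_completion(model.id, messages, provider=model.default_provider)
print(reply)

# Streaming variant: print partial chunks as they arrive
for chunk in stream_chat_completion(model.id, messages):
    print(chunk, end="", flush=True)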