"""Utility functions for working with the language model."""
import logging
from typing import Optional

from langchain_google_genai import ChatGoogleGenerativeAI

from config import settings
from services.google import ApiKeyPool
# Module-level logger, named after this module per standard convention.
logger = logging.getLogger(__name__)
# Shared key pool instance; `_get_api_key` draws from it round-robin.
_pool = ApiKeyPool()
# Default Gemini model used by `create_llm`.
MODEL_NAME = "gemini-2.5-flash"
def _get_api_key() -> str:
    """Fetch the next API key from the shared pool (round-robin, thread-safe)."""
    key: str = _pool.get_key_sync()
    return key
def create_llm(
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
) -> ChatGoogleGenerativeAI:
    """Create a standard LLM instance.

    Args:
        temperature: Sampling temperature; defaults to ``settings.temperature``.
        top_p: Nucleus-sampling cutoff; defaults to ``settings.top_p``.

    Returns:
        A configured ``ChatGoogleGenerativeAI`` client for ``MODEL_NAME``.
    """
    # Resolve defaults at call time so runtime changes to `settings` are
    # honored; a `settings.x` default in the signature is frozen at import.
    if temperature is None:
        temperature = settings.temperature
    if top_p is None:
        top_p = settings.top_p
    llm = ChatGoogleGenerativeAI(
        model=MODEL_NAME,
        google_api_key=_get_api_key(),
        temperature=temperature,
        top_p=top_p,
        thinking_budget=1024,
        timeout=settings.request_timeout,
        max_retries=3,
    )
    return llm
def create_light_llm(
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
) -> ChatGoogleGenerativeAI:
    """Create a light LLM instance with a shorter timeout.

    Args:
        temperature: Sampling temperature; defaults to ``settings.temperature``.
        top_p: Nucleus-sampling cutoff; defaults to ``settings.top_p``.

    Returns:
        A configured ``ChatGoogleGenerativeAI`` client for ``gemini-2.0-flash``.
    """
    # Resolve defaults at call time so runtime changes to `settings` are
    # honored; a `settings.x` default in the signature is frozen at import.
    # NOTE(review): docstring says "shorter timeout" but the same
    # `settings.request_timeout` is used as in `create_llm` — confirm intent.
    if temperature is None:
        temperature = settings.temperature
    if top_p is None:
        top_p = settings.top_p
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash",
        google_api_key=_get_api_key(),
        temperature=temperature,
        top_p=top_p,
        timeout=settings.request_timeout,
        max_retries=3,
    )
    return llm
def create_precise_llm() -> ChatGoogleGenerativeAI:
    """Build an LLM configured for deterministic output.

    Delegates to ``create_llm`` with temperature 0 and top_p 1.
    """
    deterministic = {"temperature": 0, "top_p": 1}
    return create_llm(**deterministic)