from __future__ import annotations

import os
from typing import Any

from pydantic import BaseModel, Field, SecretStr

from openhands.core.logger import LOG_DIR


class LLMConfig(BaseModel):
    """Configuration for the LLM model.

    Attributes:
        model: The model to use.
        api_key: The API key to use.
        base_url: The base URL for the API. This is necessary for local LLMs; it is also used for Azure embeddings.
        api_version: The version of the API.
        embedding_model: The embedding model to use.
        embedding_base_url: The base URL for the embedding API.
        embedding_deployment_name: The name of the deployment for the embedding API. This is used for Azure OpenAI.
        aws_access_key_id: The AWS access key ID.
        aws_secret_access_key: The AWS secret access key.
        aws_region_name: The AWS region name.
        openrouter_site_url: The site URL reported to OpenRouter (exported as OR_SITE_URL for litellm).
        openrouter_app_name: The app name reported to OpenRouter (exported as OR_APP_NAME for litellm).
        num_retries: The number of retries to attempt.
        retry_multiplier: The multiplier for the exponential backoff.
        retry_min_wait: The minimum time to wait between retries, in seconds. This is the exponential backoff minimum. For models with very low rate limits, this can be set to 15-20.
        retry_max_wait: The maximum time to wait between retries, in seconds. This is the exponential backoff maximum.
        timeout: The timeout for the API.
        max_message_chars: The approximate maximum number of characters in the content of an event included in the prompt to the LLM. Larger observations are truncated.
        temperature: The temperature for the API.
        top_p: The top p for the API.
        custom_llm_provider: The custom LLM provider to use. This is undocumented in openhands and normally not used; it is documented on the litellm side.
        max_input_tokens: The maximum number of input tokens. Note that this is currently unused, and the value at runtime is actually the total tokens in OpenAI (e.g. 128,000 tokens for GPT-4).
        max_output_tokens: The maximum number of output tokens. This is sent to the LLM.
        input_cost_per_token: The cost per input token. This will be available in logs for the user to check.
        output_cost_per_token: The cost per output token. This will be available in logs for the user to check.
        ollama_base_url: The base URL for the OLLAMA API.
        drop_params: Drop any unmapped (unsupported) params without raising an exception.
        modify_params: Allow litellm to modify params, e.g. by adding a default message when a message is empty.
        disable_vision: If the model is vision capable, this option allows disabling image processing (useful for cost reduction).
        reasoning_effort: The effort to put into reasoning. One of 'low', 'medium', 'high', or 'none'. Exclusive to o1 models.
        caching_prompt: Use the prompt caching feature if provided by the LLM and supported by the provider.
        log_completions: Whether to log LLM completions to the state.
        log_completions_folder: The folder to log LLM completions to. Required if log_completions is True.
        custom_tokenizer: A custom tokenizer to use for token counting.
        native_tool_calling: Whether to use native tool calling if supported by the model. Can be True, False, or unset.
""" model: str = Field(default='claude-3-5-sonnet-20241022') api_key: SecretStr | None = Field(default=None) base_url: str | None = Field(default=None) api_version: str | None = Field(default=None) embedding_model: str = Field(default='local') embedding_base_url: str | None = Field(default=None) embedding_deployment_name: str | None = Field(default=None) aws_access_key_id: SecretStr | None = Field(default=None) aws_secret_access_key: SecretStr | None = Field(default=None) aws_region_name: str | None = Field(default=None) openrouter_site_url: str = Field(default='https://docs.all-hands.dev/') openrouter_app_name: str = Field(default='OpenHands') num_retries: int = Field(default=8) retry_multiplier: float = Field(default=2) retry_min_wait: int = Field(default=15) retry_max_wait: int = Field(default=120) timeout: int | None = Field(default=None) max_message_chars: int = Field( default=30_000 ) # maximum number of characters in an observation's content when sent to the llm temperature: float = Field(default=0.0) top_p: float = Field(default=1.0) custom_llm_provider: str | None = Field(default=None) max_input_tokens: int | None = Field(default=None) max_output_tokens: int | None = Field(default=None) input_cost_per_token: float | None = Field(default=None) output_cost_per_token: float | None = Field(default=None) ollama_base_url: str | None = Field(default=None) # This setting can be sent in each call to litellm drop_params: bool = Field(default=True) # Note: this setting is actually global, unlike drop_params modify_params: bool = Field(default=True) disable_vision: bool | None = Field(default=None) caching_prompt: bool = Field(default=True) log_completions: bool = Field(default=False) log_completions_folder: str = Field(default=os.path.join(LOG_DIR, 'completions')) custom_tokenizer: str | None = Field(default=None) native_tool_calling: bool | None = Field(default=None) model_config = {'extra': 'forbid'} def model_post_init(self, __context: Any): """Post-initialization hook to assign OpenRouter-related variables to environment variables. This ensures that these values are accessible to litellm at runtime. """ super().model_post_init(__context) # Assign OpenRouter-specific variables to environment variables if self.openrouter_site_url: os.environ['OR_SITE_URL'] = self.openrouter_site_url if self.openrouter_app_name: os.environ['OR_APP_NAME'] = self.openrouter_app_name