---
# LiteLLM proxy configuration.
# Serverless Hugging Face Inference models (all authenticated via HF_TOKEN)
# plus one Ollama example with model-specific parameters.
model_list:
  - model_name: Llama-3.3-70B-Instruct
    litellm_params:
      model: huggingface/meta-llama/Llama-3.3-70B-Instruct
      api_key: os.environ/HF_TOKEN  # ensure you have `HF_TOKEN` in your .env
  - model_name: Qwen2.5-72B-Instruct
    litellm_params:
      model: huggingface/Qwen/Qwen2.5-72B-Instruct
      api_key: os.environ/HF_TOKEN  # ensure you have `HF_TOKEN` in your .env
  - model_name: DeepSeek-R1-Distill-Qwen-32B
    litellm_params:
      model: huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
      api_key: os.environ/HF_TOKEN  # ensure you have `HF_TOKEN` in your .env
  - model_name: QwQ-32B
    litellm_params:
      model: huggingface/Qwen/QwQ-32B
      api_key: os.environ/HF_TOKEN  # ensure you have `HF_TOKEN` in your .env
  - model_name: Mistral-Small-3.1-24B-Instruct-2503
    litellm_params:
      model: huggingface/mistralai/Mistral-Small-3.1-24B-Instruct-2503
      api_key: os.environ/HF_TOKEN  # ensure you have `HF_TOKEN` in your .env
  - model_name: Phi-4
    litellm_params:
      model: huggingface/microsoft/phi-4
      api_key: os.environ/HF_TOKEN  # ensure you have `HF_TOKEN` in your .env
  - model_name: Qwen3-235B-A22B
    litellm_params:
      model: huggingface/Qwen/Qwen3-235B-A22B
      api_key: os.environ/HF_TOKEN  # ensure you have `HF_TOKEN` in your .env
  - model_name: c4ai-command-r-plus-08-2024
    litellm_params:
      model: huggingface/CohereLabs/c4ai-command-r-plus-08-2024
      api_key: os.environ/HF_TOKEN  # ensure you have `HF_TOKEN` in your .env
  - model_name: Llama-3.1-Nemotron-70B-Instruct-HF
    litellm_params:
      model: huggingface/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
      api_key: os.environ/HF_TOKEN  # ensure you have `HF_TOKEN` in your .env
  - model_name: gemma-3-27b-it
    litellm_params:
      model: huggingface/google/gemma-3-27b-it
      api_key: os.environ/HF_TOKEN  # ensure you have `HF_TOKEN` in your .env
  - model_name: Qwen2.5-VL-32B-Instruct
    litellm_params:
      model: huggingface/Qwen/Qwen2.5-VL-32B-Instruct
      api_key: os.environ/HF_TOKEN  # ensure you have `HF_TOKEN` in your .env
  - model_name: Hermes-3-Llama-3.1-8B
    litellm_params:
      model: huggingface/NousResearch/Hermes-3-Llama-3.1-8B
      api_key: os.environ/HF_TOKEN  # ensure you have `HF_TOKEN` in your .env
  - model_name: DeepSeek-R1
    litellm_params:
      model: huggingface/together/deepseek-ai/DeepSeek-R1
      api_key: os.environ/HF_TOKEN  # ensure you have `HF_TOKEN` in your .env
  - model_name: gpt-oss-120b
    litellm_params:
      model: huggingface/openai/gpt-oss-120b
      api_key: os.environ/HF_TOKEN

  # ============================ #
  # Model-specific parameters with example
  # ============================ #
  - model_name: ollama-phi3.5-3.8b
    litellm_params:
      model: ollama/phi3.5:3.8b
      api_base: https://zhengr-ollama.hf.space
      # Model-specific parameters
      # model: "huggingface/mistralai/Mistral-7B-Instruct-v0.1"
      # api_base: ""
      # api_key: ""  # [OPTIONAL] for hf inference endpoints
      # initial_prompt_value: "\n"
      # roles: {"system":{"pre_message":"<|im_start|>system\n", "post_message":"<|im_end|>"}, "assistant":{"pre_message":"<|im_start|>assistant\n","post_message":"<|im_end|>"}, "user":{"pre_message":"<|im_start|>user\n","post_message":"<|im_end|>"}}
      # final_prompt_value: "\n"
      # bos_token: ""
      # eos_token: ""
      # max_tokens: 4096

# module level litellm settings - https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py
litellm_settings:
  drop_params: true

general_settings:
  # master_key: sk-1234  # [OPTIONAL] Only use this if you to require all calls to contain this key (Authorization: Bearer sk-1234)
  # alerting: ["slack"]  # [OPTIONAL] If you want Slack Alerts for Hanging LLM requests, Slow llm responses, Budget Alerts. Make sure to set `SLACK_WEBHOOK_URL` in your env