# Spaces: Sleeping Sleeping  (HuggingFace Spaces status residue from page scrape — not part of this config)
model_list:
  - model_name: Qwen3-235B-A22B
    litellm_params:
      model: huggingface/hf-inference/models/Qwen/Qwen3-235B-A22B
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: ollama-phi3.5-3.8b
    litellm_params:
      model: ollama/phi3.5:3.8b
      api_base: https://zhengr-ollama.hf.space
      # Model-specific parameters
      #model: "huggingface/mistralai/Mistral-7B-Instruct-v0.1"
      #api_base: "<your-api-base>"
      #api_key: "<your-api-key>" # [OPTIONAL] for hf inference endpoints
      #initial_prompt_value: "\n"
      #roles: {"system":{"pre_message":"<|im_start|>system\n", "post_message":"<|im_end|>"}, "assistant":{"pre_message":"<|im_start|>assistant\n","post_message":"<|im_end|>"}, "user":{"pre_message":"<|im_start|>user\n","post_message":"<|im_end|>"}}
      #final_prompt_value: "\n"
      #bos_token: "<s>"
      #eos_token: "</s>"
      #max_tokens: 4096

litellm_settings: # module level litellm settings - https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py
  drop_params: True

general_settings:
  #master_key: sk-1234 # [OPTIONAL] Only use this if you to require all calls to contain this key (Authorization: Bearer sk-1234)
  #alerting: ["slack"] # [OPTIONAL] If you want Slack Alerts for Hanging LLM requests, Slow llm responses, Budget Alerts. Make sure to set `SLACK_WEBHOOK_URL` in your env