model_list:
  - model_name: Llama-3.3-70B-Instruct
    litellm_params:
      model: huggingface/meta-llama/Llama-3.3-70B-Instruct
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: Qwen2.5-72B-Instruct
    litellm_params:
      model: huggingface/Qwen/Qwen2.5-72B-Instruct
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: DeepSeek-R1-Distill-Qwen-32B
    litellm_params:
      model: huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: QwQ-32B
    litellm_params:
      model: huggingface/Qwen/QwQ-32B
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: Mistral-Small-3.1-24B-Instruct-2503
    litellm_params:
      model: huggingface/mistralai/Mistral-Small-3.1-24B-Instruct-2503
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: Phi-4
    litellm_params:
      model: huggingface/microsoft/phi-4
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: Qwen3-235B-A22B
    litellm_params:
      model: huggingface/Qwen/Qwen3-235B-A22B
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: c4ai-command-r-plus-08-2024
    litellm_params:
      model: huggingface/CohereLabs/c4ai-command-r-plus-08-2024
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: Llama-3.1-Nemotron-70B-Instruct-HF
    litellm_params:
      model: huggingface/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: gemma-3-27b-it
    litellm_params:
      model: huggingface/google/gemma-3-27b-it
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: Qwen2.5-VL-32B-Instruct
    litellm_params:
      model: huggingface/Qwen/Qwen2.5-VL-32B-Instruct
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: Hermes-3-Llama-3.1-8B
    litellm_params:
      model: huggingface/NousResearch/Hermes-3-Llama-3.1-8B
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: DeepSeek-R1
    litellm_params:
      model: huggingface/together/deepseek-ai/DeepSeek-R1
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: gpt-oss-120b
    litellm_params:
      model: huggingface/openai/gpt-oss-120b
      api_key: os.environ/HF_TOKEN
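  # Note on the model prefixes above (informational, not config): `huggingface/<org>/<model>`
  # routes the request through LiteLLM's Hugging Face provider, and an extra provider
  # segment such as `huggingface/together/...` (DeepSeek-R1 above) pins the request to a
  # specific HF Inference Provider. See the LiteLLM Hugging Face docs for the full syntax.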
  # ============================
  #
  # Model-specific parameters with example
  #
  # ============================
  - model_name: ollama-phi3.5-3.8b
    litellm_params:
      model: ollama/phi3.5:3.8b
      api_base: https://zhengr-ollama.hf.space
      # Model-specific parameters
      #model: "huggingface/mistralai/Mistral-7B-Instruct-v0.1"
      #api_base: "<your-api-base>"
      #api_key: "<your-api-key>" # [OPTIONAL] for hf inference endpoints
      #initial_prompt_value: "\n"
      #roles: {"system":{"pre_message":"<|im_start|>system\n", "post_message":"<|im_end|>"}, "assistant":{"pre_message":"<|im_start|>assistant\n","post_message":"<|im_end|>"}, "user":{"pre_message":"<|im_start|>user\n","post_message":"<|im_end|>"}}
      #final_prompt_value: "\n"
      #bos_token: "<s>"
      #eos_token: "</s>"
      #max_tokens: 4096
litellm_settings: # module level litellm settings - https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py
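  # drop_params silently drops request parameters that the target provider does
  # not support, instead of returning an error (see the LiteLLM settings docs)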
  drop_params: True
general_settings:
  #master_key: sk-1234 # [OPTIONAL] Only use this if you want to require all calls to contain this key (Authorization: Bearer sk-1234)
  #alerting: ["slack"] # [OPTIONAL] If you want Slack alerts for hanging LLM requests, slow LLM responses, and budget alerts. Make sure to set `SLACK_WEBHOOK_URL` in your env
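# ============================
#
# Usage (a minimal sketch, not part of the config): start the proxy with this
# file and call any `model_name` above through the OpenAI-compatible API.
# Assumes this file is saved as config.yaml and LiteLLM's default port 4000;
# adjust both for your deployment.
#
#   litellm --config config.yaml
#
#   curl http://localhost:4000/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "Qwen2.5-72B-Instruct",
#          "messages": [{"role": "user", "content": "Hello!"}]}'
#
# If `master_key` is enabled above, also send: -H "Authorization: Bearer sk-1234"
# ============================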