model_list:
  - model_name: Llama-3.3-70B-Instruct
    litellm_params:
      model: huggingface/meta-llama/Llama-3.3-70B-Instruct
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: Qwen2.5-72B-Instruct
    litellm_params:
      model: huggingface/Qwen/Qwen2.5-72B-Instruct
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env 
  - model_name: DeepSeek-R1-Distill-Qwen-32B
    litellm_params:
      model: huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: QwQ-32B
    litellm_params:
      model: huggingface/Qwen/QwQ-32B
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: Mistral-Small-3.1-24B-Instruct-2503
    litellm_params:
      model: huggingface/mistralai/Mistral-Small-3.1-24B-Instruct-2503
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env  
  - model_name: Phi-4
    litellm_params:
      model: huggingface/microsoft/phi-4
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: Qwen3-235B-A22B
    litellm_params:
      model: huggingface/Qwen/Qwen3-235B-A22B
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: c4ai-command-r-plus-08-2024
    litellm_params:
      model: huggingface/CohereLabs/c4ai-command-r-plus-08-2024
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: Llama-3.1-Nemotron-70B-Instruct-HF
    litellm_params:
      model: huggingface/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: gemma-3-27b-it
    litellm_params:
      model: huggingface/google/gemma-3-27b-it
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: Qwen2.5-VL-32B-Instruct
    litellm_params:
      model: huggingface/Qwen/Qwen2.5-VL-32B-Instruct
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: Hermes-3-Llama-3.1-8B
    litellm_params:
      model: huggingface/NousResearch/Hermes-3-Llama-3.1-8B
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: DeepSeek-R1
    litellm_params:
      model: huggingface/together/deepseek-ai/DeepSeek-R1
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: gpt-oss-120b
    litellm_params:
      model: huggingface/openai/gpt-oss-120b
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  # ============================
  # 
  # Model-specific parameters with example
  # 
  # ============================
  - model_name: ollama-phi3.5-3.8b
    litellm_params:
      model: ollama/phi3.5:3.8b
      api_base: https://zhengr-ollama.hf.space
      # Model-specific parameters
      #model: "huggingface/mistralai/Mistral-7B-Instruct-v0.1" 
      #api_base: "<your-api-base>"
      #api_key: "<your-api-key>" # [OPTIONAL] for hf inference endpoints
      #initial_prompt_value: "\n"
      #roles: {"system":{"pre_message":"<|im_start|>system\n", "post_message":"<|im_end|>"}, "assistant":{"pre_message":"<|im_start|>assistant\n","post_message":"<|im_end|>"}, "user":{"pre_message":"<|im_start|>user\n","post_message":"<|im_end|>"}}
      #final_prompt_value: "\n"
      #bos_token: "<s>"
      #eos_token: "</s>"
      #max_tokens: 4096
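
  # Note: clients send the `model_name` values above as `model`; litellm maps
  # each to the underlying `huggingface/...` path. Paths of the form
  # huggingface/<provider>/<org>/<model> (e.g. the DeepSeek-R1 entry) pin the
  # request to a specific Hugging Face inference provider (here, Together).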

litellm_settings: # module level litellm settings - https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py
  drop_params: True
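  # drop_params: True drops OpenAI params that the target provider does not
  # support instead of failing the request with an error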

general_settings: 
  #master_key: sk-1234 # [OPTIONAL] Only use this if you want to require all calls to contain this key (Authorization: Bearer sk-1234)
  #alerting: ["slack"] # [OPTIONAL] If you want Slack alerts for hanging LLM requests, slow LLM responses, and budget alerts. Make sure to set `SLACK_WEBHOOK_URL` in your env
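
# ============================
#
# Usage sketch (assumptions: this file is saved as config.yaml, the proxy runs
# on litellm's default port 4000, and no master_key is set)
#
# ============================
# Start the proxy:
#   litellm --config config.yaml
#
# Query any model_name from model_list via the OpenAI-compatible endpoint:
#   curl http://0.0.0.0:4000/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "Llama-3.3-70B-Instruct", "messages": [{"role": "user", "content": "Hello"}]}'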