model_list:
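  # Each entry maps a public `model_name` (what clients pass as "model") to
  # LiteLLM routing params. The `huggingface/` prefix routes the call to the
  # Hugging Face provider, and `os.environ/HF_TOKEN` tells LiteLLM to read the
  # API key from the HF_TOKEN environment variable at request time.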
  - model_name: Qwen3-235B-A22B
    litellm_params:
      model: huggingface/Qwen/Qwen3-235B-A22B
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: Qwen2.5-72B-Instruct
    litellm_params:
      model: huggingface/Qwen/Qwen2.5-72B-Instruct
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: DeepSeek-R1-Distill-Qwen-32B
    litellm_params:
      model: huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: QwQ-32B
    litellm_params:
      model: huggingface/Qwen/QwQ-32B
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: Llama-3.3-70B-Instruct
    litellm_params:
      model: huggingface/meta-llama/Llama-3.3-70B-Instruct
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: Qwen2.5-VL-32B-Instruct
    litellm_params:
      model: huggingface/Qwen/Qwen2.5-VL-32B-Instruct
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: DeepSeek-R1-Distill-Llama-70B
    litellm_params:
      model: huggingface/deepseek-ai/DeepSeek-R1-Distill-Llama-70B
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: ollama-phi3.5-3.8b
    litellm_params:
      model: ollama/phi3.5:3.8b
      api_base: https://zhengr-ollama.hf.space
      # [OPTIONAL] model-specific parameters (inactive template, kept for reference)
      #model: "huggingface/mistralai/Mistral-7B-Instruct-v0.1"
      #api_base: "<your-api-base>"
      #api_key: "<your-api-key>" # [OPTIONAL] for hf inference endpoints
      #initial_prompt_value: "\n"
      #roles: {"system":{"pre_message":"<|im_start|>system\n", "post_message":"<|im_end|>"}, "assistant":{"pre_message":"<|im_start|>assistant\n","post_message":"<|im_end|>"}, "user":{"pre_message":"<|im_start|>user\n","post_message":"<|im_end|>"}}
      #final_prompt_value: "\n"
      #bos_token: "<s>"
      #eos_token: "</s>"
      #max_tokens: 4096
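
# To run: save this file (e.g. as `config.yaml`) and start the proxy, which
# listens on LiteLLM's default port 4000:
#   litellm --config config.yaml
# Every `model_name` above is then served through the proxy's OpenAI-compatible
# /v1/chat/completions endpoint (a Python client sketch is at the end of this file).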

litellm_settings: # module level litellm settings - https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py
  drop_params: True # silently drop OpenAI params the target provider doesn't support instead of raising an error

general_settings:
  #master_key: sk-1234 # [OPTIONAL] Only set this if you want to require all calls to include this key (Authorization: Bearer sk-1234)
  #alerting: ["slack"] # [OPTIONAL] Slack alerts for hanging LLM requests, slow responses, and budget limits. Make sure to set `SLACK_WEBHOOK_URL` in your env