# Spaces: Sleeping Sleeping  (HuggingFace Spaces status residue from page scrape — not part of this config)
model_list:
  - model_name: Qwen3-235B-A22B
    litellm_params:
      model: huggingface/hf-inference/models/Qwen/Qwen3-235B-A22B
      api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
  - model_name: ollama-phi3.5-3.8b
    litellm_params:
      model: ollama/phi3.5:3.8b
      api_base: https://zhengr-ollama.hf.space
      # Model-specific parameters
      #model: "huggingface/mistralai/Mistral-7B-Instruct-v0.1"
      #api_base: "<your-api-base>"
      #api_key: "<your-api-key>" # [OPTIONAL] for hf inference endpoints
      #initial_prompt_value: "\n"
      #roles: {"system":{"pre_message":"<|im_start|>system\n", "post_message":"<|im_end|>"}, "assistant":{"pre_message":"<|im_start|>assistant\n","post_message":"<|im_end|>"}, "user":{"pre_message":"<|im_start|>user\n","post_message":"<|im_end|>"}}
      #final_prompt_value: "\n"
      #bos_token: "<s>"
      #eos_token: "</s>"
      #max_tokens: 4096

litellm_settings: # module level litellm settings - https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py
  drop_params: True

general_settings:
  #master_key: sk-1234 # [OPTIONAL] Only use this if you to require all calls to contain this key (Authorization: Bearer sk-1234)
  #alerting: ["slack"] # [OPTIONAL] If you want Slack Alerts for Hanging LLM requests, Slow llm responses, Budget Alerts. Make sure to set `SLACK_WEBHOOK_URL` in your env