# proxy_server_config.yaml
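# Each model_list entry maps a client-facing `model_name` to the provider-specific
# `litellm_params` used to call the underlying deployment; an optional `model_info`
# block attaches metadata (ids, health-check hints) to a deployment.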
model_list:
  - model_name: gpt-3.5-turbo-end-user-test
    litellm_params:
      model: gpt-3.5-turbo
      region_name: "eu"
    model_info:
      id: "1"
  - model_name: gpt-3.5-turbo-end-user-test
    litellm_params:
      model: azure/chatgpt-v-2
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
      api_version: "2023-05-15"
      api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
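  # The two entries above share the model_name "gpt-3.5-turbo-end-user-test", so the
  # router treats them as one group and load-balances requests across the OpenAI and
  # Azure deployments.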
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: azure/chatgpt-v-2
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
      api_version: "2023-05-15"
      api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
  - model_name: gpt-3.5-turbo-large
    litellm_params:
      model: "gpt-3.5-turbo-1106"
      api_key: os.environ/OPENAI_API_KEY
      rpm: 480
      timeout: 300
      stream_timeout: 60
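  # rpm caps this deployment at 480 requests per minute for routing decisions;
  # timeout and stream_timeout are per-request limits in seconds.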
  - model_name: gpt-4
    litellm_params:
      model: azure/chatgpt-v-2
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
      api_version: "2023-05-15"
      api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
      rpm: 480
      timeout: 300
      stream_timeout: 60
  - model_name: sagemaker-completion-model
    litellm_params:
      model: sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4
      input_cost_per_second: 0.000420
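  # SageMaker endpoints are billed per second of compute, so spend tracking for this
  # deployment uses input_cost_per_second rather than per-token pricing.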
  - model_name: text-embedding-ada-002
    litellm_params:
      model: azure/azure-embedding-model
      api_key: os.environ/AZURE_API_KEY
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
      api_version: "2023-05-15"
    model_info:
      mode: embedding
      base_model: text-embedding-ada-002
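  # mode: embedding tells the /health endpoint to probe this deployment with an
  # embedding request; base_model maps the Azure deployment name to a known OpenAI
  # model so cost tracking uses the right pricing.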
  - model_name: dall-e-2
    litellm_params:
      model: azure/
      api_version: "2023-06-01-preview"
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
      api_key: os.environ/AZURE_API_KEY
  - model_name: openai-dall-e-3
    litellm_params:
      model: dall-e-3
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
  - model_name: fake-openai-endpoint-2
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      stream_timeout: 0.001
      rpm: 1
  - model_name: fake-openai-endpoint-3
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      stream_timeout: 0.001
      rpm: 1000
  - model_name: fake-openai-endpoint-4
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      num_retries: 50
  - model_name: fake-openai-endpoint-3
    litellm_params:
      model: openai/my-fake-model-2
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      stream_timeout: 0.001
      rpm: 1000
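  # fake-openai-endpoint-3 is defined twice (openai/my-fake-model and
  # openai/my-fake-model-2), giving the router two deployments in one load-balanced
  # group; the tiny stream_timeout values exist to exercise timeout handling in tests.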
  - model_name: bad-model
    litellm_params:
      model: openai/bad-model
      api_key: os.environ/OPENAI_API_KEY
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      mock_timeout: True
      timeout: 60
      rpm: 1000
    model_info:
      health_check_timeout: 1
  - model_name: good-model
    litellm_params:
      model: openai/bad-model
      api_key: os.environ/OPENAI_API_KEY
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      rpm: 1000
    model_info:
      health_check_timeout: 1
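  # bad-model sets mock_timeout so every request simulates a timeout, and
  # health_check_timeout: 1 makes /health probes give up after one second; good-model
  # points at the same upstream but omits mock_timeout, so its probes pass.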
- model_name: "*"
litellm_params:
model: openai/*
api_key: os.environ/OPENAI_API_KEY
# provider specific wildcard routing
- model_name: "anthropic/*"
litellm_params:
model: "anthropic/*"
api_key: os.environ/ANTHROPIC_API_KEY
- model_name: "bedrock/*"
litellm_params:
model: "bedrock/*"
- model_name: "groq/*"
litellm_params:
model: "groq/*"
api_key: os.environ/GROQ_API_KEY
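  # The "*" entry is a catch-all: any model name with no explicit entry is passed
  # through to OpenAI. The provider-prefixed wildcards route matching names to that
  # provider instead. A minimal sketch of a wildcard request, assuming the proxy runs
  # on localhost:4000 with the master_key from general_settings below; the Groq model
  # name is just illustrative:
  #   curl http://localhost:4000/v1/chat/completions \
  #     -H "Authorization: Bearer sk-1234" \
  #     -H "Content-Type: application/json" \
  #     -d '{"model": "groq/llama3-8b-8192", "messages": [{"role": "user", "content": "hi"}]}'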
  - model_name: mistral-embed
    litellm_params:
      model: mistral/mistral-embed
  - model_name: gpt-instruct # [PROD TEST] - tests if `/health` automatically infers this to be a text completion model
    litellm_params:
      model: text-completion-openai/gpt-3.5-turbo-instruct
  - model_name: fake-openai-endpoint-5
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      timeout: 1
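  # fake-openai-endpoint-5 sets timeout: 1 so calls against it fail fast, which is
  # useful when testing the proxy's timeout and retry behavior.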

litellm_settings:
  # set_verbose: True  # Uncomment this if you want to see verbose logs; not recommended in production
  drop_params: True
  # max_budget: 100
  # budget_duration: 30d
  num_retries: 5
  request_timeout: 600
  telemetry: False
  context_window_fallbacks: [{"gpt-3.5-turbo": ["gpt-3.5-turbo-large"]}]
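  # If a request to gpt-3.5-turbo fails with a context-window error, the router
  # retries it on the gpt-3.5-turbo-large group defined in model_list above.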
  default_team_settings:
    - team_id: team-1
      success_callback: ["langfuse"]
      failure_callback: ["langfuse"]
      langfuse_public_key: os.environ/LANGFUSE_PROJECT1_PUBLIC # Project 1
      langfuse_secret: os.environ/LANGFUSE_PROJECT1_SECRET # Project 1
    - team_id: team-2
      success_callback: ["langfuse"]
      failure_callback: ["langfuse"]
      langfuse_public_key: os.environ/LANGFUSE_PROJECT2_PUBLIC # Project 2
      langfuse_secret: os.environ/LANGFUSE_PROJECT2_SECRET # Project 2
      langfuse_host: https://us.cloud.langfuse.com
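  # Each team's traces go to a separate Langfuse project by overriding the public
  # key and secret per team; team-2 additionally overrides the Langfuse host.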

# For /fine_tuning/jobs endpoints
finetune_settings:
  - custom_llm_provider: azure
    api_base: os.environ/AZURE_API_BASE
    api_key: os.environ/AZURE_API_KEY
    api_version: "2023-03-15-preview"
  - custom_llm_provider: openai
    api_key: os.environ/OPENAI_API_KEY

# For /files endpoints
files_settings:
  - custom_llm_provider: azure
    api_base: os.environ/AZURE_API_BASE
    api_key: os.environ/AZURE_API_KEY
    api_version: "2023-03-15-preview"
  - custom_llm_provider: openai
    api_key: os.environ/OPENAI_API_KEY

router_settings:
  routing_strategy: usage-based-routing-v2
  redis_host: os.environ/REDIS_HOST
  redis_password: os.environ/REDIS_PASSWORD
  redis_port: os.environ/REDIS_PORT
  enable_pre_call_checks: true
  model_group_alias: {"my-special-fake-model-alias-name": "fake-openai-endpoint-3"}
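  # usage-based-routing-v2 keeps per-deployment TPM/RPM counters in Redis, so multiple
  # proxy instances share rate-limit state; model_group_alias lets clients request
  # "my-special-fake-model-alias-name" and be routed to the fake-openai-endpoint-3 group.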

general_settings:
  master_key: sk-1234 # [OPTIONAL] Use to enforce auth on the proxy. See - https://docs.litellm.ai/docs/proxy/virtual_keys
  store_model_in_db: True
  proxy_budget_rescheduler_min_time: 60
  proxy_budget_rescheduler_max_time: 64
  proxy_batch_write_at: 1
  database_connection_pool_limit: 10
  # database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>" # [OPTIONAL] use for token-based auth to proxy
  pass_through_endpoints:
    - path: "/v1/rerank" # route you want to add to the LiteLLM proxy server
      target: "https://api.cohere.com/v1/rerank" # URL this route should forward requests to
      headers: # headers to send to this URL
        content-type: application/json # (Optional) extra headers to pass to this endpoint
        accept: application/json
      forward_headers: True
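    # A minimal sketch of calling the pass-through route, assuming the proxy runs on
    # localhost:4000; with forward_headers: True the client's own headers (e.g. the
    # Cohere Authorization header) are forwarded upstream. Model/query values are
    # illustrative:
    #   curl http://localhost:4000/v1/rerank \
    #     -H "Authorization: Bearer <your-cohere-api-key>" \
    #     -H "Content-Type: application/json" \
    #     -d '{"model": "rerank-english-v3.0", "query": "capital of France", "documents": ["Paris", "London"]}'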

# environment_variables:
#   # Settings for using Redis caching
#   REDIS_HOST: redis-16337.c322.us-east-1-2.ec2.cloud.redislabs.com
#   REDIS_PORT: "16337"
#   REDIS_PASSWORD: