from typing import List

ROUTER_MAX_FALLBACKS = 5
DEFAULT_BATCH_SIZE = 512
DEFAULT_FLUSH_INTERVAL_SECONDS = 5
DEFAULT_MAX_RETRIES = 2
DEFAULT_FAILURE_THRESHOLD_PERCENT = (
    0.5  # by default, cool down a deployment if 50% of its requests fail in a given minute
)
DEFAULT_COOLDOWN_TIME_SECONDS = 5
DEFAULT_REPLICATE_POLLING_RETRIES = 5
DEFAULT_REPLICATE_POLLING_DELAY_SECONDS = 1
DEFAULT_IMAGE_TOKEN_COUNT = 250
DEFAULT_IMAGE_WIDTH = 300
DEFAULT_IMAGE_HEIGHT = 300
SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000  # minimum number of requests to consider "reasonable traffic"; used for single-deployment cooldown logic

#### RELIABILITY ####
REPEATED_STREAMING_CHUNK_LIMIT = 100  # catch cases where the model starts looping on the same chunk while streaming; high default to prevent false positives

#### Networking settings ####
request_timeout: float = 6000  # time in seconds

LITELLM_CHAT_PROVIDERS = [
    "openai", "openai_like", "xai", "custom_openai", "text-completion-openai",
    "cohere", "cohere_chat", "clarifai", "anthropic", "anthropic_text",
    "replicate", "huggingface", "together_ai", "openrouter", "vertex_ai",
    "vertex_ai_beta", "gemini", "ai21", "baseten", "azure", "azure_text",
    "azure_ai", "sagemaker", "sagemaker_chat", "bedrock", "vllm", "nlp_cloud",
    "petals", "oobabooga", "ollama", "ollama_chat", "deepinfra", "perplexity",
    "mistral", "groq", "nvidia_nim", "cerebras", "ai21_chat", "volcengine",
    "codestral", "text-completion-codestral", "deepseek", "sambanova", "maritalk",
    "cloudflare", "fireworks_ai", "friendliai", "watsonx", "watsonx_text",
    "triton", "predibase", "databricks", "empower", "github", "custom",
    "litellm_proxy", "hosted_vllm", "lm_studio", "galadriel",
]

OPENAI_CHAT_COMPLETION_PARAMS = [
    "functions", "function_call", "temperature", "top_p", "n", "stream",
    "stream_options", "stop", "max_completion_tokens", "modalities", "prediction",
    "audio", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias",
    "user", "request_timeout", "api_base", "api_version", "api_key",
    "deployment_id", "organization", "base_url", "default_headers", "timeout",
    "response_format", "seed", "tools", "tool_choice", "max_retries",
    "parallel_tool_calls", "logprobs", "top_logprobs", "reasoning_effort",
    "extra_headers",
]

openai_compatible_endpoints: List = [
    "api.perplexity.ai",
    "api.endpoints.anyscale.com/v1",
    "api.deepinfra.com/v1/openai",
    "api.mistral.ai/v1",
    "codestral.mistral.ai/v1/chat/completions",
    "codestral.mistral.ai/v1/fim/completions",
    "api.groq.com/openai/v1",
    "https://integrate.api.nvidia.com/v1",
    "api.deepseek.com/v1",
    "api.together.xyz/v1",
    "app.empower.dev/api/v1",
    "https://api.friendli.ai/serverless/v1",
    "api.sambanova.ai/v1",
    "api.x.ai/v1",
    "api.galadriel.ai/v1",
]

openai_compatible_providers: List = [
    "anyscale", "mistral", "groq", "nvidia_nim", "cerebras", "sambanova",
    "ai21_chat", "ai21", "volcengine", "codestral", "deepseek", "deepinfra",
    "perplexity", "xinference", "xai", "together_ai", "fireworks_ai", "empower",
    "friendliai", "azure_ai", "github", "litellm_proxy", "hosted_vllm",
    "lm_studio", "galadriel",
]

openai_text_completion_compatible_providers: List = [
    # providers that support `/v1/completions`
    "together_ai",
    "fireworks_ai",
    "hosted_vllm",
]

_openai_like_providers: List = [
    "predibase",
    "databricks",
    "watsonx",
]  # private helper: similar to OpenAI, but these require some custom auth / endpoint handling, so they can't use the OpenAI SDK
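
# Illustrative sketch only (hypothetical helper, not LiteLLM's actual router code):
# how DEFAULT_FAILURE_THRESHOLD_PERCENT might be applied. e.g. 300 failures out of
# 600 requests in a given minute -> 0.5 failure rate -> the deployment is cooled
# down for DEFAULT_COOLDOWN_TIME_SECONDS before receiving traffic again.
def _example_should_cooldown(failed_requests: int, total_requests: int) -> bool:
    if total_requests == 0:
        return False  # no traffic in the window, nothing to judge
    return (failed_requests / total_requests) >= DEFAULT_FAILURE_THRESHOLD_PERCENT
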
# well supported replicate llms
replicate_models: List = [
    # llama replicate supported LLMs
    "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
    "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",
    "meta/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db",
    # Vicuna
    "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b",
    "joehoover/instructblip-vicuna13b:c4c54e3c8c97cd50c2d2fec9be3b6065563ccf7d43787fb99f84151b867178fe",
    # Flan T-5
    "daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f",
    # Others
    "replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5",
    "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
]

clarifai_models: List = [
    "clarifai/meta.Llama-3.Llama-3-8B-Instruct", "clarifai/gcp.generate.gemma-1_1-7b-it",
    "clarifai/mistralai.completion.mixtral-8x22B", "clarifai/cohere.generate.command-r-plus",
    "clarifai/databricks.drbx.dbrx-instruct", "clarifai/mistralai.completion.mistral-large",
    "clarifai/mistralai.completion.mistral-medium", "clarifai/mistralai.completion.mistral-small",
    "clarifai/mistralai.completion.mixtral-8x7B-Instruct-v0_1", "clarifai/gcp.generate.gemma-2b-it",
    "clarifai/gcp.generate.gemma-7b-it", "clarifai/deci.decilm.deciLM-7B-instruct",
    "clarifai/mistralai.completion.mistral-7B-Instruct", "clarifai/gcp.generate.gemini-pro",
    "clarifai/anthropic.completion.claude-v1", "clarifai/anthropic.completion.claude-instant-1_2",
    "clarifai/anthropic.completion.claude-instant", "clarifai/anthropic.completion.claude-v2",
    "clarifai/anthropic.completion.claude-2_1", "clarifai/meta.Llama-2.codeLlama-70b-Python",
    "clarifai/meta.Llama-2.codeLlama-70b-Instruct", "clarifai/openai.completion.gpt-3_5-turbo-instruct",
    "clarifai/meta.Llama-2.llama2-7b-chat", "clarifai/meta.Llama-2.llama2-13b-chat",
    "clarifai/meta.Llama-2.llama2-70b-chat", "clarifai/openai.chat-completion.gpt-4-turbo",
    "clarifai/microsoft.text-generation.phi-2", "clarifai/meta.Llama-2.llama2-7b-chat-vllm",
    "clarifai/upstage.solar.solar-10_7b-instruct", "clarifai/openchat.openchat.openchat-3_5-1210",
    "clarifai/togethercomputer.stripedHyena.stripedHyena-Nous-7B", "clarifai/gcp.generate.text-bison",
    "clarifai/meta.Llama-2.llamaGuard-7b", "clarifai/fblgit.una-cybertron.una-cybertron-7b-v2",
    "clarifai/openai.chat-completion.GPT-4", "clarifai/openai.chat-completion.GPT-3_5-turbo",
    "clarifai/ai21.complete.Jurassic2-Grande", "clarifai/ai21.complete.Jurassic2-Grande-Instruct",
    "clarifai/ai21.complete.Jurassic2-Jumbo-Instruct", "clarifai/ai21.complete.Jurassic2-Jumbo",
    "clarifai/ai21.complete.Jurassic2-Large", "clarifai/cohere.generate.cohere-generate-command",
    "clarifai/wizardlm.generate.wizardCoder-Python-34B", "clarifai/wizardlm.generate.wizardLM-70B",
    "clarifai/tiiuae.falcon.falcon-40b-instruct", "clarifai/togethercomputer.RedPajama.RedPajama-INCITE-7B-Chat",
    "clarifai/gcp.generate.code-gecko", "clarifai/gcp.generate.code-bison",
    "clarifai/mistralai.completion.mistral-7B-OpenOrca", "clarifai/mistralai.completion.openHermes-2-mistral-7B",
    "clarifai/wizardlm.generate.wizardLM-13B", "clarifai/huggingface-research.zephyr.zephyr-7B-alpha",
    "clarifai/wizardlm.generate.wizardCoder-15B", "clarifai/microsoft.text-generation.phi-1_5",
    "clarifai/databricks.Dolly-v2.dolly-v2-12b", "clarifai/bigcode.code.StarCoder",
    "clarifai/salesforce.xgen.xgen-7b-8k-instruct", "clarifai/mosaicml.mpt.mpt-7b-instruct",
    "clarifai/anthropic.completion.claude-3-opus", "clarifai/anthropic.completion.claude-3-sonnet",
    "clarifai/gcp.generate.gemini-1_5-pro", "clarifai/gcp.generate.imagen-2",
    "clarifai/salesforce.blip.general-english-image-caption-blip-2",
]

huggingface_models: List = [
    "meta-llama/Llama-2-7b-hf",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-13b-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    "meta-llama/Llama-2-70b-hf",
    "meta-llama/Llama-2-70b-chat-hf",
    "meta-llama/Llama-2-7b",
    "meta-llama/Llama-2-7b-chat",
    "meta-llama/Llama-2-13b",
    "meta-llama/Llama-2-13b-chat",
    "meta-llama/Llama-2-70b",
    "meta-llama/Llama-2-70b-chat",
]  # these have been tested extensively, but by default all text2text-generation and text-generation models are supported by LiteLLM - https://docs.litellm.ai/docs/providers

empower_models = [
    "empower/empower-functions",
    "empower/empower-functions-small",
]

together_ai_models: List = [
    # llama llms - chat
    "togethercomputer/llama-2-70b-chat",
    # llama llms - language / instruct
    "togethercomputer/llama-2-70b",
    "togethercomputer/LLaMA-2-7B-32K",
    "togethercomputer/Llama-2-7B-32K-Instruct",
    "togethercomputer/llama-2-7b",
    # falcon llms
    "togethercomputer/falcon-40b-instruct",
    "togethercomputer/falcon-7b-instruct",
    # alpaca
    "togethercomputer/alpaca-7b",
    # chat llms
    "HuggingFaceH4/starchat-alpha",
    # code llms
    "togethercomputer/CodeLlama-34b",
    "togethercomputer/CodeLlama-34b-Instruct",
    "togethercomputer/CodeLlama-34b-Python",
    "defog/sqlcoder",
    "NumbersStation/nsql-llama-2-7B",
    "WizardLM/WizardCoder-15B-V1.0",
    "WizardLM/WizardCoder-Python-34B-V1.0",
    # language llms
    "NousResearch/Nous-Hermes-Llama2-13b",
    "Austism/chronos-hermes-13b",
    "upstage/SOLAR-0-70b-16bit",
    "WizardLM/WizardLM-70B-V1.0",
]  # supports all Together AI models; just pass in the model id, e.g. completion(model="togethercomputer/replit_code_3b", ...) - see the usage sketch below
baseten_models: List = [
    "qvv0xeq",  # FALCON 7B
    "q841o8w",  # WizardLM
    "31dxrj3",  # Mosaic ML
]

open_ai_embedding_models: List = ["text-embedding-ada-002"]
cohere_embedding_models: List = [
    "embed-english-v3.0",
    "embed-english-light-v3.0",
    "embed-multilingual-v3.0",
    "embed-english-v2.0",
    "embed-english-light-v2.0",
    "embed-multilingual-v2.0",
]
bedrock_embedding_models: List = [
    "amazon.titan-embed-text-v1",
    "cohere.embed-english-v3",
    "cohere.embed-multilingual-v3",
]

OPENAI_FINISH_REASONS = ["stop", "length", "function_call", "content_filter", "null"]
HUMANLOOP_PROMPT_CACHE_TTL_SECONDS = 60  # 1 minute
RESPONSE_FORMAT_TOOL_NAME = "json_tool_call"  # default tool name used when converting response_format to a tool call

########################### Logging Callback Constants ###########################
AZURE_STORAGE_MSFT_VERSION = "2019-07-07"

########################### LiteLLM Proxy Specific Constants ###########################
########################################################################################
MAX_SPENDLOG_ROWS_TO_QUERY = (
    1_000_000  # if spendLogs has more than 1M rows, do not query the DB
)
# makes it clear this is a rate limit error for a litellm virtual key
RATE_LIMIT_ERROR_MESSAGE_FOR_VIRTUAL_KEY = "LiteLLM Virtual Key user_api_key_hash"

# pass-through route constants
BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES = [
    "agents/",
    "knowledgebases/",
    "flows/",
    "retrieveAndGenerate/",
    "rerank/",
    "generateQuery/",
    "optimize-prompt/",
]

BATCH_STATUS_POLL_INTERVAL_SECONDS = 3600  # 1 hour
BATCH_STATUS_POLL_MAX_ATTEMPTS = 24  # for 24 hours

HEALTH_CHECK_TIMEOUT_SECONDS = 60  # 60 seconds

UI_SESSION_TOKEN_TEAM_ID = "litellm-dashboard"
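
# Illustrative sketch only (hypothetical helper, not the proxy's actual polling code):
# how the batch polling constants above combine - one poll every
# BATCH_STATUS_POLL_INTERVAL_SECONDS (1 hour), up to BATCH_STATUS_POLL_MAX_ATTEMPTS
# (24) attempts, i.e. a 24-hour polling window overall. The terminal status strings
# are assumptions for illustration.
def _example_poll_batch_status(get_status) -> str:
    import time

    for _ in range(BATCH_STATUS_POLL_MAX_ATTEMPTS):
        status = get_status()
        if status in ("completed", "failed", "expired", "cancelled"):
            return status
        time.sleep(BATCH_STATUS_POLL_INTERVAL_SECONDS)
    return "polling_timed_out"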