"""
Nvidia NIM endpoint: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer
This is OpenAI compatible
This file only contains param mapping logic
API calling is done using the OpenAI SDK with an api_base
"""
from typing import Optional, Union

from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig


class NvidiaNimConfig(OpenAIGPTConfig):
    """
    Reference: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer

    Configuration class for the Nvidia NIM Chat Completions API interface.
    The following parameters are supported:
    """

    temperature: Optional[float] = None
    top_p: Optional[float] = None
    frequency_penalty: Optional[float] = None
    presence_penalty: Optional[float] = None
    max_tokens: Optional[int] = None
    stop: Optional[Union[str, list]] = None

    def __init__(
        self,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        max_tokens: Optional[int] = None,
        stop: Optional[Union[str, list]] = None,
    ) -> None:
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                # Values are stored on the class itself (not the instance),
                # following the pattern used by litellm's config classes.
                setattr(self.__class__, key, value)
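
    # Illustrative: constructing the config pins any non-None values onto the
    # class itself, so they are visible as class attributes afterwards.
    #
    #   NvidiaNimConfig(temperature=0.2)
    #   NvidiaNimConfig.temperature  # -> 0.2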
    @classmethod
    def get_config(cls):
        return super().get_config()
    def get_supported_openai_params(self, model: str) -> list:
        """
        Get the supported OpenAI params for the given model.

        Updated on July 5th, 2024 - based on https://docs.api.nvidia.com/nim/reference
        """
        if model in [
            "google/recurrentgemma-2b",
            "google/gemma-2-27b-it",
            "google/gemma-2-9b-it",
            "gemma-2-9b-it",
        ]:
            return ["stream", "temperature", "top_p", "max_tokens", "stop", "seed"]
        elif model == "nvidia/nemotron-4-340b-instruct":
            return [
                "stream",
                "temperature",
                "top_p",
                "max_tokens",
                "max_completion_tokens",
            ]
        elif model == "nvidia/nemotron-4-340b-reward":
            return [
                "stream",
            ]
        elif model in ["google/codegemma-1.1-7b"]:
            # most params - but no 'seed' :(
            return [
                "stream",
                "temperature",
                "top_p",
                "frequency_penalty",
                "presence_penalty",
                "max_tokens",
                "max_completion_tokens",
                "stop",
            ]
        else:
            # DEFAULT case - the vast majority of Nvidia NIM models fall here, e.g.:
            # "upstage/solar-10.7b-instruct",
            # "snowflake/arctic",
            # "seallms/seallm-7b-v2.5",
            # "nvidia/llama3-chatqa-1.5-8b",
            # "nvidia/llama3-chatqa-1.5-70b",
            # "mistralai/mistral-large",
            # "mistralai/mixtral-8x22b-instruct-v0.1",
            # "mistralai/mixtral-8x7b-instruct-v0.1",
            # "mistralai/mistral-7b-instruct-v0.3",
            # "mistralai/mistral-7b-instruct-v0.2",
            # "mistralai/codestral-22b-instruct-v0.1",
            # "microsoft/phi-3-small-8k-instruct",
            # "microsoft/phi-3-small-128k-instruct",
            # "microsoft/phi-3-mini-4k-instruct",
            # "microsoft/phi-3-mini-128k-instruct",
            # "microsoft/phi-3-medium-4k-instruct",
            # "microsoft/phi-3-medium-128k-instruct",
            # "meta/llama3-70b-instruct",
            # "meta/llama3-8b-instruct",
            # "meta/llama2-70b",
            # "meta/codellama-70b",
            return [
                "stream",
                "temperature",
                "top_p",
                "frequency_penalty",
                "presence_penalty",
                "max_tokens",
                "max_completion_tokens",
                "stop",
                "seed",
            ]
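
    # Illustrative outputs for the branches above:
    #
    #   config = NvidiaNimConfig()
    #   config.get_supported_openai_params("nvidia/nemotron-4-340b-reward")
    #   # -> ["stream"]
    #   config.get_supported_openai_params("meta/llama3-8b-instruct")
    #   # -> ["stream", "temperature", "top_p", "frequency_penalty",
    #   #     "presence_penalty", "max_tokens", "max_completion_tokens",
    #   #     "stop", "seed"]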
    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        supported_openai_params = self.get_supported_openai_params(model=model)
        for param, value in non_default_params.items():
            if param == "max_completion_tokens":
                # NIM expects 'max_tokens'; translate the newer OpenAI
                # 'max_completion_tokens' param to it.
                optional_params["max_tokens"] = value
            elif param in supported_openai_params:
                optional_params[param] = value
        return optional_params
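
# Usage sketch (illustrative): 'max_completion_tokens' is rewritten to
# 'max_tokens', and params the model does not support (e.g. 'seed' for
# google/codegemma-1.1-7b) are silently dropped.
#
#   config = NvidiaNimConfig()
#   config.map_openai_params(
#       non_default_params={"max_completion_tokens": 256, "seed": 42},
#       optional_params={},
#       model="google/codegemma-1.1-7b",
#       drop_params=False,
#   )
#   # -> {"max_tokens": 256}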