|
""" |
|
Nvidia NIM endpoint: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer |
|
|
|
This is OpenAI compatible |
|
|
|
This file only contains param mapping logic |
|
|
|
API calling is done using the OpenAI SDK with an api_base |
|
""" |
|
|
|
from typing import Optional, Union |
|
|
|
from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig |
|
|
|
|
|
class NvidiaNimConfig(OpenAIGPTConfig):
    """
    Configuration for the Nvidia NIM Chat Completions API interface.

    Reference: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer

    This class only decides which OpenAI-style parameters are forwarded for a
    given Nvidia NIM model and how they are renamed; it performs no API calls.

    Configurable parameters (all optional, `None` means "not set"):

    - `temperature`
    - `top_p`
    - `frequency_penalty`
    - `presence_penalty`
    - `max_tokens`
    - `stop`
    """

    # Class-level defaults. `__init__` overwrites these on the class itself
    # (not on the instance), so a value set once is visible to every instance.
    # The four sampling/penalty settings are fractional in the OpenAI-compatible
    # API, hence `float` (which also accepts ints for type checkers).
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    frequency_penalty: Optional[float] = None
    presence_penalty: Optional[float] = None
    max_tokens: Optional[int] = None
    stop: Optional[Union[str, list]] = None

    def __init__(
        self,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        max_tokens: Optional[int] = None,
        stop: Optional[Union[str, list]] = None,
    ) -> None:
        """
        Record every argument that is not `None` as a class attribute.

        Note that the values are stored on the class, not on the instance, so
        they are shared by all instances created afterwards.
        """
        # Must stay the first statement: the snapshot should contain only
        # `self` and the constructor arguments, no other local names.
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        """Return the configuration produced by the parent class's `get_config`."""
        return super().get_config()

    def get_supported_openai_params(self, model: str) -> list:
        """
        Get the supported OpenAI params for the given model.

        Updated on July 5th, 2024 - based on https://docs.api.nvidia.com/nim/reference

        Args:
            model: Nvidia NIM model name, e.g. "google/gemma-2-9b-it".

        Returns:
            A new list of OpenAI parameter names accepted for `model`. Models
            without a dedicated entry get the default list.
        """
        if model in [
            "google/recurrentgemma-2b",
            "google/gemma-2-27b-it",
            "google/gemma-2-9b-it",
            "gemma-2-9b-it",
        ]:
            return ["stream", "temperature", "top_p", "max_tokens", "stop", "seed"]
        elif model == "nvidia/nemotron-4-340b-instruct":
            return [
                "stream",
                "temperature",
                "top_p",
                "max_tokens",
                "max_completion_tokens",
            ]
        elif model == "nvidia/nemotron-4-340b-reward":
            return [
                "stream",
            ]
        elif model in ["google/codegemma-1.1-7b"]:
            return [
                "stream",
                "temperature",
                "top_p",
                "frequency_penalty",
                "presence_penalty",
                "max_tokens",
                "max_completion_tokens",
                "stop",
            ]
        else:
            # Default: every model that is not special-cased above.
            return [
                "stream",
                "temperature",
                "top_p",
                "frequency_penalty",
                "presence_penalty",
                "max_tokens",
                "max_completion_tokens",
                "stop",
                "seed",
            ]

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Copy the supported entries of `non_default_params` into `optional_params`.

        `max_completion_tokens` is forwarded under the name `max_tokens`, but
        only when `max_tokens` is supported for `model`. Any other parameter is
        copied unchanged if it is supported for `model` and silently skipped
        otherwise.

        Args:
            non_default_params: OpenAI-style parameters supplied by the caller.
            optional_params: Dict that receives the mapped parameters; it is
                mutated in place.
            model: Nvidia NIM model name used to look up the supported params.
            drop_params: Accepted for interface compatibility; not used here.

        Returns:
            The same `optional_params` dict that was passed in.
        """
        supported_openai_params = self.get_supported_openai_params(model=model)
        # Gate the alias on the target name so that a model which does not
        # accept `max_tokens` never receives it through `max_completion_tokens`.
        max_tokens_supported = "max_tokens" in supported_openai_params
        for param, value in non_default_params.items():
            if param == "max_completion_tokens":
                # If both `max_tokens` and `max_completion_tokens` are given,
                # whichever comes later in `non_default_params` wins.
                if max_tokens_supported:
                    optional_params["max_tokens"] = value
            elif param in supported_openai_params:
                optional_params[param] = value
        return optional_params
|
|