import logging from injector import inject, singleton from llama_index.llms import MockLLM from llama_index.llms.base import LLM from fastapi import Depends from llama_index.llms import OpenAI from private_gpt.components.llm.prompt_helper import get_prompt_style from private_gpt.paths import models_path from private_gpt.settings.settings import Settings import os logger = logging.getLogger(__name__) @singleton class LLMComponent: llm: LLM @inject def __init__(self, settings: Settings) -> None: llm_mode = settings.llm.mode logger.info("Initializing the LLM in mode=%s", llm_mode) match settings.llm.mode: case "local": from llama_index.llms import LlamaCPP prompt_style_cls = get_prompt_style(settings.local.prompt_style) prompt_style = prompt_style_cls( default_system_prompt=settings.local.default_system_prompt ) self.llm = LlamaCPP( model_path=str(models_path / settings.local.llm_hf_model_file), temperature=0.1, max_new_tokens=settings.llm.max_new_tokens, context_window=3900, generate_kwargs={}, model_kwargs={"n_gpu_layers": -1}, messages_to_prompt=prompt_style.messages_to_prompt, completion_to_prompt=prompt_style.completion_to_prompt, verbose=True, ) case "sagemaker": from private_gpt.components.llm.custom.sagemaker import SagemakerLLM self.llm = SagemakerLLM( endpoint_name=settings.sagemaker.llm_endpoint_name, ) case "openai": from llama_index.llms import OpenAI openai_settings = settings.openai.api_key #default startup logger.info("Initializing the GPT Model in=%s", "gpt-3.5-turbo") self.llm = OpenAI(model="gpt-3.5-turbo", api_key=os.environ.get("OPENAI_API_KEY")) case "mock": self.llm = MockLLM() @inject def switch_model(self, new_model: str, settings: Settings) -> None: openai_settings = settings.openai.api_key if type(self.llm) == OpenAI: if new_model == "gpt-3.5-turbo": self.llm = OpenAI(model="gpt-3.5-turbo", api_key=os.environ.get("OPENAI_API_KEY")) elif new_model == "gpt-4": # Initialize with the new model self.llm = OpenAI(model="gpt-4", api_key=os.environ.get("OPENAI_API_KEY")) logger.info("Initializing the GPT Model in=%s", "gpt-4")