import logging

from injector import inject, singleton
from llama_index.llms import MockLLM, OpenAI
from llama_index.llms.base import LLM

from private_gpt.components.llm.prompt_helper import get_prompt_style
from private_gpt.paths import models_path
from private_gpt.settings.settings import Settings

logger = logging.getLogger(__name__)

# Default quantized Mistral 7B Instruct weights, downloaded on first use.
MISTRAL_7B_MODEL_URL = (
    "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
    "/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf?download=true"
)

OPENAI_DEFAULT_MODEL = "gpt-3.5-turbo"


@singleton
class LLMComponent:
    llm: LLM

    @inject
    def __init__(self, settings: Settings) -> None:
        llm_mode = settings.llm.mode
        logger.info("Initializing the LLM in mode=%s", llm_mode)
        match llm_mode:
            case "local":
                self.llm = self._build_local_llm(settings)
            case "sagemaker":
                from private_gpt.components.llm.custom.sagemaker import SagemakerLLM

                self.llm = SagemakerLLM(
                    endpoint_name=settings.sagemaker.llm_endpoint_name,
                )
            # "openai" and "dynamic" start with the same default model;
            # "dynamic" is meant to be swapped at runtime via switch_model().
            case "openai" | "dynamic":
                logger.info("Initializing the OpenAI model=%s", OPENAI_DEFAULT_MODEL)
                self.llm = OpenAI(
                    model=OPENAI_DEFAULT_MODEL, api_key=settings.openai.api_key
                )
            case "mock":
                self.llm = MockLLM()
            case _:
                raise ValueError(f"Unknown LLM mode: {llm_mode}")

    @inject
    def switch_model(self, new_model: str, settings: Settings) -> None:
        """Swap the active LLM at runtime."""
        if new_model in ("gpt-3.5-turbo", "gpt-4"):
            logger.info("Switching to OpenAI model=%s", new_model)
            self.llm = OpenAI(model=new_model, api_key=settings.openai.api_key)
        elif new_model == "mistral-7B":
            logger.info("Switching to local model=%s", new_model)
            self.llm = self._build_local_llm(settings)
        else:
            logger.warning("Unknown model %r; keeping the current LLM", new_model)

    @staticmethod
    def _build_local_llm(settings: Settings) -> LLM:
        """Build a LlamaCPP LLM from the local settings.

        Shared by the "local" startup mode and switch_model("mistral-7B").
        """
        from llama_index.llms import LlamaCPP

        prompt_style_cls = get_prompt_style(settings.local.prompt_style)
        prompt_style = prompt_style_cls(
            default_system_prompt=settings.local.default_system_prompt
        )
        return LlamaCPP(
            # To load weights from disk instead of downloading, use:
            # model_path=str(models_path / settings.local.llm_hf_model_file),
            model_url=MISTRAL_7B_MODEL_URL,
            temperature=0.1,
            max_new_tokens=settings.llm.max_new_tokens,
            context_window=3900,
            generate_kwargs={},
            model_kwargs={"n_gpu_layers": -1},  # offload all layers to the GPU
            messages_to_prompt=prompt_style.messages_to_prompt,
            completion_to_prompt=prompt_style.completion_to_prompt,
            verbose=True,
        )
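
# --- Usage sketch (illustrative, not part of the module) ---------------------
# A minimal sketch of how this component might be wired and swapped at
# runtime, assuming an injector container that can resolve Settings; the
# variable names below are hypothetical, not from this codebase.
#
#     from injector import Injector
#
#     container = Injector()  # assumes Settings is resolvable by the injector
#     llm_component = container.get(LLMComponent)
#     settings = container.get(Settings)
#
#     # Later, e.g. from a request handler when running in "dynamic" mode:
#     llm_component.switch_model("gpt-4", settings=settings)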