import logging
import os

from injector import inject, singleton
from llama_index.llms import MockLLM, OpenAI
from llama_index.llms.base import LLM

from private_gpt.components.llm.prompt_helper import get_prompt_style
from private_gpt.paths import models_path
from private_gpt.settings.settings import Settings

logger = logging.getLogger(__name__)

# GGUF checkpoint used by the "mistral-7B" option in dynamic mode.
# Note: the /resolve/ URL returns the actual weights; a /raw/ URL would only
# return the Git LFS pointer file.
MODEL_URL = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf?download=true"


@singleton
class LLMComponent:
    llm: LLM

    @inject
    def __init__(self, settings: Settings) -> None:
        self.llm_mode = settings.llm.mode
        logger.info("Initializing the LLM in mode=%s", self.llm_mode)
        match self.llm_mode:
            case "local":
                from llama_index.llms import LlamaCPP

                prompt_style_cls = get_prompt_style(settings.local.prompt_style)
                prompt_style = prompt_style_cls(
                    default_system_prompt=settings.local.default_system_prompt
                )

                self.llm = LlamaCPP(
                    model_path=str(models_path / settings.local.llm_hf_model_file),
                    # model_url=MODEL_URL,  # alternative: download instead of using a local file
                    temperature=0.1,
                    max_new_tokens=settings.llm.max_new_tokens,
                    context_window=3900,
                    generate_kwargs={},
                    # -1 offloads all layers to the GPU when one is available
                    model_kwargs={"n_gpu_layers": -1},
                    messages_to_prompt=prompt_style.messages_to_prompt,
                    completion_to_prompt=prompt_style.completion_to_prompt,
                    verbose=True,
                )

            case "sagemaker":
                from private_gpt.components.llm.custom.sagemaker import SagemakerLLM

                self.llm = SagemakerLLM(
                    endpoint_name=settings.sagemaker.llm_endpoint_name,
                )

            case "openai":
                # Default startup model for OpenAI mode.
                logger.info("Initializing OpenAI model=%s", "gpt-3.5-turbo")
                self.llm = OpenAI(
                    model="gpt-3.5-turbo",
                    api_key=os.environ.get("OPENAI_API_KEY"),
                )

            case "mock":
                self.llm = MockLLM()

            case "dynamic":
                # Start on gpt-3.5-turbo; switch_to_model() can swap the backend later.
                logger.info("Initializing OpenAI model=%s", "gpt-3.5-turbo")
                self.llm = OpenAI(
                    model="gpt-3.5-turbo",
                    api_key=os.environ.get("OPENAI_API_KEY"),
                )

                def switch_model(new_model: str) -> None:
                    # Closure over `settings`; mutates self.llm in place, so no
                    # `nonlocal` is needed.
                    from llama_index.llms import LlamaCPP

                    if new_model == "gpt-3.5-turbo":
                        self.llm = OpenAI(
                            model="gpt-3.5-turbo",
                            api_key=os.environ.get("OPENAI_API_KEY"),
                        )
                        logger.info("Switched LLM to model=%s", "gpt-3.5-turbo")
                    elif new_model == "gpt-4":
                        self.llm = OpenAI(
                            model="gpt-4",
                            api_key=os.environ.get("OPENAI_API_KEY"),
                        )
                        logger.info("Switched LLM to model=%s", "gpt-4")
                    elif new_model == "mistral-7B":
                        prompt_style_cls = get_prompt_style(settings.local.prompt_style)
                        prompt_style = prompt_style_cls(
                            default_system_prompt=settings.local.default_system_prompt
                        )
                        self.llm = LlamaCPP(
                            # model_path=str(models_path / settings.local.llm_hf_model_file),
                            model_url=MODEL_URL,
                            temperature=0.1,
                            max_new_tokens=settings.llm.max_new_tokens,
                            context_window=3900,
                            generate_kwargs={},
                            model_kwargs={"n_gpu_layers": -1},
                            messages_to_prompt=prompt_style.messages_to_prompt,
                            completion_to_prompt=prompt_style.completion_to_prompt,
                            verbose=True,
                        )
                        logger.info("Switched LLM to model=%s", "Mistral-7B")

                # Bind the closure to the instance so switch_to_model() can reach it.
                self._switch_model = switch_model

    def switch_to_model(self, new_model: str) -> None:
        if self.llm_mode == "dynamic":
            self._switch_model(new_model)
        else:
            logger.warning(
                "Model switching not supported in current mode: %s", self.llm_mode
            )
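

# A minimal usage sketch, assuming the component is resolved through
# privateGPT's dependency-injection container. The `global_injector` import is
# an assumption (its module and name vary across privateGPT versions); in
# practice the component is injected into services rather than fetched by hand.
if __name__ == "__main__":
    from private_gpt.di import global_injector  # assumed DI entry point for this sketch

    llm_component = global_injector.get(LLMComponent)
    # Only honored when settings.llm.mode == "dynamic"; any other mode logs a warning.
    llm_component.switch_to_model("gpt-4")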