import logging
import os

from injector import inject, singleton
from llama_index.llms import MockLLM, OpenAI
from llama_index.llms.base import LLM

from private_gpt.components.llm.prompt_helper import get_prompt_style
from private_gpt.paths import models_path
from private_gpt.settings.settings import Settings

logger = logging.getLogger(__name__)

# Default GGUF model used for local inference:
# https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/raw/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
@singleton
class LLMComponent:
    llm: LLM

    @inject
    def __init__(self, settings: Settings) -> None:
        llm_mode = settings.llm.mode
        # Remember the configured mode so switch_to_model() can check it later.
        self.llm_mode = llm_mode
        logger.info("Initializing the LLM in mode=%s", llm_mode)
        match settings.llm.mode:
            case "local":
                from llama_index.llms import LlamaCPP

                prompt_style_cls = get_prompt_style(settings.local.prompt_style)
                prompt_style = prompt_style_cls(
                    default_system_prompt=settings.local.default_system_prompt
                )
                self.llm = LlamaCPP(
                    model_path=str(models_path / settings.local.llm_hf_model_file),
                    # model_url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf?download=true",
                    temperature=0.1,
                    max_new_tokens=settings.llm.max_new_tokens,
                    context_window=3900,
                    generate_kwargs={},
                    # Offload all layers to the GPU when one is available.
                    model_kwargs={"n_gpu_layers": -1},
                    messages_to_prompt=prompt_style.messages_to_prompt,
                    completion_to_prompt=prompt_style.completion_to_prompt,
                    verbose=True,
                )
case "sagemaker":
from private_gpt.components.llm.custom.sagemaker import SagemakerLLM
self.llm = SagemakerLLM(
endpoint_name=settings.sagemaker.llm_endpoint_name,
)
case "openai":
from llama_index.llms import OpenAI
openai_settings = settings.openai.api_key
#default startup
logger.info("Initializing the GPT Model in=%s", "gpt-3.5-turbo")
self.llm = OpenAI(model="gpt-3.5-turbo", api_key=os.environ.get("OPENAI_API_KEY"))
case "mock":
self.llm = MockLLM()
case "dynamic":
from llama_index.llms import OpenAI
openai_settings = settings.openai.api_key
#default startup
logger.info("Initializing the GPT Model in=%s", "gpt-3.5-turbo")
self.llm = OpenAI(model="gpt-3.5-turbo", api_key=os.environ.get("OPENAI_API_KEY"))
                def switch_model(new_model: str) -> None:
                    # Rebind self.llm to the requested backend at runtime.
                    from llama_index.llms import LlamaCPP

                    if new_model == "gpt-3.5-turbo":
                        self.llm = OpenAI(
                            model="gpt-3.5-turbo",
                            api_key=os.environ.get("OPENAI_API_KEY"),
                        )
                        logger.info("Initializing the LLM model=%s", "gpt-3.5-turbo")
                    elif new_model == "gpt-4":
                        # Initialize with the new model
                        self.llm = OpenAI(
                            model="gpt-4", api_key=os.environ.get("OPENAI_API_KEY")
                        )
                        logger.info("Initializing the LLM model=%s", "gpt-4")
                    elif new_model == "mistral-7B":
                        prompt_style_cls = get_prompt_style(settings.local.prompt_style)
                        prompt_style = prompt_style_cls(
                            default_system_prompt=settings.local.default_system_prompt
                        )
                        self.llm = LlamaCPP(
                            # model_path=str(models_path / settings.local.llm_hf_model_file),
                            model_url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf?download=true",
                            temperature=0.1,
                            max_new_tokens=settings.llm.max_new_tokens,
                            context_window=3900,
                            generate_kwargs={},
                            model_kwargs={"n_gpu_layers": -1},
                            messages_to_prompt=prompt_style.messages_to_prompt,
                            completion_to_prompt=prompt_style.completion_to_prompt,
                            verbose=True,
                        )
                        logger.info("Initializing the LLM model=%s", "Mistral-7B")

                # Expose the switcher on the instance so switch_to_model() can call it.
                self.switch_model = switch_model
    def switch_to_model(self, new_model: str) -> None:
        if self.llm_mode == "dynamic":
            self.switch_model(new_model)  # Calls the switcher bound in __init__
        else:
            logger.warning(
                "Model switching not supported in current mode: %s", self.llm_mode
            )