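"""LLM component for this private-gpt based app.

Builds the LLM selected by ``settings.llm.mode`` (local llama.cpp, SageMaker,
OpenAI, a runtime-switchable "dynamic" mode, or a MockLLM) and exposes
``switch_model`` to swap the active model while the application is running.
"""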
import logging

from injector import inject, singleton
from llama_index.llms import MockLLM, OpenAI
from llama_index.llms.base import LLM

from private_gpt.components.llm.prompt_helper import get_prompt_style
from private_gpt.paths import models_path
from private_gpt.settings.settings import Settings

logger = logging.getLogger(__name__)

@singleton
class LLMComponent:
    llm: LLM

    @inject
    def __init__(self, settings: Settings) -> None:
        llm_mode = settings.llm.mode
        logger.info("Initializing the LLM in mode=%s", llm_mode)
        match llm_mode:
            case "local":
                from llama_index.llms import LlamaCPP

                prompt_style_cls = get_prompt_style(settings.local.prompt_style)
                prompt_style = prompt_style_cls(
                    default_system_prompt=settings.local.default_system_prompt
                )
                # Download the quantized Mistral 7B Instruct GGUF from Hugging Face
                # and serve it locally through llama.cpp.
                model_url = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf?download=true"
                self.llm = LlamaCPP(
                    model_url=model_url,
                    temperature=0.1,
                    max_new_tokens=settings.llm.max_new_tokens,
                    context_window=3900,
                    generate_kwargs={},
                    # Offload all layers to the GPU; set to 0 for CPU-only inference.
                    model_kwargs={"n_gpu_layers": -1},
                    # Render chat messages/completions in the prompt format expected
                    # by the configured prompt style.
                    messages_to_prompt=prompt_style.messages_to_prompt,
                    completion_to_prompt=prompt_style.completion_to_prompt,
                    verbose=True,
                )
case "sagemaker":
from private_gpt.components.llm.custom.sagemaker import SagemakerLLM
self.llm = SagemakerLLM(
endpoint_name=settings.sagemaker.llm_endpoint_name,
)
case "openai":
from llama_index.llms import OpenAI
openai_settings = settings.openai.api_key
#default startup
logger.info("Initializing the GPT Model in=%s", "gpt-3.5-turbo")
self.llm = OpenAI(model="gpt-3.5-turbo", api_key=openai_settings)
case "dynamic":
from llama_index.llms import OpenAI
openai_settings = settings.openai.api_key
#default startup
logger.info("Initializing the GPT Model in=%s", "gpt-3.5-turbo")
self.llm = OpenAI(model="gpt-3.5-turbo", api_key=openai_settings)
case "mock":
self.llm = MockLLM()
    @inject
    def switch_model(self, new_model: str, settings: Settings) -> None:
        """Swap the active LLM at runtime (used with the "dynamic" mode)."""
        from llama_index.llms import LlamaCPP

        openai_api_key = settings.openai.api_key
        if new_model == "gpt-3.5-turbo":
            logger.info("Switching to OpenAI model=%s", "gpt-3.5-turbo")
            self.llm = OpenAI(model="gpt-3.5-turbo", api_key=openai_api_key)
        elif new_model == "gpt-4":
            logger.info("Switching to OpenAI model=%s", "gpt-4")
            self.llm = OpenAI(model="gpt-4", api_key=openai_api_key)
        elif new_model == "mistral-7B":
            # Rebuild the local llama.cpp backend with the quantized Mistral model.
            model_url = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf?download=true"
            prompt_style_cls = get_prompt_style(settings.local.prompt_style)
            prompt_style = prompt_style_cls(
                default_system_prompt=settings.local.default_system_prompt
            )
            self.llm = LlamaCPP(
                # model_path=str(models_path / settings.local.llm_hf_model_file),
                model_url=model_url,
                temperature=0.1,
                max_new_tokens=settings.llm.max_new_tokens,
                context_window=3900,
                generate_kwargs={},
                model_kwargs={"n_gpu_layers": -1},
                messages_to_prompt=prompt_style.messages_to_prompt,
                completion_to_prompt=prompt_style.completion_to_prompt,
                verbose=True,
            )
        else:
            logger.warning("Unknown model %r requested; keeping the current LLM", new_model)
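
# Usage sketch (illustrative only, kept as comments so the module stays importable).
# It assumes an application-level injector named `global_injector`, as in upstream
# private-gpt's `private_gpt.di`; adapt the wiring to this Space's actual entry point.
#
#     from private_gpt.di import global_injector
#
#     llm_component = global_injector.get(LLMComponent)
#     settings = global_injector.get(Settings)
#
#     # Start on the default gpt-3.5-turbo, then hot-swap to the local Mistral build.
#     llm_component.switch_model("mistral-7B", settings=settings)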