Update private_gpt/components/llm/llm_component.py
private_gpt/components/llm/llm_component.py
CHANGED
@@ -3,16 +3,15 @@ import logging
 from injector import inject, singleton
 from llama_index.llms import MockLLM
 from llama_index.llms.base import LLM
+from fastapi import Depends
+from llama_index.llms import OpenAI
 
 from private_gpt.components.llm.prompt_helper import get_prompt_style
 from private_gpt.paths import models_path
 from private_gpt.settings.settings import Settings
 
-import os
-
 logger = logging.getLogger(__name__)
 
-
 @singleton
 class LLMComponent:
     llm: LLM
@@ -21,43 +20,52 @@ class LLMComponent:
     def __init__(self, settings: Settings) -> None:
         llm_mode = settings.llm.mode
         logger.info("Initializing the LLM in mode=%s", llm_mode)
+
+
         match settings.llm.mode:
             case "local":
                 from llama_index.llms import LlamaCPP
-
                 prompt_style_cls = get_prompt_style(settings.local.prompt_style)
                 prompt_style = prompt_style_cls(
                     default_system_prompt=settings.local.default_system_prompt
                 )
-
                 self.llm = LlamaCPP(
                     model_path=str(models_path / settings.local.llm_hf_model_file),
                     temperature=0.1,
                     max_new_tokens=settings.llm.max_new_tokens,
-                    # llama2 has a context window of 4096 tokens,
-                    # but we set it lower to allow for some wiggle room
                     context_window=3900,
                     generate_kwargs={},
-                    # All to GPU
                     model_kwargs={"n_gpu_layers": -1},
-                    # transform inputs into Llama2 format
                     messages_to_prompt=prompt_style.messages_to_prompt,
                     completion_to_prompt=prompt_style.completion_to_prompt,
                     verbose=True,
                 )
-
             case "sagemaker":
                 from private_gpt.components.llm.custom.sagemaker import SagemakerLLM
-
                 self.llm = SagemakerLLM(
                     endpoint_name=settings.sagemaker.llm_endpoint_name,
                 )
             case "openai":
                 from llama_index.llms import OpenAI
+                openai_settings = settings.openai.api_key
 
-                openai_settings = settings.openai.api_key
-                self.llm = OpenAI(api_key=openai_settings)
+                #default startup
+                logger.info("Initializing the GPT Model in=%s", "gpt-3.5-turbo")
+                self.llm = OpenAI(model="gpt-3.5-turbo", api_key=openai_settings)
+
             case "mock":
                 self.llm = MockLLM()
 
+    @inject
+    def switch_model(self, new_model: str, settings: Settings) -> None:
+        openai_settings = settings.openai.api_key
+        if type(self.llm) == OpenAI:
+            if new_model == "gpt-3.5-turbo":
+                self.llm = OpenAI(model="gpt-3.5-turbo", api_key=openai_settings)
+            elif new_model == "gpt-4":
+                # Initialize with the new model
+                self.llm = OpenAI(model="gpt-4", api_key=openai_settings)
+                logger.info("Initializing the GPT Model in=%s", "gpt-4")
+
+
 
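For reference, a minimal usage sketch of the new switch_model method. This is a sketch only: it assumes a Settings instance configured with llm.mode "openai" and a valid settings.openai.api_key; the demo wrapper function is illustrative and not part of this commit.

    from private_gpt.components.llm.llm_component import LLMComponent
    from private_gpt.settings.settings import Settings

    def demo(settings: Settings) -> None:
        # Constructed in "openai" mode, the component starts on gpt-3.5-turbo.
        component = LLMComponent(settings)
        # Swap the singleton's underlying LLM to gpt-4, then back again.
        component.switch_model("gpt-4", settings)
        component.switch_model("gpt-3.5-turbo", settings)

Note that switch_model only takes effect when the component was initialized in "openai" mode, since it first checks that self.llm is an OpenAI instance; in "local", "sagemaker", and "mock" modes the call is a no-op.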