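"""LLM component for private-gpt.

Selects and instantiates the LLM backend based on ``settings.llm.mode``:
``local`` (llama.cpp over a GGUF model file), ``sagemaker`` (custom SageMaker
endpoint client), ``openai``, ``mock`` (dummy output for tests), and
``dynamic`` (starts on OpenAI and can switch backends at runtime).
"""
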
import logging
import os

from injector import inject, singleton
from llama_index.llms import MockLLM, OpenAI
from llama_index.llms.base import LLM

from private_gpt.components.llm.prompt_helper import get_prompt_style
from private_gpt.paths import models_path
from private_gpt.settings.settings import Settings

logger = logging.getLogger(__name__)

# Mistral 7B Instruct (GGUF, Q4_K_M quant) on Hugging Face. The ``/resolve/``
# path downloads the actual weights; ``/raw/`` would only return the Git LFS
# pointer file.
MISTRAL_7B_MODEL_URL = (
    "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
    "/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf?download=true"
)


@singleton
class LLMComponent:
    llm: LLM

    @inject
    def __init__(self, settings: Settings) -> None:
        self.llm_mode = settings.llm.mode
        logger.info("Initializing the LLM in mode=%s", self.llm_mode)

        match self.llm_mode:
            case "local":
                from llama_index.llms import LlamaCPP
                prompt_style_cls = get_prompt_style(settings.local.prompt_style)
                prompt_style = prompt_style_cls(
                    default_system_prompt=settings.local.default_system_prompt
                )
                self.llm = LlamaCPP(
                    model_path=str(models_path / settings.local.llm_hf_model_file),
                    #model_url= "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf?download=true",
                    temperature=0.1,
                    max_new_tokens=settings.llm.max_new_tokens,
                    context_window=3900,
                    generate_kwargs={},
                    model_kwargs={"n_gpu_layers": -1},
                    messages_to_prompt=prompt_style.messages_to_prompt,
                    completion_to_prompt=prompt_style.completion_to_prompt,
                    verbose=True,
                )
            case "sagemaker":
                from private_gpt.components.llm.custom.sagemaker import SagemakerLLM
                self.llm = SagemakerLLM(
                    endpoint_name=settings.sagemaker.llm_endpoint_name,
                )
            case "openai":
                from llama_index.llms import OpenAI
                openai_settings = settings.openai.api_key

                #default startup
                logger.info("Initializing the GPT Model in=%s", "gpt-3.5-turbo")
                self.llm = OpenAI(model="gpt-3.5-turbo", api_key=os.environ.get("OPENAI_API_KEY"))
  
            case "mock":
                self.llm = MockLLM()


            case "dynamic":
                from llama_index.llms import OpenAI
                openai_settings = settings.openai.api_key

                #default startup
                logger.info("Initializing the GPT Model in=%s", "gpt-3.5-turbo")
                self.llm = OpenAI(model="gpt-3.5-turbo", api_key=os.environ.get("OPENAI_API_KEY"))
                
                
                def switch_model(new_model: str) -> None:
                    nonlocal self 
                    from llama_index.llms import LlamaCPP
                    openai_settings = settings.openai.api_key
                    
                    if new_model == "gpt-3.5-turbo":
                        self.llm = OpenAI(model="gpt-3.5-turbo", api_key=os.environ.get("OPENAI_API_KEY"))
                        logger.info("Initializing the LLM Model in=%s", "gpt-3.5-turbo")
                        
                    elif new_model == "gpt-4":
                        # Initialize with the new model 
                        self.llm = OpenAI(model="gpt-4", api_key=os.environ.get("OPENAI_API_KEY"))
                        logger.info("Initializing the LLM Model in=%s", "gpt-4")
            
                    
                    elif new_model == "mistral-7B":
                        prompt_style_cls = get_prompt_style(settings.local.prompt_style)
                        prompt_style = prompt_style_cls(
                            default_system_prompt=settings.local.default_system_prompt
                        )
                        self.llm = LlamaCPP(
                            #model_path=str(models_path / settings.local.llm_hf_model_file),
                            model_url= "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf?download=true",
                            temperature=0.1,
                            max_new_tokens=settings.llm.max_new_tokens,
                            context_window=3900,
                            generate_kwargs={},
                            model_kwargs={"n_gpu_layers": -1},
                            messages_to_prompt=prompt_style.messages_to_prompt,
                            completion_to_prompt=prompt_style.completion_to_prompt,
                            verbose=True,
                        )
                        logger.info("Initializing the LLM Model in=%s", "Mistral-7B")
                        
    def switch_to_model(self, new_model: str) -> None:
        """Swap the underlying LLM at runtime; only valid in ``dynamic`` mode."""
        if self.llm_mode == "dynamic":
            self.switch_model(new_model)  # closure bound in __init__
        else:
            logger.warning(
                "Model switching not supported in current mode: %s", self.llm_mode
            )