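"""LLM component: builds and swaps the LLM backend used by the application."""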
import logging

from injector import inject, singleton
from llama_index.llms import MockLLM, OpenAI
from llama_index.llms.base import LLM

from private_gpt.components.llm.prompt_helper import get_prompt_style
from private_gpt.settings.settings import Settings

logger = logging.getLogger(__name__)

@singleton
class LLMComponent:
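    """Singleton that owns the application's LLM instance.

    The backend (LlamaCPP, Sagemaker, OpenAI, or a mock) is selected from
    ``settings.llm.mode`` at startup and can be swapped at runtime with
    ``switch_model``.
    """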
    llm: LLM

    @inject
    def __init__(self, settings: Settings) -> None:
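        """Instantiate the LLM selected by ``settings.llm.mode``."""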
        llm_mode = settings.llm.mode
        logger.info("Initializing the LLM in mode=%s", llm_mode)

        match llm_mode:
            case "local":
                from llama_index.llms import LlamaCPP
                prompt_style_cls = get_prompt_style(settings.local.prompt_style)
                prompt_style = prompt_style_cls(
                    default_system_prompt=settings.local.default_system_prompt
                )
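                # The GGUF weights are downloaded from Hugging Face (and cached)
                # instead of being read from a local model file.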
                model_url= "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf?download=true"
                self.llm = LlamaCPP(
                    model_url=model_url,
                    temperature=0.1,
                    max_new_tokens=settings.llm.max_new_tokens,
                    context_window=3900,
                    generate_kwargs={},
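                    # n_gpu_layers=-1 asks llama.cpp to offload all layers to the GPU.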
                    model_kwargs={"n_gpu_layers": -1},
                    messages_to_prompt=prompt_style.messages_to_prompt,
                    completion_to_prompt=prompt_style.completion_to_prompt,
                    verbose=True,
                )
            case "sagemaker":
                from private_gpt.components.llm.custom.sagemaker import SagemakerLLM
                self.llm = SagemakerLLM(
                    endpoint_name=settings.sagemaker.llm_endpoint_name,
                )
            case "openai":
                openai_api_key = settings.openai.api_key

                # Default startup model
                logger.info("Initializing the OpenAI model=%s", "gpt-3.5-turbo")
                self.llm = OpenAI(model="gpt-3.5-turbo", api_key=openai_api_key)

            case "dynamic":
                openai_api_key = settings.openai.api_key

                # "dynamic" boots with gpt-3.5-turbo; switch_model() can swap it later.
                logger.info("Initializing the OpenAI model=%s", "gpt-3.5-turbo")
                self.llm = OpenAI(model="gpt-3.5-turbo", api_key=openai_api_key)
  
            case "mock":
                self.llm = MockLLM()

    @inject
    def switch_model(self, new_model: str, settings: Settings) -> None:
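        """Swap the active LLM at runtime.

        Supports the OpenAI chat models and a local Mistral 7B served
        through LlamaCPP.
        """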
        from llama_index.llms import LlamaCPP

        openai_api_key = settings.openai.api_key

        if new_model == "gpt-3.5-turbo":
            logger.info("Switching the LLM to model=%s", "gpt-3.5-turbo")
            self.llm = OpenAI(model="gpt-3.5-turbo", api_key=openai_api_key)

        elif new_model == "gpt-4":
            logger.info("Switching the LLM to model=%s", "gpt-4")
            self.llm = OpenAI(model="gpt-4", api_key=openai_api_key)
        
        elif new_model == "mistral-7B":
            model_url= "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf?download=true"
            #model_filename = os.path.basename(model_url)
            prompt_style_cls = get_prompt_style(settings.local.prompt_style)
            prompt_style = prompt_style_cls(
                default_system_prompt=settings.local.default_system_prompt
            )
            self.llm = LlamaCPP(
                model_url=model_url,
                temperature=0.1,
                max_new_tokens=settings.llm.max_new_tokens,
                context_window=3900,
                generate_kwargs={},
                model_kwargs={"n_gpu_layers": -1},
                messages_to_prompt=prompt_style.messages_to_prompt,
                completion_to_prompt=prompt_style.completion_to_prompt,
                verbose=True,
            )
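        else:
            # Guard added as a sketch (not in the original): surface requests
            # for a model this component does not know how to build.
            logger.warning("Unknown model '%s'; keeping the current LLM", new_model)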