Spaces:
Sleeping
Sleeping
Update my_model/LLAMA2/LLAMA2_model.py
Browse files- my_model/LLAMA2/LLAMA2_model.py +22 -11
my_model/LLAMA2/LLAMA2_model.py
CHANGED
|
@@ -3,7 +3,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
|
| 3 |
from typing import Optional
|
| 4 |
import bitsandbytes # only for using on GPU
|
| 5 |
import accelerate # only for using on GPU
|
| 6 |
-
from my_model.config import LLAMA2_config as config
|
| 7 |
import warnings
|
| 8 |
|
| 9 |
# Suppress only FutureWarning from transformers
|
|
@@ -32,6 +32,7 @@ class Llama2ModelManager:
|
|
| 32 |
"""
|
| 33 |
Initializes the Llama2ModelManager class with configuration settings.
|
| 34 |
"""
|
|
|
|
| 35 |
self.device: str = config.DEVICE
|
| 36 |
self.model_name: str = config.MODEL_NAME
|
| 37 |
self.tokenizer_name: str = config.TOKENIZER_NAME
|
|
@@ -51,6 +52,7 @@ class Llama2ModelManager:
|
|
| 51 |
Returns:
|
| 52 |
BitsAndBytesConfig: Configuration for BitsAndBytes optimized model.
|
| 53 |
"""
|
|
|
|
| 54 |
if self.quantization == '4bit':
|
| 55 |
return BitsAndBytesConfig(
|
| 56 |
load_in_4bit=True,
|
|
@@ -68,11 +70,13 @@ class Llama2ModelManager:
|
|
| 68 |
|
| 69 |
def load_model(self) -> AutoModelForCausalLM:
|
| 70 |
"""
|
| 71 |
-
Loads the LLaMA-2 model based on the specified configuration.
|
|
|
|
| 72 |
|
| 73 |
Returns:
|
| 74 |
AutoModelForCausalLM: Loaded LLaMA-2 model.
|
| 75 |
"""
|
|
|
|
| 76 |
if self.model is not None:
|
| 77 |
print("Model is already loaded.")
|
| 78 |
return self.model
|
|
@@ -99,6 +103,7 @@ class Llama2ModelManager:
|
|
| 99 |
Returns:
|
| 100 |
AutoTokenizer: Loaded tokenizer for LLaMA-2 model.
|
| 101 |
"""
|
|
|
|
| 102 |
self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name, use_fast=self.use_fast,
|
| 103 |
token=self.access_token,
|
| 104 |
trust_remote_code=self.trust_remote,
|
|
@@ -111,12 +116,17 @@ class Llama2ModelManager:
|
|
| 111 |
|
| 112 |
return self.tokenizer
|
| 113 |
|
| 114 |
-
def load_model_and_tokenizer(self, for_fine_tuning):
|
| 115 |
"""
|
| 116 |
-
Loads
|
| 117 |
-
|
| 118 |
-
:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
"""
|
|
|
|
| 120 |
if for_fine_tuning:
|
| 121 |
self.tokenizer = self.load_tokenizer()
|
| 122 |
self.model = self.load_model()
|
|
@@ -128,17 +138,17 @@ class Llama2ModelManager:
|
|
| 128 |
return self.model, self.tokenizer
|
| 129 |
|
| 130 |
|
| 131 |
-
def add_special_tokens(self, tokens: Optional[
|
| 132 |
"""
|
| 133 |
-
Adds special tokens to the tokenizer and updates the model's token embeddings if the model is loaded
|
| 134 |
-
only if the tokenizer is loaded.
|
| 135 |
|
| 136 |
Args:
|
| 137 |
-
tokens (
|
| 138 |
|
| 139 |
Returns:
|
| 140 |
None
|
| 141 |
"""
|
|
|
|
| 142 |
if self.tokenizer is None:
|
| 143 |
print("Tokenizer is not loaded. Cannot add special tokens.")
|
| 144 |
return
|
|
@@ -166,7 +176,8 @@ class Llama2ModelManager:
|
|
| 166 |
|
| 167 |
|
| 168 |
if __name__ == "__main__":
|
| 169 |
-
pass
|
|
|
|
| 170 |
LLAMA2_manager = Llama2ModelManager()
|
| 171 |
LLAMA2_model = LLAMA2_manager.load_model() # First time loading the model
|
| 172 |
LLAMA2_tokenizer = LLAMA2_manager.load_tokenizer()
|
|
|
|
| 3 |
from typing import Optional
|
| 4 |
import bitsandbytes # only for using on GPU
|
| 5 |
import accelerate # only for using on GPU
|
| 6 |
+
from my_model.config import LLAMA2_config as config
|
| 7 |
import warnings
|
| 8 |
|
| 9 |
# Suppress only FutureWarning from transformers
|
|
|
|
| 32 |
"""
|
| 33 |
Initializes the Llama2ModelManager class with configuration settings.
|
| 34 |
"""
|
| 35 |
+
|
| 36 |
self.device: str = config.DEVICE
|
| 37 |
self.model_name: str = config.MODEL_NAME
|
| 38 |
self.tokenizer_name: str = config.TOKENIZER_NAME
|
|
|
|
| 52 |
Returns:
|
| 53 |
BitsAndBytesConfig: Configuration for BitsAndBytes optimized model.
|
| 54 |
"""
|
| 55 |
+
|
| 56 |
if self.quantization == '4bit':
|
| 57 |
return BitsAndBytesConfig(
|
| 58 |
load_in_4bit=True,
|
|
|
|
| 70 |
|
| 71 |
def load_model(self) -> AutoModelForCausalLM:
|
| 72 |
"""
|
| 73 |
+
Loads the LLaMA-2 model based on the specified configuration.
|
| 74 |
+
If the model is already loaded, returns the existing model.
|
| 75 |
|
| 76 |
Returns:
|
| 77 |
AutoModelForCausalLM: Loaded LLaMA-2 model.
|
| 78 |
"""
|
| 79 |
+
|
| 80 |
if self.model is not None:
|
| 81 |
print("Model is already loaded.")
|
| 82 |
return self.model
|
|
|
|
| 103 |
Returns:
|
| 104 |
AutoTokenizer: Loaded tokenizer for LLaMA-2 model.
|
| 105 |
"""
|
| 106 |
+
|
| 107 |
self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name, use_fast=self.use_fast,
|
| 108 |
token=self.access_token,
|
| 109 |
trust_remote_code=self.trust_remote,
|
|
|
|
| 116 |
|
| 117 |
return self.tokenizer
|
| 118 |
|
| 119 |
+
def load_model_and_tokenizer(self, for_fine_tuning: bool) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
|
| 120 |
"""
|
| 121 |
+
Loads the LLaMA-2 model and tokenizer, and optionally adds special tokens for fine-tuning.
|
| 122 |
+
|
| 123 |
+
Args:
|
| 124 |
+
for_fine_tuning (bool): Whether to prepare the model and tokenizer for fine-tuning.
|
| 125 |
+
|
| 126 |
+
Returns:
|
| 127 |
+
Tuple[AutoModelForCausalLM, AutoTokenizer]: The loaded model and tokenizer.
|
| 128 |
"""
|
| 129 |
+
|
| 130 |
if for_fine_tuning:
|
| 131 |
self.tokenizer = self.load_tokenizer()
|
| 132 |
self.model = self.load_model()
|
|
|
|
| 138 |
return self.model, self.tokenizer
|
| 139 |
|
| 140 |
|
| 141 |
+
def add_special_tokens(self, tokens: Optional[List[str]] = None) -> None:
|
| 142 |
"""
|
| 143 |
+
Adds special tokens to the tokenizer and updates the model's token embeddings if the model is loaded.
|
|
|
|
| 144 |
|
| 145 |
Args:
|
| 146 |
+
tokens (Optional[List[str]]): Special tokens to add. Defaults to a predefined set.
|
| 147 |
|
| 148 |
Returns:
|
| 149 |
None
|
| 150 |
"""
|
| 151 |
+
|
| 152 |
if self.tokenizer is None:
|
| 153 |
print("Tokenizer is not loaded. Cannot add special tokens.")
|
| 154 |
return
|
|
|
|
| 176 |
|
| 177 |
|
| 178 |
if __name__ == "__main__":
|
| 179 |
+
pass # uncomment to load the model and tokenizer and add the designed special tokens.
|
| 180 |
+
|
| 181 |
LLAMA2_manager = Llama2ModelManager()
|
| 182 |
LLAMA2_model = LLAMA2_manager.load_model() # First time loading the model
|
| 183 |
LLAMA2_tokenizer = LLAMA2_manager.load_tokenizer()
|