Update app.py
app.py CHANGED

@@ -196,11 +196,13 @@ def load_tokenizer_and_model():
     print("Tokenizer loaded successfully.")
 
     print("Loading LLM model...")
-    # Load the base model
+    # Load the base model with 4-bit quantization
     base_model = AutoModelForCausalLM.from_pretrained(
         base_model_name,
         trust_remote_code=True,
-
+        load_in_4bit=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch.float16,
         device_map="auto"
     )
 
@@ -208,9 +210,9 @@ def load_tokenizer_and_model():
     model_llm = PeftModel.from_pretrained(
         base_model,
         model_dir,
-
-
-    )
+        device_map="auto",
+        is_trainable=False
+    )
 
     print("LLM model loaded successfully.")
     return tokenizer, model_llm
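For reference, here is how the updated load_tokenizer_and_model() plausibly reads end to end. One caveat: recent transformers releases expect the bnb_4bit_* options to be wrapped in a BitsAndBytesConfig and passed as quantization_config rather than given as bare keyword arguments, so the sketch below uses that form. The tokenizer-loading line is an assumption (it sits outside the diff hunks), as is the module-level definition of base_model_name and model_dir.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

def load_tokenizer_and_model():
    # Assumed: base_model_name and model_dir are defined earlier in app.py,
    # outside the diff hunks shown above.
    tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
    print("Tokenizer loaded successfully.")

    print("Loading LLM model...")
    # Recent transformers releases expect the 4-bit settings inside a
    # BitsAndBytesConfig passed as quantization_config, not as bare kwargs.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        trust_remote_code=True,
        quantization_config=bnb_config,
        device_map="auto",
    )

    # Attach the LoRA adapter from model_dir on top of the quantized base;
    # is_trainable=False keeps the adapter in inference mode.
    model_llm = PeftModel.from_pretrained(
        base_model,
        model_dir,
        is_trainable=False,
    )

    print("LLM model loaded successfully.")
    return tokenizer, model_llm

The device_map="auto" argument that the commit adds to PeftModel.from_pretrained is omitted in this sketch: once accelerate has dispatched the quantized base model, the adapter weights follow its device placement.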