Update app.py
app.py
CHANGED
@@ -56,9 +56,9 @@ from peft import LoraConfig, get_peft_model
 # Load LLaMA 2 model in 4-bit mode to save memory
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-
-
-    device_map="cpu",
+    load_in_4bit=True,   # Use 4-bit quantization for efficiency
+    device_map="auto",
+    #device_map="cpu",
     quantization_config=None
 )
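Note: load_in_4bit=True is the legacy shortcut; newer transformers releases prefer passing an explicit BitsAndBytesConfig as quantization_config instead of leaving it None. A minimal sketch of an equivalent load under that assumption (bitsandbytes and accelerate installed; the checkpoint name below is a placeholder, app.py sets its own model_name):

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_name = "meta-llama/Llama-2-7b-hf"  # placeholder; app.py may use a different checkpoint

# Explicit 4-bit config, equivalent to load_in_4bit=True on newer transformers
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # compute in fp16 for a reasonable speed/quality balance
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",  # let accelerate place layers on the available GPU(s)
)

Worth noting: bitsandbytes 4-bit quantization generally requires a CUDA GPU, so switching device_map from "cpu" to "auto" is what actually makes the 4-bit path usable; on a CPU-only machine the 4-bit load will fail.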