sksameermujahid committed on
Commit
15a0540
·
verified ·
1 Parent(s): e341d1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -196,11 +196,13 @@ def load_tokenizer_and_model():
196
  print("Tokenizer loaded successfully.")
197
 
198
  print("Loading LLM model...")
199
- # Load the base model
200
  base_model = AutoModelForCausalLM.from_pretrained(
201
  base_model_name,
202
  trust_remote_code=True,
203
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
 
 
204
  device_map="auto"
205
  )
206
 
@@ -208,9 +210,9 @@ def load_tokenizer_and_model():
208
  model_llm = PeftModel.from_pretrained(
209
  base_model,
210
  model_dir,
211
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
212
- device_map="auto"
213
- ).to(device)
214
 
215
  print("LLM model loaded successfully.")
216
  return tokenizer, model_llm
 
196
  print("Tokenizer loaded successfully.")
197
 
198
  print("Loading LLM model...")
199
+ # Load the base model with 4-bit quantization
200
  base_model = AutoModelForCausalLM.from_pretrained(
201
  base_model_name,
202
  trust_remote_code=True,
203
+ load_in_4bit=True,
204
+ bnb_4bit_quant_type="nf4",
205
+ bnb_4bit_compute_dtype=torch.float16,
206
  device_map="auto"
207
  )
208
 
 
210
  model_llm = PeftModel.from_pretrained(
211
  base_model,
212
  model_dir,
213
+ device_map="auto",
214
+ is_trainable=False
215
+ )
216
 
217
  print("LLM model loaded successfully.")
218
  return tokenizer, model_llm