Update app.py
app.py
CHANGED
@@ -24,24 +24,26 @@ base_model = AutoModelForCausalLM.from_pretrained(
     base_model_id,
     quantization_config=None, # Load base normally first
     torch_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16, # Use appropriate dtype
-    device_map="auto", #
+    # device_map="auto", # <--- REMOVE THIS LINE
+    device_map=device, # <--- CHANGE TO THIS (load directly to device)
     trust_remote_code=True
 )
 base_model.config.use_cache = True # Enable cache for inference speed
+print(f"Base model loaded to device: {device}")

+# --- Load PEFT Adapter ---
 print(f"Loading PEFT adapter from: {adapter_path}")
 # Load the PEFT model (adapter) on top of the base model
+# Ensure the base_model is on the correct device before loading PEFT
 model = PeftModel.from_pretrained(base_model, adapter_path)
 print("Adapter loaded.")

+# --- Merge Adapter ---
 print("Merging adapter weights...")
-# Merge the adapter weights into the base model
-# This creates a new model that doesn't need PEFT library for inference
-# Note: This might require significant RAM during the merge process
 model = model.merge_and_unload()
-print("Adapter merged.")
+print("Adapter merged.") # Model should now be on the device specified earlier

-# Load
+# --- Load Tokenizer ---
 print("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)
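For context, the hunk assumes that device, base_model_id, and adapter_path are defined earlier in app.py; those definitions are not part of this diff. Below is a minimal sketch of that surrounding setup and of inference after the merge. The placeholder values (org/base-model, org/peft-adapter, the prompt) are assumptions for illustration, not the Space's actual configuration:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Assumed setup preceding the hunk; names match the diff, values are placeholders.
device = "cuda" if torch.cuda.is_available() else "cpu"
base_model_id = "org/base-model"   # placeholder for the Space's actual base model id
adapter_path = "org/peft-adapter"  # placeholder for the Space's actual adapter repo/path

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=None,  # Load base normally first
    torch_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
    device_map=device,  # Single device string, as in the change above
    trust_remote_code=True,
)
base_model.config.use_cache = True

model = PeftModel.from_pretrained(base_model, adapter_path)
model = model.merge_and_unload()  # Plain transformers model afterwards; peft not needed for inference

tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)

# Example inference with the merged model
inputs = tokenizer("Hello", return_tensors="pt").to(device)
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Passing a single device string instead of device_map="auto" keeps accelerate from sharding the model across GPU, CPU, and disk, which is presumably the point of the change: with all base and adapter weights on one device, merge_and_unload() can run without device-mismatch issues.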