import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
# Use GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
# Base model and adapter paths
base_model_name = "microsoft/phi-2" # Pull from HF Hub directly
adapter_path = "Shriti09/Microsoft-Phi-QLora" # Update with your Hugging Face repo path
# Load the base model in bf16 on GPU, fp32 on CPU
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
).to(device)

# Load the tokenizer so it can be saved alongside the merged weights
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
# Load the LoRA adapter on top of the base model
adapter_model = PeftModel.from_pretrained(base_model, adapter_path)

# Fold the LoRA weight deltas into the base weights and drop the adapter layers
merged_model = adapter_model.merge_and_unload()
# Save the merged model and tokenizer locally (into the Space's storage)
merged_model.save_pretrained("./merged_model")
tokenizer.save_pretrained("./merged_model")
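
# Optional sanity check (a minimal sketch, not part of the original script):
# reload the merged model from disk and generate a short completion. The prompt
# and generation settings below are illustrative placeholders.
check_model = AutoModelForCausalLM.from_pretrained(
    "./merged_model",
    torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
).to(device)
check_tokenizer = AutoTokenizer.from_pretrained("./merged_model")

inputs = check_tokenizer("The capital of France is", return_tensors="pt").to(device)
with torch.no_grad():
    output_ids = check_model.generate(**inputs, max_new_tokens=20)
print(check_tokenizer.decode(output_ids[0], skip_special_tokens=True))

# To publish the merged weights to the Hub instead of only saving locally,
# merged_model.push_to_hub("your-username/your-repo") can be used (the repo
# name is a placeholder), assuming you are authenticated via `huggingface-cli login`.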