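# Gradio demo: serves microsoft/phi-2 with a QLoRA adapter merged in for inference.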
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import gradio as gr

# Use GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
# Base model and adapter paths
base_model_name = "microsoft/phi-2"  # Pull from HF Hub directly
adapter_path = "Shriti09/Microsoft-Phi-QLora"  # Update with your Hugging Face repo path
print("π§ Loading base model...") | |
# Load the base model | |
base_model = AutoModelForCausalLM.from_pretrained( | |
base_model_name, | |
torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32 | |
) | |
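# Note: since the adapter comes from QLoRA fine-tuning, the base model could
# also be loaded in 4-bit to cut memory (a sketch, assuming bitsandbytes is
# installed). The merge step below needs full/half-precision weights, though,
# so with a 4-bit base the adapter would have to stay unmerged:
#
#   from transformers import BitsAndBytesConfig
#   bnb_config = BitsAndBytesConfig(
#       load_in_4bit=True,
#       bnb_4bit_compute_dtype=torch.bfloat16,
#   )
#   base_model = AutoModelForCausalLM.from_pretrained(
#       base_model_name,
#       quantization_config=bnb_config,
#       device_map="auto",
#   )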
print("π§ Loading LoRA adapter...") | |
# Load the LoRA adapter | |
adapter_model = PeftModel.from_pretrained(base_model, adapter_path) | |
print("π Merging adapter into base model...") | |
# Merge adapter into the base model | |
merged_model = adapter_model.merge_and_unload() | |
merged_model.eval() | |
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

print("✅ Model ready for inference!")
# Text generation function
def generate_text(prompt):
    # Tokenize the input and move it to the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = merged_model.generate(
            **inputs,
            max_new_tokens=150,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode and return the generated response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
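# Example usage (hypothetical prompt):
#   generate_text("Explain QLoRA fine-tuning in two sentences.")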
# Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("<h1>🧠 Phi-2 QLoRA Text Generator</h1>")
    # Textbox for user input
    prompt = gr.Textbox(label="Enter your prompt:", lines=2)
    # Output textbox for generated text
    output = gr.Textbox(label="Generated text:", lines=5)
    # Button to trigger text generation
    generate_button = gr.Button("Generate Text")
    # Set the button action to generate text
    generate_button.click(generate_text, inputs=prompt, outputs=output)

# Launch the app
demo.launch(share=True)
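# Note: share=True creates a public link when running locally; on Hugging Face
# Spaces the app is already served publicly, so the flag is redundant there.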