import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import gradio as gr
# Use GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
# Base model and adapter paths
base_model_name = "microsoft/phi-2" # Pull from HF Hub directly
adapter_path = "Shriti09/Microsoft-Phi-QLora" # Update with your Hugging Face repo path
print("π§ Loading base model...")
# Load the base model
base_model = AutoModelForCausalLM.from_pretrained(
base_model_name,
torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
)
print("π§ Loading LoRA adapter...")
# Load the LoRA adapter
adapter_model = PeftModel.from_pretrained(base_model, adapter_path)
print("π Merging adapter into base model...")
# Merge adapter into the base model
merged_model = adapter_model.merge_and_unload()
# Move the merged model to the selected device; inputs are sent to the same device below
merged_model.to(device)
merged_model.eval()
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
print("β
Model ready for inference!")
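# Optional sketch (not part of the original app, assumptions noted inline): the merged
# model could be saved once with save_pretrained and loaded directly on later runs,
# skipping the adapter merge. The directory "merged-phi2-qlora" is an arbitrary example path.
# merged_model.save_pretrained("merged-phi2-qlora")
# tokenizer.save_pretrained("merged-phi2-qlora")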
# Text generation function
def generate_text(prompt):
    # Tokenize the input
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = merged_model.generate(
            **inputs,
            max_new_tokens=150,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id
        )
    # Decode and return the generated response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
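# Quick sanity check outside the UI (illustrative only; the prompt string is an arbitrary example):
# print(generate_text("Explain what a LoRA adapter is in one sentence."))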
# Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("<h1>Phi-2 QLoRA Text Generator</h1>")
    # Textboxes for the user prompt and the generated output
    prompt = gr.Textbox(label="Enter your prompt:", lines=2)
    output = gr.Textbox(label="Generated text:", lines=5)
    # Generate text when the prompt is submitted (Enter key)
    prompt.submit(generate_text, prompt, output)
# Launch the app
demo.launch(share=True)