import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import gradio as gr

# Use GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"

# Base model and adapter paths
base_model_name = "microsoft/phi-2"  # Pull from HF Hub directly
adapter_path = "Shriti09/Microsoft-Phi-QLora"  # Update with your Hugging Face repo path
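# The adapter repo holds only the low-rank LoRA weight updates learned during
# QLoRA fine-tuning; they are applied on top of the frozen base weights.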

print("πŸ”§ Loading base model...")
# Load the base model
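# bfloat16 halves memory use on GPU; float32 is the safe fallback on CPU.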
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
)

print("πŸ”§ Loading LoRA adapter...")
# Load the LoRA adapter
adapter_model = PeftModel.from_pretrained(base_model, adapter_path)

print("πŸ”— Merging adapter into base model...")
# Merge adapter into the base model
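# merge_and_unload() folds the LoRA deltas into the base weights and returns
# a plain transformers model, so inference carries no PEFT wrapper overhead.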
merged_model = adapter_model.merge_and_unload()
merged_model.to(device)  # move weights to the same device as the inputs
merged_model.eval()

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
print("βœ… Model ready for inference!")

# Text generation function
def generate_text(prompt):
    # Tokenize the input
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

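    # Sampling (do_sample with temperature/top_p) trades determinism for
    # variety; torch.no_grad() skips autograd bookkeeping during inference.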
    with torch.no_grad():
        outputs = merged_model.generate(
            **inputs,
            max_new_tokens=150,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode the full output sequence (prompt plus completion)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("<h1>🧠 Phi-2 QLoRA Text Generator</h1>")
    
    # Textbox for user input and a button to trigger generation
    prompt = gr.Textbox(label="Enter your prompt:", lines=2)
    generate_btn = gr.Button("Generate")
    output = gr.Textbox(label="Generated text:", lines=5)

    # Generate on button click or when Enter is pressed in the prompt box
    generate_btn.click(generate_text, inputs=prompt, outputs=output)
    prompt.submit(generate_text, inputs=prompt, outputs=output)

# Launch the app; share=True also serves a temporary public Gradio link
demo.launch(share=True)