Shriti09 committed
Commit 3fe707b (verified) · Parent: 9a71527

Create app.py

Files changed (1): app.py (+49, -0)
app.py ADDED
@@ -0,0 +1,49 @@
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import gradio as gr

# Base model and adapter repo
BASE_MODEL_NAME = "microsoft/phi-2"
ADAPTER_REPO = "Shriti09/Microsoft-Phi-QLora"

# Load the tokenizer; Phi-2 ships without a pad token, so reuse EOS
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

# Load the base model
print("Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_NAME, device_map="auto")

# Load adapter weights on top of the base model
print("Loading LoRA adapter...")
model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)

# Merge adapter into base model (optional; removes the PEFT wrapper so
# inference runs on a plain transformers model)
model = model.merge_and_unload()

# Put model in eval mode
model.eval()

# Generate a response for a prompt
def generate_response(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,  # budget for generated tokens, independent of prompt length
            do_sample=True,
            top_p=0.95,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )
    # generate() returns prompt + completion; skip the prompt tokens so
    # the UI shows only the model's reply
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )
    return response

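Because do_sample=True makes the output non-deterministic, a quick way to spot-check the function before wiring up the UI is to seed torch and call it directly. A minimal sketch (the prompt is illustrative, not part of the commit):

# Quick sanity check; remove before deploying the Space
torch.manual_seed(0)  # make the sampled output reproducible for this check
print(generate_response("Explain QLoRA fine-tuning in one sentence."))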
# Gradio UI
gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Ask me something..."),
    outputs="text",
    title="Phi-2 QLoRA Chatbot",
    description="Chat with Phi-2 fine-tuned with QLoRA adapters!",
).launch()
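Since the adapter was trained with QLoRA, a lower-memory variant of this app would load the base model in 4-bit. This is a sketch under assumptions, not part of the commit: it assumes the bitsandbytes package is installed on the Space, and the adapter is kept un-merged, because merge_and_unload() is not meant for a 4-bit-quantized base.

# Sketch: 4-bit base model load (assumes `bitsandbytes` is installed)
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
)
model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)
model.eval()  # keep the adapter un-merged when the base is quantized

This trades some generation speed for a much smaller GPU footprint, which can matter on free-tier Space hardware.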