agilan1102 committed
Commit e4c6a63 · verified
1 Parent(s): 7ebbc19

Create app.py

Files changed (1)
  1. app.py +38 -0
app.py ADDED
@@ -0,0 +1,38 @@
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+ from peft import PeftModel
+ import gradio as gr
+ import os
+ from huggingface_hub import login
+
+ # Log in with the secret token stored in Hugging Face Spaces secrets
+ login(token=os.environ["HF_TOKEN"])
+
+ # Define model paths
+ base_model_name = "meta-llama/Llama-3.2-3B-Instruct"
+ lora_adapter_path = "agilan1102/eysflow_adapters"
+
+ # Load tokenizer and base model, then attach the LoRA adapter
+ tokenizer = AutoTokenizer.from_pretrained(base_model_name, token=True)
+ base_model = AutoModelForCausalLM.from_pretrained(
+     base_model_name,
+     device_map="auto",  # requires the accelerate package
+     token=True,  # reuse the cached login token (use_auth_token is deprecated)
+ )
+ model_with_adapter = PeftModel.from_pretrained(base_model, lora_adapter_path, token=True)
+
+ def generate_text_adapter(prompt):
+     inputs = tokenizer(prompt, return_tensors="pt").to(model_with_adapter.device)
+     outputs = model_with_adapter.generate(**inputs, max_new_tokens=500)
+     return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+ # Create Gradio interface
+ demo = gr.Interface(
+     fn=generate_text_adapter,
+     inputs="text",
+     outputs="text",
+     title="My Finetuned LLM API"
+ )
+
+ # Launch the interface
+ demo.launch()
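
Once the Space built from this commit is running, the interface can also be called programmatically. Below is a minimal sketch using gradio_client; the Space id agilan1102/eysflow-demo is a hypothetical placeholder (the real Space name is not part of this commit), and /predict is the default endpoint that gr.Interface registers for its function:

from gradio_client import Client

# Hypothetical Space id: replace with the Space that actually hosts this app.py.
# For a private Space, pass hf_token="hf_..." to Client as well.
client = Client("agilan1102/eysflow-demo")

# One positional argument per Interface input; api_name="/predict" is the
# default endpoint for a single-function gr.Interface
result = client.predict("Explain what a LoRA adapter does.", api_name="/predict")
print(result)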