File size: 1,166 Bytes
e4c6a63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import os

import gradio as gr
import torch
from huggingface_hub import login
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM

# Log in with the secret token (stored in Hugging Face Secrets).
# The secret is read from the HF_TOKEN environment variable; a literal
# "${HF_TOKEN}" string is never expanded by Python, so it must be looked up
# explicitly.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    # Fail early with an actionable message instead of sending an empty or
    # placeholder token to the Hub.
    raise RuntimeError(
        "HF_TOKEN environment variable is not set; "
        "add it as a secret before starting the app."
    )
login(token=hf_token)

# Define model paths: the base checkpoint and the LoRA adapter applied on top.
base_model_name = "meta-llama/Llama-3.2-3B-Instruct"
lora_adapter_path = "agilan1102/eysflow_adapters"

# Load tokenizer and models.
# `token=True` tells the loaders to authenticate with the locally stored
# Hugging Face credentials; it replaces the deprecated `use_auth_token=True`.
tokenizer = AutoTokenizer.from_pretrained(base_model_name, token=True)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    token=True,
)
# Wrap the base model with the adapter weights fetched from `lora_adapter_path`.
model_with_adapter = PeftModel.from_pretrained(base_model, lora_adapter_path, token=True)

def generate_text_adapter(prompt):
    """Generate text for *prompt* using the adapter-wrapped model.

    The prompt is tokenized into PyTorch tensors, moved to the device reported
    by ``model_with_adapter``, and passed to ``generate`` with a cap of 500
    newly generated tokens. The first returned sequence is decoded with
    special tokens skipped and returned as a string.

    NOTE(review): the whole first output sequence is decoded, so the returned
    text presumably contains the prompt as well as the continuation - confirm
    this is the intended API output.
    """
    encoded = tokenizer(prompt, return_tensors="pt")
    encoded = encoded.to(model_with_adapter.device)
    generated = model_with_adapter.generate(max_new_tokens=500, **encoded)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

# Build a text-in / text-out Gradio interface around the generation function
# and start serving it immediately.
demo = gr.Interface(
    generate_text_adapter,
    "text",
    "text",
    title="My Finetuned LLM API",
)
demo.launch()