File size: 1,442 Bytes
f960061
d3cbad9
b6c5a4f
d3cbad9
b6c5a4f
d4ff061
d3cbad9
d4ff061
d3cbad9
f960061
 
d4ff061
f960061
 
 
d3cbad9
f960061
d3cbad9
f960061
d3cbad9
f960061
 
 
 
 
d3cbad9
f960061
 
 
 
 
 
 
d3cbad9
f960061
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer


# Identifier passed to every ``from_pretrained`` call below (tokenizer,
# base model and PEFT adapter all use the same one).
_REPO_ID = "hackergeek98/gemma-finetuned"

# Tokenizer used to encode prompts and decode generated ids.
tokenizer = AutoTokenizer.from_pretrained(_REPO_ID)

# Base causal language model, loaded in bfloat16 with low_cpu_mem_usage
# enabled to keep memory consumption down while loading.
base_model = AutoModelForCausalLM.from_pretrained(
    _REPO_ID,
    low_cpu_mem_usage=True,
    torch_dtype=torch.bfloat16,
)

# Wrap the base model with the PEFT weights and place the result on the CPU.
model = PeftModel.from_pretrained(base_model, _REPO_ID).to("cpu")

# Chatbot function
def chat(message, history=None, system_message=None):
    """Generate the model's reply to a single user message.

    The function is written to be used as the ``fn`` of ``gr.ChatInterface``,
    which calls it as ``fn(message, history, *additional_inputs)`` and
    expects the reply text back.

    Args:
        message: Text of the latest user turn. Only this text is tokenized
            and sent to the model.
        history: Earlier turns as supplied by the caller. Accepted for
            call-signature compatibility; it is neither read nor mutated,
            so no state leaks between calls.
        system_message: Value of the extra "System message" input.
            Accepted so that a three-argument call does not raise
            ``TypeError``.
            NOTE(review): it is not injected into the prompt because the
            prompt format the fine-tuned model expects is not visible
            here - confirm before wiring it in.

    Returns:
        The newly generated text (prompt tokens excluded) as a string.
    """
    encoded = tokenizer(message, return_tensors="pt")
    input_ids = encoded.input_ids.to("cpu")
    attention_mask = encoded.attention_mask.to("cpu")

    with torch.no_grad():  # Inference only; no gradients needed
        output_ids = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            # Budget applies to the reply only, so a long prompt cannot
            # use up the whole generation length.
            max_new_tokens=100,
        )

    # The generated sequence starts with the prompt tokens; drop them so
    # the reply does not echo the user's message.
    prompt_length = input_ids.shape[-1]
    reply_ids = output_ids[0][prompt_length:]

    return tokenizer.decode(reply_ids, skip_special_tokens=True)

# Gradio UI
# Components are created up front (chat window first, then the extra
# textbox) and handed to the ChatInterface below.
_chat_window = gr.Chatbot(height=400)
_system_box = gr.Textbox(label="System message", value="Welcome to the chatbot!")

demo = gr.ChatInterface(
    fn=chat,
    title="Fine-Tuned Gemma Chatbot",
    description="This chatbot is fine-tuned on Persian text using Gemma.",
    chatbot=_chat_window,
    additional_inputs=[_system_box],
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()