Spaces:
Sleeping
Sleeping
File size: 905 Bytes
51d6cc4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Load Model & Tokenizer
# NOTE(review): this runs at import time and downloads/loads a ~3B-parameter
# checkpoint — expect a slow cold start on the Space.
MODEL_NAME = "Qwen/Qwen2.5-3B" # Change to your HF model repo if fine-tuned
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Half-precision weights; device_map="auto" lets accelerate place the model on
# GPU if available, otherwise CPU — so the target device is not guaranteed.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
def tinyzero_chat(prompt):
    """Generate a model response for a single prompt.

    Args:
        prompt: Raw user text from the Gradio textbox.

    Returns:
        The decoded generation (prompt echo followed by the completion),
        with special tokens stripped.
    """
    # Bug fix: the model is loaded with device_map="auto", which may place it
    # on CPU when no GPU is available — hard-coding .to("cuda") crashed there.
    # Move inputs to wherever the model actually lives.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        # max_length counts prompt tokens too, starving long prompts of output;
        # max_new_tokens bounds only the generated continuation.
        max_new_tokens=512,
        # temperature/top_p are silently ignored under greedy decoding;
        # sampling must be enabled for them to take effect.
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# --- Gradio UI ---------------------------------------------------------------
# Single text-in / text-out interface wrapping tinyzero_chat.
prompt_box = gr.Textbox(placeholder="Ask TinyZero anything...")
answer_box = gr.Textbox()

demo = gr.Interface(
    fn=tinyzero_chat,
    inputs=prompt_box,
    outputs=answer_box,
    title="TinyZero Chatbot",
    description="An interactive demo of TinyZero trained on reasoning tasks.",
)

# Start the web server for the demo.
demo.launch()
|