import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model & tokenizer
MODEL_NAME = "Qwen/Qwen2.5-3B"  # Change to your HF model repo if fine-tuned
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
)

def tinyzero_chat(prompt):
    # Place inputs on the same device the model was loaded onto
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,  # cap generated tokens rather than total sequence length
        do_sample=True,      # required for temperature/top_p to take effect
        temperature=0.7,
        top_p=0.9,
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Gradio interface
demo = gr.Interface(
    fn=tinyzero_chat,
    inputs=gr.Textbox(placeholder="Ask TinyZero anything..."),
    outputs=gr.Textbox(),
    title="TinyZero Chatbot",
    description="An interactive demo of TinyZero trained on reasoning tasks.",
)

# Launch
demo.launch()
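For the Space to build, the app also needs a requirements.txt next to it. A minimal sketch, with the package set inferred from the imports above (versions left unpinned as an assumption):

# requirements.txt
gradio
torch
transformers
accelerate  # needed by transformers for device_map="auto"

With both files in the Space repo, the app can also be tested locally with `python app.py` before pushing.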