10M-LLM / app.py
import gradio as gr
from inference_fine_tune import generate_response # generator-based inference code
print("Gradio version:", gr.__version__)

def chat_interface(prompt):
    # generate_response yields text incrementally; re-yielding each chunk turns
    # this into a generator function, which Gradio streams into the output box
    # (returning the generator object from a plain function would not stream)
    yield from generate_response(prompt)

with gr.Blocks() as demo:
    gr.Markdown("## Chat with the Model")
    with gr.Row():
        inp = gr.Textbox(label="Your Prompt", placeholder="Enter your message...", lines=3)
        out = gr.Textbox(label="Model Response", lines=10)
    btn = gr.Button("Send")
    # No extra flag is needed for streaming: Gradio renders each value a
    # generator function yields, and .click() has no `streaming` parameter
    btn.click(chat_interface, inputs=inp, outputs=out)
demo.launch(share=True)
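
# For reference, a minimal sketch of the generator contract this app assumes
# from inference_fine_tune.generate_response (that module is not shown here,
# so `model` and `stream_tokens` below are hypothetical names): each yield
# should be the full response so far, because Gradio replaces the textbox
# contents on every yield rather than appending to them.
#
# def generate_response(prompt):
#     text = ""
#     for token in stream_tokens(model, prompt):  # hypothetical token stream
#         text += token
#         yield text  # cumulative text, not just the newest token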