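# app.py -- Hugging Face Space source published by ruslanmv.
# Revision dbeaecd ("Update app.py", verified commit); file size 4.34 kB.
# NOTE: the "raw" / "history" / "blame" entries of the web file viewer were
# page navigation links, not part of the program.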
import gradio as gr
import spaces
from functools import lru_cache
# Memoize loaded models so each one is only constructed once per process.
@lru_cache(maxsize=3)
def load_hf_model(model_name):
    """Load the hosted ``deepseek-ai/<model_name>`` model via ``gr.load``.

    The result is memoized (up to three distinct names), so calling this
    again with the same ``model_name`` returns the previously loaded object.
    """
    repo_id = f"deepseek-ai/{model_name}"
    return gr.load(name=repo_id, src="huggingface", api_name="/chat")
# Every selectable model is loaded eagerly at import time; the dict keys double
# as the labels offered in the model picker.
_MODEL_NAMES = (
    "DeepSeek-R1-Distill-Qwen-32B",
    "DeepSeek-R1",
    "DeepSeek-R1-Zero",
)
MODELS = {name: load_hf_model(name) for name in _MODEL_NAMES}
# --- Chatbot function ---
def chatbot(input_text, history, model_choice, system_message, max_new_tokens, temperature, top_p):
    """Send one user message to the selected model and extend the conversation.

    Args:
        input_text: The message typed by the user.
        history: Prior conversation as a list of ``{"role", "content"}``
            dicts, or ``None``/empty for a fresh conversation. It is never
            mutated; a new list is returned instead.
        model_choice: Key into ``MODELS`` selecting which model to call.
        system_message: System prompt forwarded in the payload.
        max_new_tokens: Token limit forwarded as ``max_tokens`` (cast to int).
        temperature: Sampling temperature forwarded in the payload.
        top_p: Nucleus-sampling value forwarded in the payload.

    Returns:
        A 3-tuple ``(history, history, "")``: the updated conversation for
        display, the same list to store as state, and an empty string that
        clears the input textbox.

    Any failure while selecting or calling the model is reported as an
    ``"Error: ..."`` assistant message rather than raised.
    """
    # Copy so the caller's list (the stored session state) is not modified in place.
    history = list(history) if history else []

    # Blank input: skip the remote call and do not record an empty turn.
    if not input_text or not input_text.strip():
        return history, history, ""

    # NOTE(review): only the current message is sent; earlier turns in
    # `history` are not forwarded to the model. Confirm this is intended.
    payload = {
        "messages": [{"role": "user", "content": input_text}],
        "system": system_message,
        # A slider value may arrive as a float; a token count should be integral.
        "max_tokens": int(max_new_tokens),
        "temperature": temperature,
        "top_p": top_p,
    }

    # This is the UI boundary: surface every failure to the user as a chat
    # message instead of letting the callback crash.
    try:
        if model_choice not in MODELS:
            raise ValueError(f"unknown model {model_choice!r}")
        response = MODELS[model_choice](payload)
        assistant_response = response[-1]["content"]
    except Exception as e:
        assistant_response = f"Error: {str(e)}"

    # Record the exchange as OpenAI-style role/content messages.
    history.append({"role": "user", "content": input_text})
    history.append({"role": "assistant", "content": assistant_response})

    # 1) updated messages for the chat display
    # 2) the same list, stored as session state
    # 3) "" to clear the input textbox
    return history, history, ""
# --- Gradio Interface ---
# Builds the page: chat display + input on top, model picker and sampling
# options below, and wires both "Submit" and Enter-in-textbox to `chatbot`.
with gr.Blocks(theme=gr.themes.Soft(), title="DeepSeek Chatbot") as demo:
    gr.Markdown(
        """
# DeepSeek Chatbot
Created by [ruslanmv.com](https://ruslanmv.com/)
This is a demo of different DeepSeek models. Select a model, type your message, and click "Submit".
You can also adjust optional parameters like system message, max new tokens, temperature, and top-p.
"""
    )

    # Conversation state handed to `chatbot` on every call. Created before the
    # buttons so it can be registered with the Clear button below.
    chat_history = gr.State([])

    with gr.Row():
        with gr.Column():
            # Use type='messages' for OpenAI-style messages
            chatbot_output = gr.Chatbot(label="DeepSeek Chatbot", height=500, type="messages")
            msg = gr.Textbox(label="Your Message", placeholder="Type your message here...")
            with gr.Row():
                submit_btn = gr.Button("Submit", variant="primary")
                # chat_history is included so that Clear also resets the stored
                # conversation; otherwise the next submit would append to the
                # old history and redisplay the "cleared" messages.
                clear_btn = gr.ClearButton([msg, chatbot_output, chat_history])

    with gr.Row():
        with gr.Accordion("Options", open=True):
            model_choice = gr.Radio(
                choices=list(MODELS.keys()),
                label="Choose a Model",
                value="DeepSeek-R1",
            )
            with gr.Accordion("Optional Parameters", open=False):
                system_message = gr.Textbox(
                    label="System Message",
                    value="You are a friendly Chatbot created by ruslanmv.com",
                    lines=2,
                )
                max_new_tokens = gr.Slider(
                    minimum=1, maximum=4000, value=200, label="Max New Tokens"
                )
                temperature = gr.Slider(
                    minimum=0.10, maximum=4.00, value=0.70, label="Temperature"
                )
                top_p = gr.Slider(
                    minimum=0.10, maximum=1.00, value=0.90, label="Top-p (nucleus sampling)"
                )

    # Event handling: the button click and pressing Enter in the textbox run
    # the same handler with the same inputs and outputs.
    handler_inputs = [
        msg, chat_history, model_choice, system_message, max_new_tokens, temperature, top_p
    ]
    handler_outputs = [chatbot_output, chat_history, msg]
    submit_btn.click(chatbot, handler_inputs, handler_outputs)
    msg.submit(chatbot, handler_inputs, handler_outputs)
# The `spaces.GPU()` notes below are optional; remove or adapt them if GPU
# management is not needed.
if __name__ == "__main__":
    # Plain launch: used when spaces.GPU() hardware acceleration is not needed.
    demo.launch()
    # If GPU is required on Hugging Face Spaces, demo.launch can be wrapped instead:
    # spaces.GPU()(demo.launch)()