Spaces:
Sleeping
Sleeping
File size: 4,341 Bytes
f1fef64 dbeaecd d9faa8c 317e409 d9faa8c dbeaecd d9faa8c 317e409 d9faa8c 539566d f70fc29 dbeaecd d9faa8c dbeaecd d9faa8c dbeaecd d9faa8c 9122113 d9faa8c 9122113 d9faa8c dbeaecd dfdfe2f dbeaecd d9faa8c f70fc29 d9faa8c f70fc29 c2dfdca dbeaecd f70fc29 c2dfdca 381d2e1 f70fc29 d9faa8c f70fc29 c2dfdca a26f5ee f70fc29 d9faa8c f70fc29 d9faa8c 317e409 dbeaecd f1fef64 dbeaecd dfdfe2f dbeaecd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
import gradio as gr
import spaces
from functools import lru_cache
# Memoize loads so repeated requests for the same model name reuse one object.
@lru_cache(maxsize=3)
def load_hf_model(model_name):
    """Load a DeepSeek model through ``gr.load`` and return the result.

    Args:
        model_name: Repository name under the ``deepseek-ai`` namespace,
            e.g. ``"DeepSeek-R1"``.

    Returns:
        Whatever ``gr.load`` returns for ``deepseek-ai/<model_name>`` with the
        ``huggingface`` source and the ``/chat`` API name.
    """
    repo_id = f"deepseek-ai/{model_name}"
    return gr.load(name=repo_id, src="huggingface", api_name="/chat")
# Eagerly load every supported model at import time. Each key is the model's
# display name and is also the name passed to load_hf_model.
MODELS = {
    model_name: load_hf_model(model_name)
    for model_name in (
        "DeepSeek-R1-Distill-Qwen-32B",
        "DeepSeek-R1",
        "DeepSeek-R1-Zero",
    )
}
# --- Chatbot function ---
def chatbot(
    input_text,
    history,
    model_choice,
    system_message,
    max_new_tokens,
    temperature,
    top_p,
):
    """Send the user's message to the selected model and extend the chat log.

    Args:
        input_text: Text typed by the user. Blank or whitespace-only input is
            ignored (no model call, history unchanged).
        history: Prior conversation as a list of ``{"role", "content"}``
            dicts, or ``None`` for a fresh conversation. Never mutated.
        model_choice: Key into ``MODELS`` selecting the model to call.
        system_message: Forwarded to the model as ``"system"``.
        max_new_tokens: Forwarded to the model as ``"max_tokens"``.
        temperature: Forwarded to the model as ``"temperature"``.
        top_p: Forwarded to the model as ``"top_p"``.

    Returns:
        A 3-tuple ``(history, history, "")``: the updated conversation for the
        chat display, the same conversation for the state, and an empty string
        that clears the input textbox.

    Raises:
        KeyError: If ``model_choice`` is not a key of ``MODELS``.
    """
    # Work on a copy so the caller's list is never modified in place.
    history = list(history) if history else []

    # Nothing to send: skip the remote call and avoid empty chat bubbles.
    if not input_text or not input_text.strip():
        return history, history, ""

    model_component = MODELS[model_choice]

    # NOTE(review): only the latest user message is forwarded; earlier turns
    # in `history` are not sent to the model. Confirm whether that is intended.
    payload = {
        "messages": [{"role": "user", "content": input_text}],
        "system": system_message,
        "max_tokens": max_new_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }

    try:
        response = model_component(payload)
        # The reply text is read from the last item of the response.
        assistant_response = response[-1]["content"]
    except Exception as e:
        # UI boundary: surface any failure in the chat instead of crashing.
        assistant_response = f"Error: {str(e)}"

    history.append({"role": "user", "content": input_text})
    history.append({"role": "assistant", "content": assistant_response})

    return history, history, ""
# --- Gradio Interface ---
# Builds the page: an intro text, the chat area with its input box and
# buttons, the model/parameter options, and the event wiring to `chatbot`.
with gr.Blocks(theme=gr.themes.Soft(), title="DeepSeek Chatbot") as demo:
    gr.Markdown(
        """
# DeepSeek Chatbot
Created by [ruslanmv.com](https://ruslanmv.com/)
This is a demo of different DeepSeek models. Select a model, type your message, and click "Submit".
You can also adjust optional parameters like system message, max new tokens, temperature, and top-p.
"""
    )

    with gr.Row():
        with gr.Column():
            # type="messages" displays OpenAI-style {"role", "content"} dicts,
            # which is the format `chatbot` appends to the history.
            chatbot_output = gr.Chatbot(label="DeepSeek Chatbot", height=500, type="messages")
            msg = gr.Textbox(label="Your Message", placeholder="Type your message here...")
            with gr.Row():
                submit_btn = gr.Button("Submit", variant="primary")
                clear_btn = gr.ClearButton([msg, chatbot_output])

    with gr.Row():
        with gr.Accordion("Options", open=True):
            model_choice = gr.Radio(
                choices=list(MODELS.keys()),
                label="Choose a Model",
                value="DeepSeek-R1",
            )
            with gr.Accordion("Optional Parameters", open=False):
                system_message = gr.Textbox(
                    label="System Message",
                    value="You are a friendly Chatbot created by ruslanmv.com",
                    lines=2,
                )
                max_new_tokens = gr.Slider(
                    minimum=1, maximum=4000, value=200, label="Max New Tokens"
                )
                temperature = gr.Slider(
                    minimum=0.10, maximum=4.00, value=0.70, label="Temperature"
                )
                top_p = gr.Slider(
                    minimum=0.10, maximum=1.00, value=0.90, label="Top-p (nucleus sampling)"
                )

    # Conversation state passed to and returned from `chatbot` on every turn.
    chat_history = gr.State([])

    # Event handling: the Submit button and pressing Enter in the textbox both
    # run `chatbot` with the same inputs and outputs.
    chat_inputs = [
        msg,
        chat_history,
        model_choice,
        system_message,
        max_new_tokens,
        temperature,
        top_p,
    ]
    chat_outputs = [chatbot_output, chat_history, msg]
    submit_btn.click(chatbot, chat_inputs, chat_outputs)
    msg.submit(chatbot, chat_inputs, chat_outputs)

    # The ClearButton above only resets the textbox and the chat display.
    # Also reset the stored conversation; otherwise the next submit would
    # bring the "cleared" messages back from `chat_history`.
    clear_btn.click(lambda: [], None, chat_history)
# Script entry point. A plain launch is used here, which is enough when GPU
# management through the `spaces` package is not needed.
if __name__ == "__main__":
    demo.launch()
    # NOTE: per the original author's note, when a GPU is required on
    # Hugging Face Spaces the launch can instead be wrapped as
    # `spaces.GPU()(demo.launch)()`.
|