import gradio as gr
from functools import lru_cache

# Cache model loading so each model is created at most once
@lru_cache(maxsize=3)
def load_hf_model(model_name):
    return gr.load(f"models/{model_name}", src="huggingface")

# Load all models at startup
MODELS = {
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": load_hf_model("deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"),
    "deepseek-ai/DeepSeek-R1": load_hf_model("deepseek-ai/DeepSeek-R1"),
    "deepseek-ai/DeepSeek-R1-Zero": load_hf_model("deepseek-ai/DeepSeek-R1-Zero"),
}
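
# NOTE: preloading every model above makes the @lru_cache decorator largely
# redundant. A lazy variant (a sketch) would keep only the model names here
# and let lru_cache memoize the first load of each model on demand:
#
#   MODEL_NAMES = [
#       "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
#       "deepseek-ai/DeepSeek-R1",
#       "deepseek-ai/DeepSeek-R1-Zero",
#   ]
#   # ...then inside chatbot(): model_component = load_hf_model(model_choice)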
# --- Chatbot function ---
def chatbot(input_text, history, model_choice, system_message, max_new_tokens, temperature, top_p):
    history = history or []

    # Get the selected model component
    model_component = MODELS[model_choice]

    # Run inference using the selected model. Interfaces returned by
    # gr.load() are called with the prompt as a single positional argument;
    # the system message and sampling parameters (max_new_tokens,
    # temperature, top_p) are exposed in the UI but are not forwarded by
    # this call.
    try:
        response = model_component(input_text)
        if isinstance(response, str):
            assistant_response = response
        elif isinstance(response, list) and response:
            # Raw Inference API responses arrive as [{"generated_text": ...}]
            assistant_response = response[0].get("generated_text", "No response generated.")
        else:
            assistant_response = "Unexpected model response format."
    except Exception as e:
        assistant_response = f"Error: {e}"

    # Append the user/assistant turn to history and clear the textbox
    history.append((input_text, assistant_response))
    return history, history, ""
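
# A sketch of an alternative inference path that does forward the sampling
# parameters. It assumes the huggingface_hub package is installed and that
# the model is served by the serverless Inference API; this helper is not
# wired into the UI below.
def generate_via_inference_api(model_name, prompt, max_new_tokens=200,
                               temperature=0.7, top_p=0.9):
    from huggingface_hub import InferenceClient

    client = InferenceClient(model_name)
    # text_generation() accepts the sampling parameters that calls through
    # gr.load() interfaces cannot pass along
    return client.text_generation(
        prompt,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        return_full_text=False,
    )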
# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Soft(), title="DeepSeek Chatbot") as demo:
    gr.Markdown(
        """
        # DeepSeek Chatbot
        Created by [ruslanmv.com](https://ruslanmv.com/)

        This is a demo of different DeepSeek models. Select a model, type your message, and click "Submit".
        You can also adjust optional parameters like system message, max new tokens, temperature, and top-p.
        """
    )
    with gr.Row():
        with gr.Column():
            chatbot_output = gr.Chatbot(label="DeepSeek Chatbot", height=500)
            msg = gr.Textbox(label="Your Message", placeholder="Type your message here...")
            with gr.Row():
                submit_btn = gr.Button("Submit", variant="primary")
                clear_btn = gr.ClearButton([msg, chatbot_output])
    with gr.Row():
        with gr.Accordion("Options", open=True):
            model_choice = gr.Radio(
                choices=list(MODELS.keys()),
                label="Choose a Model",
                value="deepseek-ai/DeepSeek-R1",
            )
        with gr.Accordion("Optional Parameters", open=False):
            system_message = gr.Textbox(
                label="System Message",
                value="You are a friendly Chatbot created by ruslanmv.com",
                lines=2,
            )
            max_new_tokens = gr.Slider(
                minimum=1, maximum=4000, value=200, label="Max New Tokens"
            )
            temperature = gr.Slider(
                minimum=0.10, maximum=4.00, value=0.70, label="Temperature"
            )
            top_p = gr.Slider(
                minimum=0.10, maximum=1.00, value=0.90, label="Top-p (nucleus sampling)"
            )

    chat_history = gr.State([])

    # Event handling: the Submit button and pressing Enter in the textbox
    # both route through the same chatbot() callback
    submit_btn.click(
        chatbot,
        [msg, chat_history, model_choice, system_message, max_new_tokens, temperature, top_p],
        [chatbot_output, chat_history, msg],
    )
    msg.submit(
        chatbot,
        [msg, chat_history, model_choice, system_message, max_new_tokens, temperature, top_p],
        [chatbot_output, chat_history, msg],
    )

if __name__ == "__main__":
    demo.launch()
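
# Usage (a sketch, for a local run): `python app.py` serves the UI at
# http://127.0.0.1:7860 by default; on a Hugging Face Space, saving this
# file as app.py with gradio in requirements.txt launches it automatically.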