DeepSeek-R1-Chatbot-70b

Sleeping

App Files Files Community

DeepSeek-R1-Chatbot-70b / app.py

ruslanmv

Update app.py

dbeaecd verified 10 months ago

raw

history blame

4.34 kB

	import gradio as gr
	import spaces
	from functools import lru_cache

	# Cache model loading to optimize performance
	@lru_cache(maxsize=3)
	def load_hf_model(model_name):
	    """Load one DeepSeek model through ``gr.load``.

	    The call is memoized with ``lru_cache`` (at most three entries), so
	    requesting the same ``model_name`` again returns the cached object
	    instead of loading it a second time.

	    Args:
	        model_name: Repository name under the ``deepseek-ai`` namespace,
	            e.g. ``"DeepSeek-R1"``.

	    Returns:
	        Whatever ``gr.load`` returns for ``deepseek-ai/<model_name>``.
	    """
	    repo_id = f"deepseek-ai/{model_name}"
	    return gr.load(name=repo_id, src="huggingface", api_name="/chat")

	# Load all models at startup
	# Model name -> loaded model object. Every model is loaded eagerly at
	# import time; each key is the name handed to load_hf_model and is also
	# what the model selector lists via MODELS.keys().
	MODELS = {
	    model_name: load_hf_model(model_name)
	    for model_name in (
	        "DeepSeek-R1-Distill-Qwen-32B",
	        "DeepSeek-R1",
	        "DeepSeek-R1-Zero",
	    )
	}

	# --- Chatbot function ---
	def chatbot(input_text, history, model_choice, system_message, max_new_tokens, temperature, top_p):
	    """Send one user message to the selected model and record the exchange.

	    Args:
	        input_text: Text typed by the user. Blank or whitespace-only input
	            is ignored.
	        history: Conversation so far as a list of ``{"role", "content"}``
	            dicts, or ``None`` when no conversation exists yet.
	        model_choice: Key into the module-level ``MODELS`` mapping.
	        system_message: Forwarded as the payload's ``"system"`` field.
	        max_new_tokens: Forwarded as the payload's ``"max_tokens"`` field.
	        temperature: Forwarded as the payload's ``"temperature"`` field.
	        top_p: Forwarded as the payload's ``"top_p"`` field.

	    Returns:
	        A 3-tuple ``(messages, messages, "")``: the updated conversation
	        twice (once for the chat display, once for the stored state) and
	        an empty string that clears the input textbox. Any failure while
	        selecting or calling the model is returned as an assistant message
	        of the form ``"Error: ..."`` instead of being raised.
	    """
	    history = [] if history is None else history

	    # Nothing to answer: skip the model call and keep the conversation
	    # exactly as it was rather than recording an empty turn.
	    if not input_text or not input_text.strip():
	        return history, history, ""

	    # NOTE(review): only the current message is sent; earlier turns held in
	    # `history` are not part of the payload. Confirm whether the remote
	    # endpoint is expected to receive the full conversation.
	    payload = {
	        "messages": [{"role": "user", "content": input_text}],
	        "system": system_message,
	        "max_tokens": max_new_tokens,
	        "temperature": temperature,
	        "top_p": top_p,
	    }

	    try:
	        # The lookup lives inside the try so an unknown model name is shown
	        # in the chat like any other failure instead of escaping the handler.
	        response = MODELS[model_choice](payload)
	        assistant_response = response[-1]["content"]
	    except Exception as e:  # UI boundary: surface the problem to the user
	        assistant_response = f"Error: {e}"

	    # Build a new list instead of appending to the caller's list in place.
	    updated_history = [
	        *history,
	        {"role": "user", "content": input_text},
	        {"role": "assistant", "content": assistant_response},
	    ]
	    return updated_history, updated_history, ""

	# --- Gradio Interface ---
	with gr.Blocks(theme=gr.themes.Soft(), title="DeepSeek Chatbot") as demo:
	    gr.Markdown(
	        """
	# DeepSeek Chatbot
	Created by [ruslanmv.com](https://ruslanmv.com/)
	This is a demo of different DeepSeek models. Select a model, type your message, and click "Submit".
	You can also adjust optional parameters like system message, max new tokens, temperature, and top-p.
	"""
	    )

	    # Conversation store passed into and returned from `chatbot`. It is
	    # created before the buttons so the clear button can reset it too.
	    chat_history = gr.State([])

	    # Chat area: transcript, message box, and the submit/clear buttons.
	    with gr.Row():
	        with gr.Column():
	            # Use type='messages' for OpenAI-style messages
	            chatbot_output = gr.Chatbot(label="DeepSeek Chatbot", height=500, type="messages")
	            msg = gr.Textbox(label="Your Message", placeholder="Type your message here...")
	            with gr.Row():
	                submit_btn = gr.Button("Submit", variant="primary")
	                # Clearing only the visible widgets would leave the stored
	                # conversation intact, and the next reply would bring the
	                # "cleared" messages back; reset the state as well.
	                clear_btn = gr.ClearButton([msg, chatbot_output, chat_history])

	    # Model selection plus the collapsed generation parameters.
	    with gr.Row():
	        with gr.Accordion("Options", open=True):
	            model_choice = gr.Radio(
	                choices=list(MODELS.keys()),
	                label="Choose a Model",
	                value="DeepSeek-R1",
	            )
	            with gr.Accordion("Optional Parameters", open=False):
	                system_message = gr.Textbox(
	                    label="System Message",
	                    value="You are a friendly Chatbot created by ruslanmv.com",
	                    lines=2,
	                )
	                max_new_tokens = gr.Slider(
	                    minimum=1, maximum=4000, value=200, label="Max New Tokens"
	                )
	                temperature = gr.Slider(
	                    minimum=0.10, maximum=4.00, value=0.70, label="Temperature"
	                )
	                top_p = gr.Slider(
	                    minimum=0.10, maximum=1.00, value=0.90, label="Top-p (nucleus sampling)"
	                )

	    # Event handling: the button click and pressing Enter in the textbox
	    # run the same handler with the same inputs and outputs.
	    chat_inputs = [
	        msg,
	        chat_history,
	        model_choice,
	        system_message,
	        max_new_tokens,
	        temperature,
	        top_p,
	    ]
	    chat_outputs = [chatbot_output, chat_history, msg]
	    submit_btn.click(chatbot, chat_inputs, chat_outputs)
	    msg.submit(chatbot, chat_inputs, chat_outputs)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    # Plain launch, with no spaces.GPU() wrapping around it.
	    demo.launch()

	# NOTE(review): the original author listed the line below as the alternative
	# for GPU-backed Hugging Face Spaces. Confirm it against the `spaces`
	# package documentation before enabling it.
	# spaces.GPU()(demo.launch)()