"""Gradio chat UI for DeepSeek models served via the Hugging Face hub."""

import gradio as gr
from functools import lru_cache


# Cache model loading so repeated calls with the same name reuse one client.
@lru_cache(maxsize=3)
def load_hf_model(model_name):
    """Load a DeepSeek chat endpoint from the Hugging Face hub via gradio.

    Parameters
    ----------
    model_name : str
        Repository name under the ``deepseek-ai`` organization.
    """
    return gr.load(
        name=f"deepseek-ai/{model_name}",
        src="huggingface",
        api_name="/chat",
    )


# Load all models eagerly at startup so the first request has no cold start.
MODELS = {
    "DeepSeek-R1-Distill-Qwen-32B": load_hf_model("DeepSeek-R1-Distill-Qwen-32B"),
    "DeepSeek-R1": load_hf_model("DeepSeek-R1"),
    "DeepSeek-R1-Zero": load_hf_model("DeepSeek-R1-Zero"),
}


def parse_response(response):
    """Extract the assistant text from the formats different models return.

    Handles three shapes: a non-empty list of message dicts, an OpenAI-style
    dict with ``choices``, and a plain ``generated_text`` dict.  Anything
    else — including an empty list — yields a diagnostic string instead of
    raising, so the error surfaces in the chat window.
    """
    if isinstance(response, list):
        # Empty list deliberately falls through to the diagnostic below.
        if response:
            first = response[0]
            return first.get("generated_text", first.get("content", str(first)))
    elif isinstance(response, dict):
        if "choices" in response:
            # OpenAI-style chat-completions format.
            return response["choices"][0]["message"]["content"]
        if "generated_text" in response:
            # Standard HF text-generation format.
            return response["generated_text"]
    return f"Unsupported response format: {type(response)}"


def chatbot(input_text, history, model_choice, system_message,
            max_new_tokens, temperature, top_p):
    """Handle one chat turn: query the chosen model and append to history.

    Returns ``(history, history, "")`` — the chatbot display value, the
    updated state, and an empty string that clears the input textbox.
    """
    history = history or []
    model_component = MODELS[model_choice]

    # Construct messages with an optional system message.  Guard against a
    # None system_message so .strip() cannot raise AttributeError.
    messages = []
    if system_message and system_message.strip():
        messages.append({"role": "system", "content": system_message})
    messages.append({"role": "user", "content": input_text})

    payload = {
        "messages": messages,
        "max_tokens": max_new_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }

    try:
        # NOTE(review): this assumes the object returned by gr.load() is
        # directly callable with a payload dict — confirm against the gradio
        # version in use; recent gradio versions expose loaded endpoints
        # through named API functions instead.
        response = model_component(payload)
        assistant_response = parse_response(response)
    except Exception as e:
        # UI boundary: surface any backend failure in the chat window
        # rather than crashing the app.
        assistant_response = f"Error: {str(e)}"

    history.append((input_text, assistant_response))
    return history, history, ""


# Interface setup.
with gr.Blocks(theme=gr.themes.Soft(), title="DeepSeek Chatbot") as demo:
    gr.Markdown("""# DeepSeek Chatbot""")
    with gr.Row():
        with gr.Column():
            chatbot_output = gr.Chatbot(height=500)
            # Fixed: the placeholder literal was split across two physical
            # lines in the original source (a syntax error as pasted).
            msg = gr.Textbox(placeholder="Type your message...")
            with gr.Row():
                submit_btn = gr.Button("Submit", variant="primary")
                clear_btn = gr.ClearButton([msg, chatbot_output])
        with gr.Row():
            with gr.Accordion("Options", open=True):
                model_choice = gr.Radio(
                    choices=list(MODELS.keys()),
                    value="DeepSeek-R1",
                )
            with gr.Accordion("Optional Parameters", open=False):
                system_message = gr.Textbox(
                    value="You are a helpful AI assistant",
                    lines=2,
                )
                max_new_tokens = gr.Slider(1, 4000, 200)
                temperature = gr.Slider(0.1, 4.0, 0.7)
                top_p = gr.Slider(0.1, 1.0, 0.9)

    chat_history = gr.State([])

    # Both the button click and Enter in the textbox trigger the same handler.
    submit_btn.click(
        chatbot,
        [msg, chat_history, model_choice, system_message,
         max_new_tokens, temperature, top_p],
        [chatbot_output, chat_history, msg],
    )
    msg.submit(
        chatbot,
        [msg, chat_history, model_choice, system_message,
         max_new_tokens, temperature, top_p],
        [chatbot_output, chat_history, msg],
    )


if __name__ == "__main__":
    demo.launch()