import gradio as gr
from functools import lru_cache

# Cache model loading to optimize performance
@lru_cache(maxsize=3)
def load_hf_model(model_name):
    return gr.load(
        name=f"deepseek-ai/{model_name}",
        src="huggingface",
        api_name="/chat"
    )
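
# Note: with lru_cache, repeated calls with the same model name reuse the
# already-loaded component instead of re-fetching it from the Hub, e.g.:
#   a = load_hf_model("DeepSeek-R1")
#   b = load_hf_model("DeepSeek-R1")  # cache hit: a is b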

# Load all models at startup
MODELS = {
    "DeepSeek-R1-Distill-Qwen-32B": load_hf_model("DeepSeek-R1-Distill-Qwen-32B"),
    "DeepSeek-R1": load_hf_model("DeepSeek-R1"),
    "DeepSeek-R1-Zero": load_hf_model("DeepSeek-R1-Zero")
}
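
# The dict above is built eagerly at import time, so all three remote loads
# happen once during startup; maxsize=3 on the cache matches the number of
# models, so none of them is ever evicted.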

def parse_response(response):
    """Handle different response formats from various models."""
    if isinstance(response, list):
        if len(response) > 0:
            first = response[0]
            # Handle list-of-messages format; guard against non-dict entries
            if isinstance(first, dict):
                return first.get('generated_text',
                                 first.get('content', str(first)))
            return str(first)
    elif isinstance(response, dict):
        # Handle OpenAI-style format
        if 'choices' in response:
            return response['choices'][0]['message']['content']
        # Handle standard text generation format
        elif 'generated_text' in response:
            return response['generated_text']
    return f"Unsupported response format: {type(response)}"

def chatbot(input_text, history, model_choice, system_message, max_new_tokens, temperature, top_p):
    history = history or []
    model_component = MODELS[model_choice]

    # Construct messages with optional system message
    messages = []
    if system_message.strip():
        messages.append({"role": "system", "content": system_message})
    messages.append({"role": "user", "content": input_text})

    payload = {
        "messages": messages,
        "max_tokens": max_new_tokens,
        "temperature": temperature,
        "top_p": top_p
    }

    try:
        response = model_component(payload)
        assistant_response = parse_response(response)
    except Exception as e:
        assistant_response = f"Error: {str(e)}"

    history.append((input_text, assistant_response))
    return history, history, ""
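
# The three return values line up with the outputs wired below: the updated
# history re-renders the Chatbot component, the same list refreshes the
# gr.State holder, and the empty string clears the input textbox.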

# Interface setup
with gr.Blocks(theme=gr.themes.Soft(), title="DeepSeek Chatbot") as demo:
    gr.Markdown("""# DeepSeek Chatbot""")

    with gr.Row():
        with gr.Column():
            chatbot_output = gr.Chatbot(height=500)
            msg = gr.Textbox(placeholder="Type your message...")
            with gr.Row():
                submit_btn = gr.Button("Submit", variant="primary")
                clear_btn = gr.ClearButton([msg, chatbot_output])

    with gr.Row():
        with gr.Accordion("Options", open=True):
            model_choice = gr.Radio(
                choices=list(MODELS.keys()),
                value="DeepSeek-R1"
            )
        with gr.Accordion("Optional Parameters", open=False):
            system_message = gr.Textbox(
                value="You are a helpful AI assistant",
                lines=2
            )
            max_new_tokens = gr.Slider(1, 4000, 200)
            temperature = gr.Slider(0.1, 4.0, 0.7)
            top_p = gr.Slider(0.1, 1.0, 0.9)
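            # The slider positional arguments above are (minimum, maximum, initial value).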

    chat_history = gr.State([])

    submit_btn.click(
        chatbot,
        [msg, chat_history, model_choice, system_message, max_new_tokens, temperature, top_p],
        [chatbot_output, chat_history, msg]
    )
    msg.submit(
        chatbot,
        [msg, chat_history, model_choice, system_message, max_new_tokens, temperature, top_p],
        [chatbot_output, chat_history, msg]
    )

if __name__ == "__main__":
    demo.launch()
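
# When running locally, options such as demo.launch(share=True) (temporary
# public link) or demo.queue() (request queuing) can be added; on Hugging Face
# Spaces the bare launch() above is sufficient.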