import gradio as gr
import spaces  # only needed on Hugging Face ZeroGPU Spaces; see the note at the end
from functools import lru_cache

# Cache loaded interfaces so repeated calls do not re-fetch the same model
@lru_cache(maxsize=3)
def load_hf_model(model_name):
    # gr.load returns a callable Gradio interface backed by the hosted model
    return gr.load(
        name=f"deepseek-ai/{model_name}",
        src="huggingface",
        api_name="/chat"
    )

# Load all model interfaces once at startup so the first request is not delayed
MODELS = {
    "DeepSeek-R1-Distill-Qwen-32B": load_hf_model("DeepSeek-R1-Distill-Qwen-32B"),
    "DeepSeek-R1": load_hf_model("DeepSeek-R1"),
    "DeepSeek-R1-Zero": load_hf_model("DeepSeek-R1-Zero")
}
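# With every model loaded eagerly above, the lru_cache mainly guards against
# accidental duplicate loads if load_hf_model is called again at runtime.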

# --- Chatbot function ---
def chatbot(input_text, history, model_choice, system_message, max_new_tokens, temperature, top_p):
    # A cleared or fresh session passes None for the state; start a new list
    if history is None:
        history = []

    # Select the model
    model_component = MODELS[model_choice]

    # Build the request payload. The exact schema the loaded interface accepts
    # depends on the upstream endpoint; an OpenAI-style chat payload is assumed.
    payload = {
        "messages": [{"role": "user", "content": input_text}],
        "system": system_message,
        "max_tokens": max_new_tokens,
        "temperature": temperature,
        "top_p": top_p
    }

    # Run inference with the selected model. The response shape also depends on
    # the endpoint, so accept either a plain string or a list of messages.
    try:
        response = model_component(payload)
        if isinstance(response, str):
            assistant_response = response
        else:
            assistant_response = response[-1]["content"]
    except Exception as e:
        assistant_response = f"Error: {e}"

    # Append user and assistant messages in the new format
    history.append({"role": "user", "content": input_text})
    history.append({"role": "assistant", "content": assistant_response})

    # Return the updated conversation to display and store
    # 1) chatbot_output = updated history of messages
    # 2) chat_history = same updated history (as state)
    # 3) "" to clear the input textbox
    return history, history, ""
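
# For reference, the message format a `type="messages"` Chatbot renders
# (one dict per turn, matching what chatbot() appends above):
#
#   [
#       {"role": "user", "content": "Hello"},
#       {"role": "assistant", "content": "Hi, how can I help?"},
#   ]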

# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Soft(), title="DeepSeek Chatbot") as demo:
    gr.Markdown(
        """
        # DeepSeek Chatbot
        Created by [ruslanmv.com](https://ruslanmv.com/)
        This is a demo of different DeepSeek models. Select a model, type your message, and click "Submit".
        You can also adjust optional parameters like system message, max new tokens, temperature, and top-p.
        """
    )

    with gr.Row():
        with gr.Column():
            # Use type='messages' for OpenAI-style messages
            chatbot_output = gr.Chatbot(label="DeepSeek Chatbot", height=500, type="messages")
            msg = gr.Textbox(label="Your Message", placeholder="Type your message here...")
            with gr.Row():
                submit_btn = gr.Button("Submit", variant="primary")
                clear_btn = gr.ClearButton([msg, chatbot_output])

    with gr.Row():
        with gr.Accordion("Options", open=True):
            model_choice = gr.Radio(
                choices=list(MODELS.keys()),
                label="Choose a Model",
                value="DeepSeek-R1"
            )
            with gr.Accordion("Optional Parameters", open=False):
                system_message = gr.Textbox(
                    label="System Message",
                    value="You are a friendly Chatbot created by ruslanmv.com",
                    lines=2,
                )
                max_new_tokens = gr.Slider(
                    minimum=1, maximum=4000, value=200, label="Max New Tokens"
                )
                temperature = gr.Slider(
                    minimum=0.10, maximum=4.00, value=0.70, label="Temperature"
                )
                top_p = gr.Slider(
                    minimum=0.10, maximum=1.00, value=0.90, label="Top-p (nucleus sampling)"
                )
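                # Temperature sharpens or flattens the token distribution;
                # top-p (nucleus sampling) draws from the smallest token set
                # whose cumulative probability exceeds the chosen threshold.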

    chat_history = gr.State([])
    # Also reset the stored history on "Clear", so old turns do not reappear
    # after the next message.
    clear_btn.click(lambda: [], None, chat_history)

    # Event handling
    submit_btn.click(
        chatbot,
        [msg, chat_history, model_choice, system_message, max_new_tokens, temperature, top_p],
        [chatbot_output, chat_history, msg]
    )
    msg.submit(
        chatbot,
        [msg, chat_history, model_choice, system_message, max_new_tokens, temperature, top_p],
        [chatbot_output, chat_history, msg]
    )

# Note on GPUs: the `spaces` package matters only on Hugging Face ZeroGPU
# Spaces, where `spaces.GPU` decorates the functions that actually run on the
# GPU; it is not wrapped around demo.launch().
if __name__ == "__main__":
    demo.launch()
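
# For reference, a minimal ZeroGPU sketch (the decorated function here is
# hypothetical; this app delegates inference to gr.load and runs no local model):
#
#   import spaces
#
#   @spaces.GPU  # requests a GPU for the duration of each call
#   def local_generate(prompt: str) -> str:
#       ...  # run a locally loaded model here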