File size: 4,928 Bytes
be82a8a
4971496
e0d2fc3
be82a8a
4971496
be82a8a
cdfe590
e0d2fc3
cdfe590
e0d2fc3
 
 
 
00f746f
e0d2fc3
4971496
00f746f
e0d2fc3
 
 
 
 
 
00f746f
e0d2fc3
4971496
00f746f
e0d2fc3
4971496
00f746f
e0d2fc3
 
 
 
 
 
 
 
 
00f746f
 
e0d2fc3
 
 
 
 
 
cdfe590
e0d2fc3
87b137c
 
 
 
be82a8a
e0d2fc3
4971496
cdfe590
4971496
cdfe590
50a5b93
 
4971496
e0d2fc3
4971496
cdfe590
4971496
 
50a5b93
 
cdfe590
 
e0d2fc3
cdfe590
 
 
 
 
 
 
 
 
4971496
 
be82a8a
e0d2fc3
cdfe590
00f746f
a6f10c7
00f746f
 
 
 
e0d2fc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cdfe590
e0d2fc3
 
00f746f
cdfe590
00f746f
e0d2fc3
00f746f
 
 
 
 
e0d2fc3
cdfe590
00f746f
 
e0d2fc3
 
 
 
 
 
00f746f
e0d2fc3
cdfe590
be82a8a
87b137c
 
 
00f746f
 
 
 
 
a6f10c7
 
 
 
00f746f
a6f10c7
 
 
e0d2fc3
 
 
 
00f746f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
from types import SimpleNamespace
from typing import Iterator

import gradio as gr
from huggingface_hub import InferenceClient

# Module-level Hugging Face inference client, created once at import time and
# reused by respond() for every chat request.
client = InferenceClient(model="Pinkstack/Superthoughts-lite-v1")

def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    stop_event: "gr.EventData | None" = None,
) -> Iterator[str]:
    """Stream the model's reply to ``message`` as progressively longer strings.

    The conversation sent to the model is the system message, then every
    non-empty user/assistant turn from ``history``, then ``message``.

    Args:
        message: The new user message.
        history: Earlier ``(user, assistant)`` turns; empty/``None`` entries
            are skipped.
        system_message: Content of the leading ``system`` message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.
        stop_event: Optional object polled between chunks. Streaming stops
            when ``stop_event.originator.get("clicked")`` is truthy. ``None``
            or any object without that shape means "never stop".

    Yields:
        The accumulated reply so far, passed through ``format_response``.
        If anything raises, a single ``"Error: ..."`` string is yielded
        instead and the stream ends.
    """
    messages = [{"role": "system", "content": system_message}]

    # Replay earlier turns, skipping empty halves of a turn.
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    def stop_requested() -> bool:
        # An object lacking `originator` (or None) must not abort the stream:
        # previously the resulting AttributeError was caught below and replaced
        # the whole reply with an error message.
        try:
            return bool(stop_event.originator.get("clicked"))
        except (AttributeError, KeyError, TypeError):
            return False

    response = ""

    # Broad catch is deliberate: this is the UI boundary, and any failure is
    # surfaced to the user as text rather than crashing the event handler.
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if stop_requested():
                break
            # Skip chunks that carry no choice instead of raising IndexError.
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            if token is not None:
                response += token
                yield format_response(response)
    except Exception as e:
        yield f"Error: {str(e)}"

def format_response(response: str) -> str:
    """Turn <think>...</think> markers into a collapsible HTML <details> section."""
    # Each marker is swapped for its HTML counterpart; text outside the markers
    # is returned untouched.
    substitutions = (
        ("<think>", '<details open><summary>Show thinking 🧠</summary><div class="thoughts">'),
        ("</think>", "</div></details>"),
    )
    rendered = response
    for marker, html in substitutions:
        rendered = rendered.replace(marker, html)
    return rendered

# Custom CSS handed to gr.Blocks(css=...) when the interface is built.
# - `.thoughts` styles the <div class="thoughts"> wrapper that
#   format_response() emits around <think> content: white text on a black,
#   bordered, rounded box.
# - The `details summary` rules style the clickable header, hide the WebKit
#   disclosure marker, and append a ▶ (closed) or ▼ (open) glyph instead.
css = """
.thoughts {
    border: 1px solid #ccc;
    padding: 10px;
    background-color: #000000;
    color: #ffffff;
    border-radius: 5px;
    margin: 5px 0;
}
details summary {
    cursor: pointer;
    padding: 5px;
    background-color: #000000;
    color: #ffffff;
    border-radius: 5px;
    font-weight: bold;
    margin: 5px 0;
}
details summary::-webkit-details-marker {
    display: none;
}
details summary:after {
    content: " ▶";
}
details[open] summary:after {
    content: " ▼";
}
"""

# Create Gradio interface
with gr.Blocks(css=css) as demo:
    gr.Markdown("## Chat with Superthoughts lite! (1.7B)")
    gr.Markdown("**Note:** First response may take a moment to initialize. Subsequent responses will be faster.")

    chatbot = gr.Chatbot(height=600)
    msg = gr.Textbox(label="Your message", placeholder="Type your message here...")

    # Generation parameters, collapsed by default; their values are passed to
    # bot() on every submit.
    with gr.Accordion("Advanced Settings", open=False):
        system_message = gr.Textbox(
            value="You must act in a conversational matter and always include <think> ... </think> <output> </output> tokens.",
            label="System message"
        )
        max_tokens = gr.Slider(
            minimum=1,
            maximum=4096,
            value=512,
            step=1,
            label="Max new tokens"
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature"
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)"
        )

    def user(user_message: str, history: list) -> tuple[str, list]:
        """Clear the textbox and append the user's message as a new, unanswered turn."""
        return "", history + [[user_message, None]]

    def bot(history: list, system_message: str, max_tokens: int, temperature: float, top_p: float) -> Iterator[list]:
        """Stream the assistant's reply into the last turn of ``history``.

        Yields the whole history after each partial response so the Chatbot
        component re-renders as text arrives.
        """
        user_message, _ = history[-1]
        history[-1][1] = ""  # Initialize bot's response

        # respond() polls `stop_event.originator.get("clicked")` between chunks.
        # Stopping is handled by the Stop button's `cancels=` below, so pass an
        # inert flag holder that never requests a stop.
        never_stop = SimpleNamespace(originator={})

        for partial_response in respond(user_message, history[:-1], system_message, max_tokens, temperature, top_p, never_stop):
            history[-1][1] = partial_response
            yield history

    # Set up chat message handling. The event chain is kept so the Stop button
    # can cancel an in-flight generation.
    generation = msg.submit(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot,
        [chatbot, system_message, max_tokens, temperature, top_p],
        chatbot
    )

    # Add a clear button
    clear = gr.Button("Clear Conversation")
    clear.click(lambda: None, None, chatbot, queue=False)

    # Add a stop button. Event inputs must be components and gr.EventData
    # cannot be constructed by hand, so cancellation goes through `cancels`,
    # which stops the running bot() generator.
    stop_button = gr.Button("Stop")
    stop_button.click(fn=None, inputs=None, outputs=None, cancels=[generation], queue=False)

    # Add disclaimer
    gr.Markdown(
        """
        ---
        ⚠️ **Disclaimer:** Superthoughts may make mistakes. Always verify important information.
        This chat interface is intended for testing and experimentation purposes only.
        """
    )

# Launch the interface
if __name__ == "__main__":
    # Enable the request queue, then start the server with a public share
    # link; queue() returns the Blocks instance, so the calls can be chained.
    demo.queue().launch(share=True)