import gradio as gr
from huggingface_hub import InferenceClient
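
# Hugging Face Inference client pointed at the hosted Superthoughts-lite model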
client = InferenceClient("Pinkstack/Superthoughts-lite-v1")

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Rebuild the conversation in the OpenAI-style message format
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    # Stream the completion and yield the growing response
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:  # the final chunk may carry no content
            response += token
        yield response

def format_response(response):
    # Replace <think>...</think> with a collapsible section
    response = response.replace("<think>", '<details><summary>Show thoughts</summary><div class="thoughts">')
    response = response.replace("</think>", "</div></details>")
    return response
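
# CSS for the collapsible "Show thoughts" section produced by format_response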
css = """
.thoughts {
    border: 1px solid #ccc;
    padding: 10px;
    background-color: #000000;
    border-radius: 5px;
}
details summary {
    cursor: pointer;
    padding: 5px;
    background-color: #000000;
    border-radius: 5px;
    font-weight: bold;
}
details summary::-webkit-details-marker {
    display: none;
}
details summary:after {
    content: " ▶";
}
details[open] summary:after {
    content: " ▼";
}
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown("## Chat with Superthoughts lite! (1.7B)")
    gr.Markdown("**Warning:** The first response may take a few moments; after the first message, replies should arrive at a decent speed.")

    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    system_message = gr.Textbox(value="You must always include <think> ... </think> <output> </output> tokens.", label="System message")
    max_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens")
    temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")

    def user(user_message, history):
        # Append the new user turn to the history and clear the textbox
        return "", history + [[user_message, None]]

    def bot(history, system_message, max_tokens, temperature, top_p):
        # Consume the stream for the latest user turn, then render the
        # <think> block as a collapsible section before updating the chat
        user_message, _ = history[-1]
        response = ""
        for partial_response in respond(user_message, history[:-1], system_message, max_tokens, temperature, top_p):
            response = partial_response
        history[-1][1] = format_response(response)
        return history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=True).then(
        bot, [chatbot, system_message, max_tokens, temperature, top_p], chatbot
    )

demo.launch()