Spaces:

Pinkstack
/

Chat-with-superthoughts-lite

Sleeping

File size: 4,928 Bytes

be82a8a
4971496
e0d2fc3
be82a8a
4971496
be82a8a
cdfe590
e0d2fc3
cdfe590
e0d2fc3
 
 
 
00f746f
e0d2fc3
4971496
00f746f
e0d2fc3
 
 
 
 
 
00f746f
e0d2fc3
4971496
00f746f
e0d2fc3
4971496
00f746f
e0d2fc3
 
 
 
 
 
 
 
 
00f746f
 
e0d2fc3
 
 
 
 
 
cdfe590
e0d2fc3
87b137c
 
 
 
be82a8a
e0d2fc3
4971496
cdfe590
4971496
cdfe590
50a5b93
 
4971496
e0d2fc3
4971496
cdfe590
4971496
 
50a5b93
 
cdfe590
 
e0d2fc3
cdfe590
 
 
 
 
 
 
 
 
4971496
 
be82a8a
e0d2fc3
cdfe590
00f746f
a6f10c7
00f746f
 
 
 
e0d2fc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cdfe590
e0d2fc3
 
00f746f
cdfe590
00f746f
e0d2fc3
00f746f
 
 
 
 
e0d2fc3
cdfe590
00f746f
 
e0d2fc3
 
 
 
 
 
00f746f
e0d2fc3
cdfe590
be82a8a
87b137c
 
 
00f746f
 
 
 
 
a6f10c7
 
 
 
00f746f
a6f10c7
 
 
e0d2fc3
 
 
 
00f746f

from types import SimpleNamespace
from typing import Iterator

import gradio as gr
from huggingface_hub import InferenceClient

# Inference client for the Superthoughts-lite model. It is created once at
# import time and reused by respond() for every chat request.
client = InferenceClient("Pinkstack/Superthoughts-lite-v1")

def _stop_requested(stop_event: object) -> bool:
    """Return True when *stop_event* carries a truthy ``clicked`` flag.

    The flag is looked up as ``stop_event.originator.get("clicked")``.  Any
    object that lacks an ``originator`` attribute, or whose ``originator`` has
    no ``get`` method (including ``None``), is treated as "not stopped" instead
    of raising.
    """
    originator = getattr(stop_event, "originator", None)
    getter = getattr(originator, "get", None)
    if not callable(getter):
        return False
    return bool(getter("clicked"))


def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    stop_event: "gr.EventData | None" = None,
) -> Iterator[str]:
    """Stream the model's reply to *message*.

    Args:
        message: The new user message to answer.
        history: Earlier ``(user, assistant)`` turns; empty/None halves of a
            turn are skipped.
        system_message: Content of the leading ``system`` message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.
        stop_event: Optional object whose ``originator`` mapping may hold a
            truthy ``"clicked"`` entry; when it does, streaming stops before
            the next chunk is processed.  ``None`` or an object without that
            attribute never stops the stream.

    Yields:
        The whole reply accumulated so far, passed through
        ``format_response``, once per received token.  If anything raises
        while streaming, a single ``"Error: ..."`` string is yielded instead
        and the generator ends.
    """
    messages = [{"role": "system", "content": system_message}]

    # Replay the earlier turns in order, dropping empty halves.
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    response = ""

    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if _stop_requested(stop_event):
                break
            # Guard against a chunk with no choices so it cannot raise
            # IndexError and replace the reply with an error message.
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            if token is not None:
                response += token
                yield format_response(response)
    except Exception as e:
        # UI boundary: show the failure in the chat instead of crashing the
        # event handler.
        yield f"Error: {e}"

def format_response(response: str) -> str:
    """Rewrite the model's <think> markers as a collapsible HTML <details> block."""
    # (model tag, HTML that replaces it), applied in this order.
    substitutions = (
        ("<think>", '<details open><summary>Show thinking 🧠</summary><div class="thoughts">'),
        ("</think>", "</div></details>"),
    )
    rendered = response
    for tag, html in substitutions:
        rendered = rendered.replace(tag, html)
    return rendered

# Custom CSS handed to gr.Blocks(css=...) below.
# `.thoughts` is the class format_response() puts on the <div> that wraps the
# model's thinking text. The `details`/`summary` rules style the collapsible
# wrapper, hide the WebKit disclosure marker, and append a ▶ / ▼ arrow
# depending on whether the section is open.
css = """
.thoughts {
    border: 1px solid #ccc;
    padding: 10px;
    background-color: #000000;
    color: #ffffff;
    border-radius: 5px;
    margin: 5px 0;
}
details summary {
    cursor: pointer;
    padding: 5px;
    background-color: #000000;
    color: #ffffff;
    border-radius: 5px;
    font-weight: bold;
    margin: 5px 0;
}
details summary::-webkit-details-marker {
    display: none;
}
details summary:after {
    content: " ▶";
}
details[open] summary:after {
    content: " ▼";
}
"""

# Build the Gradio interface: chat window, message box, sampling settings,
# and Clear / Stop buttons.
with gr.Blocks(css=css) as demo:
    gr.Markdown("## Chat with Superthoughts lite! (1.7B)")
    gr.Markdown("**Note:** First response may take a moment to initialize. Subsequent responses will be faster.")

    chatbot = gr.Chatbot(height=600)
    msg = gr.Textbox(label="Your message", placeholder="Type your message here...")

    with gr.Accordion("Advanced Settings", open=False):
        system_message = gr.Textbox(
            value="You must act in a conversational matter and always include <think> ... </think> <output> </output> tokens.",
            label="System message"
        )
        max_tokens = gr.Slider(
            minimum=1,
            maximum=4096,
            value=512,
            step=1,
            label="Max new tokens"
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature"
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)"
        )

    def user(user_message: str, history: list) -> tuple[str, list]:
        """Clear the textbox and append the message as a new, unanswered turn."""
        return "", history + [[user_message, None]]

    def bot(history: list, system_message: str, max_tokens: int, temperature: float, top_p: float) -> Iterator[list]:
        """Stream the reply to the last turn in *history*.

        Yields the full history after every partial response so the chatbot
        re-renders as tokens arrive.
        """
        user_message, _ = history[-1]
        history[-1][1] = ""  # Start the reply empty so the turn renders at once

        # respond() takes a stop_event argument and checks
        # `stop_event.originator.get("clicked")`.  Stopping is handled by the
        # Stop button's `cancels=` below, so pass an object that never reports
        # a click.
        never_stopped = SimpleNamespace(originator={})

        # NOTE(review): respond() yields text already run through
        # format_response(), and that is what gets stored in history.  Earlier
        # assistant turns are therefore sent back to the model with <details>
        # markup instead of the original <think> tags; consider keeping the
        # raw text separately.
        for partial_response in respond(user_message, history[:-1], system_message, max_tokens, temperature, top_p, never_stopped):
            history[-1][1] = partial_response
            yield history

    # On submit: record the user turn immediately (unqueued), then stream the
    # reply.  Keep the handle of the streaming step so Stop can cancel it.
    stream_event = msg.submit(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot,
        [chatbot, system_message, max_tokens, temperature, top_p],
        chatbot
    )

    # Add a clear button
    clear = gr.Button("Clear Conversation")
    clear.click(lambda: None, None, chatbot, queue=False)

    # Add a stop button.  It runs no function of its own; `cancels` asks
    # Gradio to cancel the in-flight streaming event.
    stop_button = gr.Button("Stop")
    stop_button.click(fn=None, inputs=None, outputs=None, cancels=[stream_event])

    # Add disclaimer
    gr.Markdown(
        """
        ---
        ⚠️ **Disclaimer:** Superthoughts may make mistakes. Always verify important information.
        This chat interface is intended for testing and experimentation purposes only.
        """
    )

# Script entry point: enable the queue, then launch with share=True.
if __name__ == "__main__":
    demo.queue().launch(share=True)