Spaces:

Pinkstack
/

Chat-with-superthoughts-lite

Sleeping

File size: 4,439 Bytes

be82a8a
4971496
e0d2fc3
be82a8a
4971496
be82a8a
cdfe590
e0d2fc3
cdfe590
e0d2fc3
 
 
 
 
4971496
e0d2fc3
 
 
 
 
 
 
 
 
4971496
e0d2fc3
 
4971496
e0d2fc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cdfe590
e0d2fc3
 
 
cdfe590
 
be82a8a
e0d2fc3
4971496
cdfe590
4971496
cdfe590
e0d2fc3
4971496
e0d2fc3
4971496
cdfe590
4971496
 
e0d2fc3
cdfe590
 
e0d2fc3
cdfe590
 
 
 
 
 
 
 
 
4971496
 
be82a8a
e0d2fc3
cdfe590
e0d2fc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cdfe590
e0d2fc3
 
cdfe590
 
e0d2fc3
 
cdfe590
e0d2fc3
 
cdfe590
e0d2fc3
 
cdfe590
e0d2fc3
 
 
 
 
 
 
 
 
 
cdfe590
be82a8a
e0d2fc3

import gradio as gr
from huggingface_hub import InferenceClient
from typing import Iterator

# Module-level inference client for the "Pinkstack/Superthoughts-lite-v1" model.
# It is created once at import time and used by respond() for every request.
client = InferenceClient("Pinkstack/Superthoughts-lite-v1")

def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
) -> Iterator[str]:
    """Stream the model's reply to ``message`` as progressively longer text.

    The prompt consists of the system message, then every non-empty user and
    assistant entry from ``history`` (in order), then ``message`` as the final
    user turn. After each streamed token the whole reply accumulated so far is
    passed through ``format_response`` and yielded.

    Args:
        message: The new user message to answer.
        history: Earlier ``(user, assistant)`` turns; falsy entries are skipped.
        system_message: Content of the leading system-role message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The formatted reply so far after each token. If any exception is
        raised while streaming, one final ``"Error: ..."`` string is yielded
        and the generator ends.
    """
    conversation = [{"role": "system", "content": system_message}]

    # Replay earlier turns, dropping empty/None halves of a turn.
    for past_user, past_assistant in history:
        turn = (("user", past_user), ("assistant", past_assistant))
        conversation.extend(
            {"role": role, "content": text} for role, text in turn if text
        )

    # The message being answered always goes last.
    conversation.append({"role": "user", "content": message})

    reply_so_far = ""
    try:
        stream = client.chat_completion(
            conversation,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        )
        for chunk in stream:
            piece = chunk.choices[0].delta.content
            if piece is None:
                # Chunk without text content: nothing new to show.
                continue
            reply_so_far += piece
            yield format_response(reply_so_far)
    except Exception as exc:
        # Surface the failure in the chat window rather than raising.
        yield f"Error: {exc!s}"

def format_response(response: str) -> str:
    """Render ``<think>`` sections of a model reply as collapsible HTML.

    Every ``<think>`` tag becomes the opening of a ``<details>`` element with
    a "Show thinking" summary and a ``div.thoughts`` wrapper, and every
    ``</think>`` tag closes that wrapper. Text outside the tags is returned
    unchanged.

    This function is applied to partial replies while the model is still
    streaming, so ``response`` may contain a ``<think>`` whose ``</think>``
    has not arrived yet (or never will, if generation stops early). The
    missing closing markup is appended in that case so the returned HTML is
    always balanced. Replies whose tags are already balanced are formatted
    exactly as before.

    Args:
        response: Raw, possibly partial, model output.

    Returns:
        ``response`` with think tags replaced by HTML markup.
    """
    opening = '<details><summary>Show thinking 🧠</summary><div class="thoughts">'
    closing = "</div></details>"

    # Count thinking sections that are still open at the end of the text.
    # Each piece after a "<think>" opens one section; every "</think>" inside
    # it closes one. Stray closers can never push the depth below zero.
    depth = 0
    for segment in response.split("<think>")[1:]:
        depth = max(depth + 1 - segment.count("</think>"), 0)

    formatted = response.replace("<think>", opening).replace("</think>", closing)
    return formatted + closing * depth

# Custom CSS for styling, passed to gr.Blocks(css=...) below.
# - `.thoughts` styles the <div class="thoughts"> wrapper that
#   format_response() emits around the model's thinking text.
# - The `details summary` rules style the clickable summary line of that
#   wrapper, hide the WebKit disclosure marker and add a " ▶" / " ▼" suffix
#   for the closed / open state.
css = """
.thoughts {
    border: 1px solid #ccc;
    padding: 10px;
    background-color: #f8f9fa;
    border-radius: 5px;
    margin: 5px 0;
}
details summary {
    cursor: pointer;
    padding: 5px;
    background-color: #eee;
    border-radius: 5px;
    font-weight: bold;
    margin: 5px 0;
}
details summary::-webkit-details-marker {
    display: none;
}
details summary:after {
    content: " ▶";
}
details[open] summary:after {
    content: " ▼";
}
"""

# Create Gradio interface: a title and warning, the chat window, the message
# box, a collapsed "Advanced Settings" panel with the system prompt and
# sampling controls, and the event wiring that streams replies into the chat.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# Chat with Superthoughts lite! (1.7B)")
    gr.Markdown("**Warning:** The first output from the AI may take a few moments. After the first message, it should work at a decent speed, keep in mind that this chat is only meant for testing and experimenting.")
    
    chatbot = gr.Chatbot(height=600)
    msg = gr.Textbox(label="Your message", placeholder="Type your message here...")
    
    # Generation settings; their current values are passed to bot() on submit.
    with gr.Accordion("Advanced Settings", open=False):
        # NOTE(review): the wording of this default prompt is sent to the
        # model verbatim; confirm with the model's documentation before
        # editing it.
        system_message = gr.Textbox(
            value="You must act in a conversational matter and always include <think> ... </think> <output> </output> tokens.",
            label="System message"
        )
        max_tokens = gr.Slider(
            minimum=1,
            maximum=4096,
            value=512,
            step=1,
            label="Max new tokens"
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature"
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)"
        )

    def user(user_message: str, history: list) -> tuple[str, list]:
        """Add user message to history.

        Returns an empty string (written back to the message box) and a new
        history list with ``[user_message, None]`` appended; the ``None`` slot
        is filled in by ``bot``.
        """
        return "", history + [[user_message, None]]

    def bot(history: list, system_message: str, max_tokens: int, temperature: float, top_p: float) -> Iterator[list]:
        """Generate and stream bot responses.

        Reads the user message from the last history entry, then yields the
        history after each partial reply from ``respond`` so the chat window
        can be refreshed with the text received so far. The last entry of
        ``history`` is updated in place.
        """
        # The last entry is the [user_message, None] pair appended by user().
        user_message, _ = history[-1]
        history[-1][1] = ""  # Initialize bot's response
        
        # history[:-1] leaves out the in-progress turn: respond() appends the
        # current user message itself.
        for partial_response in respond(user_message, history[:-1], system_message, max_tokens, temperature, top_p):
            history[-1][1] = partial_response
            yield history

    # Set up chat message handling: on submit, user() first clears the textbox
    # and appends the message to the chat, then bot() streams the reply.
    # NOTE(review): queue=False on the first step presumably lets it run
    # without waiting in the queue; confirm against the Gradio version in use.
    msg.submit(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot,
        [chatbot, system_message, max_tokens, temperature, top_p],
        chatbot
    )

    # Add a clear button: the callback returns None as the new chatbot value
    # (presumably emptying the conversation).
    clear = gr.Button("Clear Conversation")
    clear.click(lambda: None, None, chatbot, queue=False)

# Launch the interface, but only when this file is run as a script (not when
# it is imported). queue() is called before launch().
# NOTE(review): share=True is passed to launch(); per the Gradio docs this
# requests a public share link. Confirm that is wanted where this is deployed.
if __name__ == "__main__":
    demo.queue()
    demo.launch(share=True)