import gradio as gr
from huggingface_hub import InferenceClient
import re

client = InferenceClient("Pinkstack/Superthoughts-lite-v1")


def process_think(text):
    """
    Searches for text enclosed in <think>...</think> tags and replaces it
    with a collapsible HTML details block.
    """
    pattern = re.compile(r'<think>(.*?)</think>', re.DOTALL)

    def replacer(match):
        content = match.group(1).strip()
        # You can adjust the inline styles or classes as needed.
        return (
            '<details class="think-details">'
            '<summary class="think-summary">Show thoughts</summary>'
            f'<div>{content}</div>'
            '</details>'
        )

    return pattern.sub(replacer, text)


def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
    # Build the conversation history with the system prompt first.
    messages = [{"role": "system", "content": system_message}]
    for user_text, bot_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if bot_text:
            messages.append({"role": "assistant", "content": bot_text})
    messages.append({"role": "user", "content": message})

    response = ""
    # Stream the response from the client.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Some streamed chunks carry no content (e.g. the final chunk), so fall back to "".
        token = chunk.choices[0].delta.content or ""
        response += token
        # Process the response to swap <think> sections with collapsible blocks.
        yield process_think(response)


# Custom CSS to style the collapsible block.
css = """
.think-details {
    border: 1px solid #ccc;
    border-radius: 5px;
    margin: 10px 0;
    padding: 5px;
}
.think-summary {
    cursor: pointer;
    font-weight: bold;
    background-color: #f1f1f1;
    padding: 5px;
    border-radius: 3px;
    user-select: none;
}
"""

# Note: gr.ChatInterface does not accept an allow_html argument. The chat output
# renders messages as Markdown, so the injected <details> HTML is expected to
# display as a collapsible block styled by the CSS above.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You must always include <think> ... </think> tokens.",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    css=css,
)

if __name__ == "__main__":
    demo.launch()
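
# Illustrative sanity check for process_think (a sketch, not part of the app; the
# sample sentence below is invented, not real model output). Uncomment to try it:
#
#   sample = "Sure. <think>Check the units first.</think> The answer is 42."
#   print(process_think(sample))
#   # -> Sure. <details class="think-details"><summary class="think-summary">Show thoughts</summary><div>Check the units first.</div></details> The answer is 42.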