|
import gradio as gr |
|
from huggingface_hub import InferenceClient |
|
import re |
|
|
|
# Shared Hugging Face Inference API client used by respond() for streaming
# chat completions from the Superthoughts model.
client = InferenceClient("Pinkstack/Superthoughts-lite-v1")
|
|
|
def process_think(text):
    """Rewrite every <think>...</think> span in *text* as a collapsible block.

    Each matched span becomes an HTML <details> element (styled via the
    module-level CSS) so the model's chain-of-thought is hidden behind a
    "Show thoughts" toggle. Text outside the tags is left untouched.
    """
    think_re = re.compile(r'<think>(.*?)</think>', re.DOTALL)

    def to_details(match):
        inner = match.group(1).strip()
        html = (
            '<details class="think-details">'
            '<summary class="think-summary">Show thoughts</summary>'
            f'<div class="think-content">{inner}</div>'
            '</details>'
        )
        return html

    return think_re.sub(to_details, text)
|
|
|
def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
    """Stream a chat completion, yielding the accumulated reply after each token.

    Args:
        message: The latest user message.
        history: Prior (user, assistant) turn pairs from the ChatInterface.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The full response so far, with <think> spans converted to
        collapsible HTML by process_think().
    """
    messages = [{"role": "system", "content": system_message}]
    for user_text, bot_text in history:
        # Skip empty turns so we never send blank messages to the model.
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if bot_text:
            messages.append({"role": "assistant", "content": bot_text})
    messages.append({"role": "user", "content": message})

    response = ""

    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # Bug fix: role-only/final stream chunks carry delta.content == None,
        # which previously crashed `response += token` with a TypeError.
        if token:
            response += token
            yield process_think(response)
|
|
|
|
|
# CSS for the collapsible "Show thoughts" blocks emitted by process_think();
# passed to gr.ChatInterface below.
css = """
.think-details {
    border: 1px solid #ccc;
    border-radius: 5px;
    margin: 10px 0;
    padding: 5px;
}
.think-summary {
    cursor: pointer;
    font-weight: bold;
    background-color: #f1f1f1;
    padding: 5px;
    border-radius: 3px;
    user-select: none;
}
"""
|
|
|
|
|
|
|
# Chat UI: the sliders/textbox map positionally onto respond()'s extra
# parameters (system_message, max_tokens, temperature, top_p).
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You must always include <think> ... </think> <output> </output> tokens.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    # Bug fix: `allow_html` is not a gr.ChatInterface constructor parameter and
    # raised TypeError at startup; HTML in messages is rendered by the Chatbot
    # component regardless, so the kwarg is simply dropped.
    css=css,
)
|
|
|
# Launch the Gradio server only when run as a script, not when imported
# (e.g. by a Hugging Face Spaces runner).
if __name__ == "__main__":
    demo.launch()
|
|