Pinkstack committed on
Commit
4971496
·
verified ·
1 Parent(s): fab8c45

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -67
app.py CHANGED
@@ -1,85 +1,83 @@
1
  import gradio as gr
 
2
  import re
3
- from typing import List, Tuple
4
- import html
5
 
6
- def create_collapsible_html(thinking: str) -> str:
7
- """Create HTML for a collapsible thinking section"""
8
- escaped_thinking = html.escape(thinking)
9
- return f'''
10
- <div class="thinking-container" style="margin: 8px 0;">
11
- <details class="thinking-details" style="
12
- border: 1px solid #ddd;
13
- border-radius: 4px;
14
- padding: 8px;
15
- background-color: #f8f9fa;
16
- cursor: pointer;
17
- ">
18
- <summary style="
19
- font-weight: 500;
20
- display: flex;
21
- align-items: center;
22
- gap: 8px;
23
- ">
24
- <span style="transform: rotate(-90deg); display: inline-block;">➤</span>
25
- Show thoughts
26
- </summary>
27
- <div style="
28
- margin-top: 8px;
29
- padding: 8px;
30
- border-top: 1px solid #eee;
31
- white-space: pre-wrap;
32
- ">{escaped_thinking}</div>
33
- </details>
34
- </div>
35
- '''
36
 
37
- def process_message(message: str) -> str:
38
- """Process a message to convert thinking sections into collapsible elements"""
39
- def replace_thinking(match):
40
- thinking_content = match.group(1).strip()
41
- return create_collapsible_html(thinking_content)
 
42
 
43
- # Replace thinking sections with collapsible elements
44
- processed = re.sub(r'<think>(.*?)</think>', replace_thinking, message, flags=re.DOTALL)
 
 
 
 
 
 
 
45
 
46
- # Remove output tags if present
47
- processed = re.sub(r'<output>(.*?)</output>', r'\1', processed, flags=re.DOTALL)
 
 
 
 
 
 
 
 
 
48
 
49
- return processed
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
- class CustomChatInterface(gr.ChatInterface):
52
- def __init__(self, *args, **kwargs):
53
- super().__init__(*args, **kwargs)
54
-
55
- def render_message(self, message: str, is_user: bool) -> str:
56
- if not is_user:
57
- return process_message(message)
58
- return message
 
 
 
 
 
 
 
 
 
59
 
60
- demo = CustomChatInterface(
 
 
61
  respond,
62
  additional_inputs=[
63
  gr.Textbox(value="You must always include <think> ... </think> <output> </output> tokens.", label="System message"),
64
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
65
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
66
- gr.Slider(
67
- minimum=0.1,
68
- maximum=1.0,
69
- value=0.95,
70
- step=0.05,
71
- label="Top-p (nucleus sampling)",
72
- ),
73
  ],
74
- css="""
75
- .thinking-details[open] summary span {
76
- transform: rotate(0deg) !important;
77
- }
78
- .thinking-details summary::-webkit-details-marker {
79
- display: none;
80
- }
81
- """
82
  )
83
 
84
  if __name__ == "__main__":
85
- demo.launch()
 
1
import gradio as gr
from huggingface_hub import InferenceClient
import re

# Module-level Inference API client for the chat model; respond() streams
# chat completions through this endpoint.
client = InferenceClient("Pinkstack/Superthoughts-lite-v1")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
def process_think(text):
    """Replace each <think>...</think> span with a collapsible HTML block.

    Args:
        text: Raw model output, possibly containing <think>...</think>
            reasoning sections.

    Returns:
        The text with every <think> section rewritten as an HTML
        <details> element (styled via the .think-* CSS classes); text
        outside the tags is returned unchanged.
    """
    # Local import keeps this fix self-contained; html is stdlib.
    import html

    pattern = re.compile(r'<think>(.*?)</think>', re.DOTALL)

    def replacer(match):
        # Escape the captured reasoning so any markup the model emits is
        # shown literally instead of being rendered into the chat DOM
        # (the pre-rewrite version of this app escaped here too; dropping
        # it allows HTML injection / broken layout).
        content = html.escape(match.group(1).strip())
        # You can adjust the inline styles or classes as needed.
        return (
            '<details class="think-details">'
            '<summary class="think-summary">Show thoughts</summary>'
            f'<div class="think-content">{content}</div>'
            '</details>'
        )

    return pattern.sub(replacer, text)
25
+
26
def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
    """Stream an assistant reply for *message*, yielding it incrementally.

    Args:
        message: The latest user message.
        history: Prior (user_text, bot_text) turn pairs from the Gradio chat.
        system_message: System prompt placed first in the conversation.
        max_tokens: Cap on generated tokens, forwarded to the endpoint.
        temperature: Sampling temperature, forwarded to the endpoint.
        top_p: Nucleus-sampling threshold, forwarded to the endpoint.

    Yields:
        The accumulated reply so far, with <think> sections rewritten into
        collapsible HTML blocks by process_think().
    """
    # Build the conversation history with the system prompt first.
    messages = [{"role": "system", "content": system_message}]
    for user_text, bot_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if bot_text:
            messages.append({"role": "assistant", "content": bot_text})
    messages.append({"role": "user", "content": message})

    response = ""
    # Stream the response from the client.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Some stream chunks (e.g. the final one carrying only a
        # finish_reason) have no delta content; the previous code did
        # `response += token` unconditionally, which raises TypeError
        # when token is None.
        token = chunk.choices[0].delta.content
        if token:
            response += token
        # Process the response to swap <think> sections with collapsible blocks.
        yield process_think(response)
49
 
50
# Custom CSS to style the collapsible block emitted by process_think()
# (class names must match the .think-* classes used there).
css = """
.think-details {
border: 1px solid #ccc;
border-radius: 5px;
margin: 10px 0;
padding: 5px;
}
.think-summary {
cursor: pointer;
font-weight: bold;
background-color: #f1f1f1;
padding: 5px;
border-radius: 3px;
user-select: none;
}
"""
67
 
68
# Build the chat UI. NOTE(review): gr.ChatInterface does not accept an
# `allow_html` keyword argument, so passing it raises
# "TypeError: ... got an unexpected keyword argument 'allow_html'" at
# startup. Gradio's chatbot component renders HTML in assistant messages
# by default, so the flag was unnecessary as well as invalid; it has been
# removed. Confirm against the installed Gradio version's ChatInterface
# signature.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You must always include <think> ... </think> <output> </output> tokens.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    css=css,
)

if __name__ == "__main__":
    demo.launch()