Pinkstack committed on
Commit cdfe590 · verified · parent 28762e4

Update app.py

Files changed (1): app.py (+65 -56)
app.py CHANGED
@@ -1,82 +1,91 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-import re

 client = InferenceClient("Pinkstack/Superthoughts-lite-v1")

-def process_think(text):
-    """
-    Searches for text enclosed in <think>...</think> and replaces it with a
-    collapsible HTML details block.
-    """
-    pattern = re.compile(r'<think>(.*?)</think>', re.DOTALL)
-
-    def replacer(match):
-        content = match.group(1).strip()
-        # You can adjust the inline styles or classes as needed.
-        return (
-            '<details class="think-details">'
-            '<summary class="think-summary">Show thoughts</summary>'
-            f'<div class="think-content">{content}</div>'
-            '</details>'
-        )
-
-    return pattern.sub(replacer, text)
-
-def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
-    # Build the conversation history with the system prompt first.
     messages = [{"role": "system", "content": system_message}]
-    for user_text, bot_text in history:
-        if user_text:
-            messages.append({"role": "user", "content": user_text})
-        if bot_text:
-            messages.append({"role": "assistant", "content": bot_text})
     messages.append({"role": "user", "content": message})
-
     response = ""
-    # Stream the response from the client.
-    for chunk in client.chat_completion(
         messages,
         max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
     ):
-        token = chunk.choices[0].delta.content
         response += token
-        # Process the response to swap <think> sections with collapsible blocks.
-        yield process_think(response)

-# Custom CSS to style the collapsible block
 css = """
-.think-details {
     border: 1px solid #ccc;
     border-radius: 5px;
-    margin: 10px 0;
-    padding: 5px;
 }
-.think-summary {
     cursor: pointer;
-    font-weight: bold;
-    background-color: #f1f1f1;
     padding: 5px;
-    border-radius: 3px;
-    user-select: none;
 }
 """

-# Note: The parameter for allowing HTML rendering may vary.
-# In many cases, the chat output component will render HTML if you pass allow_html=True.
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You must always include <think> ... </think> <output> </output> tokens.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
-    ],
-    css=css,
-)

-if __name__ == "__main__":
-    demo.launch()
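That was the previous revision: a regex-based process_think helper wired into gr.ChatInterface. Worth noting is that its pattern only rewrites *closed* think blocks, so while the model was still streaming inside <think>, the raw tag stayed visible. A quick standalone check (an editor's illustration, not part of the commit):

    import re

    pattern = re.compile(r'<think>(.*?)</think>', re.DOTALL)
    print(pattern.sub('[thoughts hidden]', '<think>step 1</think> answer'))  # rewritten
    print(pattern.sub('[thoughts hidden]', '<think>still streaming...'))     # left as-is

The committed revision below replaces all of this with plain string replacement and an explicit gr.Blocks layout; added lines are marked with +.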
 
 import gradio as gr
 from huggingface_hub import InferenceClient

 client = InferenceClient("Pinkstack/Superthoughts-lite-v1")

+def respond(
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+):
     messages = [{"role": "system", "content": system_message}]
+    for val in history:
+        if val[0]:
+            messages.append({"role": "user", "content": val[0]})
+        if val[1]:
+            messages.append({"role": "assistant", "content": val[1]})
     messages.append({"role": "user", "content": message})
     response = ""
+    for message in client.chat_completion(
         messages,
         max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
     ):
+        token = message.choices[0].delta.content
         response += token
+        yield response
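Two caveats in the new streaming loop, flagged as an editor's note: the loop variable message shadows the message parameter (harmless here, since the parameter has already been appended, but easy to misread), and with OpenAI-style streaming the final chunk's delta.content can be None depending on the backend, which would make response += token raise a TypeError. A defensive variant of the loop, under those assumptions:

    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:  # skip role-only/empty deltas instead of crashing on None
            response += token
            yield response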
+def format_response(response):
+    # Replace <think>...</think> with a collapsible section
+    response = response.replace("<think>", '<details><summary>Show thoughts</summary><div class="thoughts">')
+    response = response.replace("</think>", "</div></details>")
+    return response
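For reference, format_response is a straight text substitution: a closed think block becomes a collapsible <details> element, while the <output> tags the default system prompt asks for pass through untouched:

    print(format_response("<think>reasoning here</think><output>final answer</output>"))
    # <details><summary>Show thoughts</summary><div class="thoughts">reasoning here</div></details><output>final answer</output>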
 
 
 css = """
+.thoughts {
     border: 1px solid #ccc;
+    padding: 10px;
+    background-color: #f9f9f9;
     border-radius: 5px;
 }
+details summary {
     cursor: pointer;
     padding: 5px;
+    background-color: #e0e0e0;
+    border-radius: 5px;
+    font-weight: bold;
+}
+details summary::-webkit-details-marker {
+    display: none;
+}
+details summary:after {
+    content: " ▶";
+}
+details[open] summary:after {
+    content: " ▼";
+}
 """

+with gr.Blocks(css=css) as demo:
+    gr.Markdown("## Chat with Superthoughts")
+    gr.Markdown("**Warning:** The first output from the AI may take a few moments. After the first message, it should work quickly.")
+
+    chatbot = gr.Chatbot()
+    msg = gr.Textbox()
+    system_message = gr.Textbox(value="You must always include <think> ... </think> <output> </output> tokens.", label="System message")
+    max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
+    temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
+    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
+
+    def user(user_message, history):
+        return "", history + [[user_message, None]]
+
+    def bot(history, system_message, max_tokens, temperature, top_p):
+        user_message, _ = history[-1]
+        response = ""
+        for partial_response in respond(user_message, history[:-1], system_message, max_tokens, temperature, top_p):
+            response = partial_response
+        formatted_response = format_response(response)
+        history[-1][1] = formatted_response
+        return history
+
+    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+        bot, [chatbot, system_message, max_tokens, temperature, top_p], chatbot
+    )

+demo.launch()
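As committed, bot drains the respond generator and returns only once, so the chatbot shows nothing until the full reply has arrived; the streaming happens between the Space and the inference endpoint, not in the UI. If live token-by-token updates were wanted, Gradio event handlers can themselves be generators; a sketch of that variant (an editor's assumption, not part of this commit):

    def bot(history, system_message, max_tokens, temperature, top_p):
        user_message, _ = history[-1]
        for partial_response in respond(user_message, history[:-1], system_message, max_tokens, temperature, top_p):
            history[-1][1] = format_response(partial_response)
            yield history  # Gradio re-renders the Chatbot on every yield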