Pinkstack committed on
Commit
e0d2fc3
·
verified ·
1 Parent(s): d0018d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -45
app.py CHANGED
@@ -1,54 +1,70 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
3
 
4
  client = InferenceClient("Pinkstack/Superthoughts-lite-v1")
5
 
6
  def respond(
7
- message,
8
  history: list[tuple[str, str]],
9
- system_message,
10
- max_tokens,
11
- temperature,
12
- top_p,
13
- ):
14
  messages = [{"role": "system", "content": system_message}]
15
- for val in history:
16
- if val[0]:
17
- messages.append({"role": "user", "content": val[0]})
18
- if val[1]:
19
- messages.append({"role": "assistant", "content": val[1]})
 
 
 
 
20
  messages.append({"role": "user", "content": message})
 
 
21
  response = ""
22
- for message in client.chat_completion(
23
- messages,
24
- max_tokens=max_tokens,
25
- stream=True,
26
- temperature=temperature,
27
- top_p=top_p,
28
- ):
29
- token = message.choices[0].delta.content
30
- response += token
31
- yield response
 
 
 
 
 
 
32
 
33
- def format_response(response):
34
- # Replace <think>...</think> with a collapsible section
35
- response = response.replace("<think>", '<details><summary>Show thoughts</summary><div class="thoughts">')
36
  response = response.replace("</think>", "</div></details>")
37
  return response
38
 
 
39
  css = """
40
  .thoughts {
41
  border: 1px solid #ccc;
42
  padding: 10px;
43
- background-color: #000000;
44
  border-radius: 5px;
 
45
  }
46
  details summary {
47
  cursor: pointer;
48
  padding: 5px;
49
- background-color: #000000;
50
  border-radius: 5px;
51
  font-weight: bold;
 
52
  }
53
  details summary::-webkit-details-marker {
54
  display: none;
@@ -61,31 +77,71 @@ details[open] summary:after {
61
  }
62
  """
63
 
 
64
  with gr.Blocks(css=css) as demo:
65
- gr.Markdown("## Chat with Superthoughts lite! (1.7B)")
66
- gr.Markdown("**Warning:** The first output from the AI may take a few moments. After the first message, it should work at a decent speed.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
- chatbot = gr.Chatbot()
69
- msg = gr.Textbox()
70
- system_message = gr.Textbox(value="You must always include <think> ... </think> <output> </output> tokens.", label="System message")
71
- max_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens")
72
- temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
73
- top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
74
-
75
- def user(user_message, history):
76
  return "", history + [[user_message, None]]
77
 
78
- def bot(history, system_message, max_tokens, temperature, top_p):
 
79
  user_message, _ = history[-1]
80
- response = ""
 
81
  for partial_response in respond(user_message, history[:-1], system_message, max_tokens, temperature, top_p):
82
- response = partial_response
83
- formatted_response = format_response(response)
84
- history[-1][1] = formatted_response
85
- return history
86
 
87
- msg.submit(user, [msg, chatbot], [msg, chatbot], queue=True).then(
88
- bot, [chatbot, system_message, max_tokens, temperature, top_p], chatbot
 
 
 
 
 
 
 
 
89
  )
90
 
91
- demo.launch()
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
+ from typing import Iterator
4
 
5
# Shared Hugging Face Inference API client; each chat_completion call hits this hosted model over the network.
client = InferenceClient("Pinkstack/Superthoughts-lite-v1")
6
 
7
def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
) -> Iterator[str]:
    """Stream a chat completion for *message*, yielding the formatted partial reply.

    Each yielded value is the full response accumulated so far, passed through
    ``format_response`` so <think> sections render as collapsible HTML. On any
    streaming failure a single ``"Error: ..."`` string is yielded instead.
    """
    # Rebuild the whole conversation: system prompt, then prior turns, then the new message.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    accumulated = ""
    try:
        stream = client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        )
        for chunk in stream:
            # The final/keep-alive chunks may carry no content delta.
            piece = chunk.choices[0].delta.content
            if piece is not None:
                accumulated += piece
                yield format_response(accumulated)
    except Exception as e:
        # Boundary handler for the UI: surface the failure as chat text.
        yield f"Error: {str(e)}"
45
 
46
def format_response(response: str) -> str:
    """Convert <think>...</think> markers into a collapsible HTML details block."""
    # Map each model marker to its HTML replacement; applied in order.
    substitutions = (
        ("<think>", '<details><summary>Show thinking 🧠</summary><div class="thoughts">'),
        ("</think>", "</div></details>"),
    )
    for marker, html in substitutions:
        response = response.replace(marker, html)
    return response
51
 
52
+ # Custom CSS for styling
53
  css = """
54
  .thoughts {
55
  border: 1px solid #ccc;
56
  padding: 10px;
57
+ background-color: #f8f9fa;
58
  border-radius: 5px;
59
+ margin: 5px 0;
60
  }
61
  details summary {
62
  cursor: pointer;
63
  padding: 5px;
64
+ background-color: #eee;
65
  border-radius: 5px;
66
  font-weight: bold;
67
+ margin: 5px 0;
68
  }
69
  details summary::-webkit-details-marker {
70
  display: none;
 
77
  }
78
  """
79
 
80
+ # Create Gradio interface
81
# Build the Gradio interface. Component creation order fixes the page layout.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# Chat with Superthoughts lite! (1.7B)")
    gr.Markdown("**Warning:** The first output from the AI may take a few moments. After the first message, it should work at a decent speed, keep in mind that this chat is only meant for testing and experimenting.")

    chatbot = gr.Chatbot(height=600)
    msg = gr.Textbox(label="Your message", placeholder="Type your message here...")

    # Generation knobs, hidden behind a collapsed accordion by default.
    with gr.Accordion("Advanced Settings", open=False):
        system_message = gr.Textbox(
            value="You must act in a conversational matter and always include <think> ... </think> <output> </output> tokens.",
            label="System message",
        )
        max_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens")
        temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")

    def user(user_message: str, history: list) -> tuple[str, list]:
        """Clear the textbox and append the user's turn with a pending reply."""
        return "", history + [[user_message, None]]

    def bot(history: list, system_message: str, max_tokens: int, temperature: float, top_p: float) -> Iterator[list]:
        """Stream the model's reply into the last history entry."""
        user_message = history[-1][0]
        history[-1][1] = ""  # placeholder so the UI shows an in-progress turn
        partials = respond(user_message, history[:-1], system_message, max_tokens, temperature, top_p)
        for partial in partials:
            history[-1][1] = partial
            yield history

    # Submit pipeline: record the user turn immediately (unqueued), then stream the reply.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, system_message, max_tokens, temperature, top_p], chatbot
    )

    def _clear_chat():
        """Reset the chatbot component (None empties it)."""
        return None

    clear = gr.Button("Clear Conversation")
    clear.click(_clear_chat, None, chatbot, queue=False)

# Launch only when run as a script; queueing is required for streaming output.
if __name__ == "__main__":
    demo.queue()
    demo.launch(share=True)