Nymbo committed on
Commit d735dab · verified · 1 Parent(s): 69b4a5f

Update app.py

Files changed (1):
  1. app.py +163 -122

app.py CHANGED
@@ -16,63 +16,71 @@ print("OpenAI client initialized.")
 def respond(
     message,
     history: list[tuple[str, str]],
-    model,
-    custom_model,
     system_message,
     max_tokens,
     temperature,
     top_p,
     frequency_penalty,
-    seed
+    seed,
+    model
 ):
     """
-    This function handles the chatbot response.
+    This function handles the chatbot response. It takes in:
+    - message: the user's new message
+    - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
+    - system_message: the system prompt
+    - max_tokens: the maximum number of tokens to generate in the response
+    - temperature: sampling temperature
+    - top_p: top-p (nucleus) sampling
+    - frequency_penalty: penalize repeated tokens in the output
+    - seed: a fixed seed for reproducibility; -1 will mean 'random'
+    - model: the model to use for text generation
     """
+
     print(f"Received message: {message}")
     print(f"History: {history}")
-    print(f"Model: {model}")
-    print(f"Custom model: {custom_model}")
     print(f"System message: {system_message}")
-    print(f"Parameters - Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
+    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
+    print(f"Model: {model}")
 
-    # Convert seed to None if -1
+    # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
 
-    # Set the model based on selection or custom input
-    selected_model = custom_model.strip() if custom_model.strip() != "" else model
-
-    # Construct messages array
+    # Construct the messages array required by the API
     messages = [{"role": "system", "content": system_message}]
 
-    # Add conversation history
+    # Add conversation history to the context
     for val in history:
         user_part = val[0]
         assistant_part = val[1]
         if user_part:
             messages.append({"role": "user", "content": user_part})
+            print(f"Added user message to context: {user_part}")
         if assistant_part:
             messages.append({"role": "assistant", "content": assistant_part})
+            print(f"Added assistant message to context: {assistant_part}")
 
-    # Append latest message
+    # Append the latest user message
     messages.append({"role": "user", "content": message})
 
-    # Start with empty response
+    # Start with an empty string to build the response as tokens stream in
     response = ""
-    print("Sending request to API.")
+    print("Sending request to OpenAI API.")
 
-    # Make the streaming request
+    # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
-        model=selected_model,
+        model=model,  # Use the selected model
         max_tokens=max_tokens,
-        stream=True,
+        stream=True,  # Stream the response
         temperature=temperature,
         top_p=top_p,
-        frequency_penalty=frequency_penalty,
-        seed=seed,
+        frequency_penalty=frequency_penalty,  # <-- NEW
+        seed=seed,  # <-- NEW
         messages=messages,
     ):
+        # Extract the token text from the response chunk
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
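
Reviewer note (not part of the commit): in the streaming loop above, `delta.content` can be `None` on role-only or final chunks from OpenAI-compatible servers, in which case `response += token_text` raises a `TypeError`. A minimal defensive sketch, assuming the chunk shape used above (the helper name is illustrative):

    def accumulate_stream(stream):
        """Concatenate streamed chat-completion deltas, skipping empty chunks.

        `stream` is assumed to be the iterator returned by
        client.chat.completions.create(..., stream=True).
        """
        response = ""
        for chunk in stream:
            token_text = chunk.choices[0].delta.content
            if token_text:  # delta.content may be None on role-only/final chunks
                response += token_text
        return response
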
@@ -80,135 +88,168 @@ def respond(
 
     print("Completed response generation.")
 
-# Create Chatbot component
+# Create a Chatbot component with a specified height
 chatbot = gr.Chatbot(height=600)
 print("Chatbot interface created.")
 
-# Define available models
-models_list = [
-    "meta-llama/Llama-2-70b-chat-hf",
-    "meta-llama/Llama-2-13b-chat-hf",
-    "mistralai/Mixtral-8x7B-Instruct-v0.1",
-    "mistralai/Mistral-7B-Instruct-v0.2",
-    "HuggingFaceH4/zephyr-7b-beta",
-]
-
-# Create the Gradio interface with tabs
-with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-    with gr.Tab("Chat"):
+# Create the Gradio ChatInterface
+# We add two new sliders for Frequency Penalty and Seed
+demo = gr.ChatInterface(
+    respond,
+    additional_inputs=[
+        gr.Textbox(value="", label="System message"),
+        gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
+        gr.Slider(
+            minimum=-2.0,
+            maximum=2.0,
+            value=0.0,
+            step=0.1,
+            label="Frequency Penalty"
+        ),
+        gr.Slider(
+            minimum=-1,
+            maximum=65535,  # Arbitrary upper limit for demonstration
+            value=-1,
+            step=1,
+            label="Seed (-1 for random)"
+        ),
+        gr.Textbox(label="Custom Model", placeholder="Enter a custom model path"),
+    ],
+    fill_height=True,
+    chatbot=chatbot,
+    theme="Nymbo/Nymbo_Theme",
+)
+print("Gradio interface initialized.")
+
+# Define the Gradio interface
+with gr.Blocks(theme='Nymbo/Nymbo_Theme_5') as textgen:
+    # Tab for basic settings
+    with gr.Tab("Basic Settings"):
+        with gr.Row():
+            with gr.Column(elem_id="prompt-container"):
+                with gr.Row():
+                    # Textbox for user to input the prompt
+                    text_prompt = gr.Textbox(label="Prompt", placeholder="Enter a prompt here", lines=3, elem_id="prompt-text-input")
+                with gr.Row():
+                    # Textbox for custom model input
+                    custom_model = gr.Textbox(label="Custom Model", info="Model Hugging Face path (optional)", placeholder="meta-llama/Llama-3.3-70B-Instruct")
+                with gr.Row():
+                    # Accordion for selecting the model
+                    with gr.Accordion("Featured Models", open=True):
+                        # Textbox for searching models
+                        model_search = gr.Textbox(label="Filter Models", placeholder="Search for a featured model...", lines=1, elem_id="model-search-input")
+                        models_list = (
+                            "meta-llama/Llama-3.3-70B-Instruct",
+                            "meta-llama/Llama-3.3-13B-Instruct",
+                            "meta-llama/Llama-3.3-30B-Instruct",
+                            "meta-llama/Llama-3.3-7B-Instruct",
+                        )
+
+                        # Radio buttons to select the desired model
+                        model = gr.Radio(label="Select a model below", value="meta-llama/Llama-3.3-70B-Instruct", choices=models_list, interactive=True, elem_id="model-radio")
+
+                        # Filtering models based on search input
+                        def filter_models(search_term):
+                            filtered_models = [m for m in models_list if search_term.lower() in m.lower()]
+                            return gr.update(choices=filtered_models)
+
+                        # Update model list when search box is used
+                        model_search.change(filter_models, inputs=model_search, outputs=model)
+
+    # Tab for advanced settings
+    with gr.Tab("Advanced Settings"):
+        with gr.Row():
+            # Slider for setting the maximum number of new tokens
+            max_tokens = gr.Slider(label="Max new tokens", value=512, minimum=1, maximum=4096, step=1)
+        with gr.Row():
+            # Slider for setting the temperature
+            temperature = gr.Slider(label="Temperature", value=0.7, minimum=0.1, maximum=4.0, step=0.1)
         with gr.Row():
-            with gr.Column():
-                # Model selection accordion
-                with gr.Accordion("Featured Models", open=True):
-                    model_search = gr.Textbox(
-                        label="Filter Models",
-                        placeholder="Search for a model...",
-                        lines=1
-                    )
-                    model = gr.Radio(
-                        label="Select a model",
-                        choices=models_list,
-                        value="meta-llama/Llama-2-70b-chat-hf"
-                    )
-
-                # Custom model input
-                custom_model = gr.Textbox(
-                    label="Custom Model",
-                    info="Enter Hugging Face model path (optional)",
-                    placeholder="organization/model-name"
-                )
-
-                # System message and parameters
-                system_message = gr.Textbox(label="System message")
-                max_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens")
-                temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
-                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
-                frequency_penalty = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
-                seed = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
+            # Slider for setting the top-p (nucleus) sampling
+            top_p = gr.Slider(label="Top-P", value=0.95, minimum=0.1, maximum=1.0, step=0.05)
+        with gr.Row():
+            # Slider for setting the frequency penalty
+            frequency_penalty = gr.Slider(label="Frequency Penalty", value=0.0, minimum=-2.0, maximum=2.0, step=0.1)
+        with gr.Row():
+            # Slider for setting the seed for reproducibility
+            seed = gr.Slider(label="Seed", value=-1, minimum=-1, maximum=65535, step=1)
 
+    # Tab to provide information to the user
     with gr.Tab("Information"):
-        with gr.Accordion("Featured Models", open=False):
-            gr.HTML("""
-            <p><a href="https://huggingface.co/models?pipeline_tag=text-generation&sort=trending">See all available models</a></p>
+        with gr.Row():
+            # Display a sample prompt for guidance
+            gr.Textbox(label="Sample prompt", value="{prompt} | ultra detail, ultra elaboration, ultra quality, perfect.")
+
+        # Accordion displaying featured models
+        with gr.Accordion("Featured Models (WiP)", open=False):
+            gr.HTML(
+                """
+            <p><a href="https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending">See all available models</a></p>
             <table style="width:100%; text-align:center; margin:auto;">
                 <tr>
                     <th>Model Name</th>
-                    <th>Parameters</th>
+                    <th>Typography</th>
                     <th>Notes</th>
                 </tr>
                 <tr>
-                    <td>Llama-2-70b-chat</td>
-                    <td>70B</td>
-                    <td>Meta's largest chat model</td>
+                    <td>meta-llama/Llama-3.3-70B-Instruct</td>
+                    <td>✅</td>
+                    <td></td>
+                </tr>
+                <tr>
+                    <td>meta-llama/Llama-3.3-13B-Instruct</td>
+                    <td>✅</td>
+                    <td></td>
                 </tr>
                 <tr>
-                    <td>Mixtral-8x7B</td>
-                    <td>47B</td>
-                    <td>Mixture of Experts architecture</td>
+                    <td>meta-llama/Llama-3.3-30B-Instruct</td>
+                    <td>✅</td>
+                    <td></td>
                 </tr>
                 <tr>
-                    <td>Mistral-7B</td>
-                    <td>7B</td>
-                    <td>Efficient base model</td>
+                    <td>meta-llama/Llama-3.3-7B-Instruct</td>
+                    <td>✅</td>
+                    <td></td>
                 </tr>
             </table>
-            """)
+            """
+            )
 
+        # Accordion providing an overview of advanced settings
         with gr.Accordion("Parameters Overview", open=False):
-            gr.Markdown("""
-            ## System Message
-            The system message sets the context and behavior for the AI assistant. It's like giving it a role or specific instructions.
-
+            gr.Markdown(
+                """
             ## Max New Tokens
-            Controls the maximum length of the generated response. Higher values allow for longer responses but take more time.
+            ###### This slider allows you to specify the maximum number of tokens to generate in the response. The default value is 512, and the maximum output is 4096.
 
             ## Temperature
-            Controls randomness in the response:
-            - Lower (0.1-0.5): More focused and deterministic
-            - Higher (0.7-1.0): More creative and varied
+            ###### The temperature controls the randomness of the output. A higher temperature makes the output more random, while a lower temperature makes it more deterministic. The default value is 0.7.
 
             ## Top-P
-            Nucleus sampling parameter:
-            - Lower values: More focused on likely tokens
-            - Higher values: More diverse vocabulary usage
+            ###### Top-P (nucleus) sampling is a way to control the diversity of the output. A higher value allows for more diverse outputs, while a lower value makes the output more focused. The default value is 0.95.
 
             ## Frequency Penalty
-            Discourages repetition:
-            - Negative: May allow more repetition
-            - Positive: Encourages more diverse word choice
+            ###### The frequency penalty penalizes repeated tokens in the output. A higher value makes the output more diverse, while a lower value allows for more repetition. The default value is 0.0.
 
             ## Seed
-            Controls randomness initialization:
-            - -1: Random seed each time
-            - Fixed value: Reproducible outputs
-            """)
-
-    # Function to filter models based on search
-    def filter_models(search_term):
-        filtered_models = [m for m in models_list if search_term.lower() in m.lower()]
-        return gr.update(choices=filtered_models)
-
-    # Connect the search box to the model filter function
-    model_search.change(filter_models, inputs=model_search, outputs=model)
-
-    # Create the chat interface
-    chat_interface = gr.ChatInterface(
-        respond,
-        additional_inputs=[
-            model,
-            custom_model,
-            system_message,
-            max_tokens,
-            temperature,
-            top_p,
-            frequency_penalty,
-            seed,
-        ],
-        chatbot=chatbot,
-    )
+            ###### The seed is a fixed value for reproducibility. If you find a seed that gives you a result you love, you can use it again to create a similar output. If you leave it at -1, the AI will generate a new seed every time.
 
-    print("Gradio interface initialized.")
+            ### Remember, these settings are all about giving you control over the text generation process. Feel free to experiment and see what each one does. And if you're ever in doubt, the default settings are a great place to start. Happy creating!
+            """
+            )
+
+    # Row containing the 'Run' button to trigger the text generation
+    with gr.Row():
+        text_button = gr.Button("Run", variant='primary', elem_id="gen-button")
+    # Row for displaying the generated text output
+    with gr.Row():
+        text_output = gr.Textbox(label="Text Output", elem_id="text-output")
+
+    # Set up button click event to call the respond function
+    text_button.click(respond, inputs=[text_prompt, chatbot, gr.Textbox(value="", label="System message"), max_tokens, temperature, top_p, frequency_penalty, seed, model], outputs=text_output)
 
-if __name__ == "__main__":
-    print("Launching the demo application.")
-    demo.launch(show_api=False, share=False)
+print("Launching Gradio interface...")  # Debug log
+# Launch the Gradio interface without showing the API or sharing externally
+textgen.launch(show_api=False, share=False)
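
Reviewer note (not part of the commit): the first hunk's header references `print("OpenAI client initialized.")`, but the client setup itself sits above line 16 and is not shown in this diff. A hedged sketch of a typical setup pointing the OpenAI SDK at the Hugging Face inference endpoint; the base URL and the `HF_TOKEN` environment variable are assumptions, not taken from app.py:

    import os
    from openai import OpenAI

    # Assumed setup; the actual code above line 16 of app.py may differ.
    client = OpenAI(
        base_url="https://api-inference.huggingface.co/v1/",
        api_key=os.getenv("HF_TOKEN"),
    )
    print("OpenAI client initialized.")
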
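Reviewer note (not part of the commit): `gr.ChatInterface` passes `additional_inputs` to `respond` positionally after `message` and `history`, so the trailing "Custom Model" textbox feeds the new `model` parameter. If a user leaves that box blank, an empty model name is sent to the API. A hedged sketch of a fallback; the default constant and helper are illustrative, not in the commit:

    DEFAULT_MODEL = "meta-llama/Llama-3.3-70B-Instruct"  # assumed default

    def resolve_model(model: str) -> str:
        """Fall back to a default when the Custom Model box is left empty."""
        model = (model or "").strip()
        return model if model else DEFAULT_MODEL
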
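Reviewer note (not part of the commit): in `text_button.click(...)`, the `gr.Textbox(value="", label="System message")` constructed inline in the `inputs` list is instantiated at event-wiring time, so within the `gr.Blocks` context it is appended to the bottom of the layout rather than placed with the other controls. A hedged sketch of more conventional wiring; placing `system_box` in the Basic Settings tab is an assumption about the intended layout:

    # Inside the "Basic Settings" tab, alongside the prompt box:
    with gr.Row():
        system_box = gr.Textbox(value="", label="System message")

    # Then reference the rendered component when wiring the event:
    text_button.click(
        respond,
        inputs=[text_prompt, chatbot, system_box, max_tokens, temperature,
                top_p, frequency_penalty, seed, model],
        outputs=text_output,
    )
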
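Reviewer note (not part of the commit): the new code builds two apps, `demo` (the `gr.ChatInterface`) and `textgen` (the `gr.Blocks` layout), but only `textgen.launch(...)` is called, so the chat interface is never served. If both are meant to be reachable, one hedged option is a tabbed wrapper; the wrapper variable and tab names are illustrative:

    # Expose both UIs from a single app (sketch, names assumed).
    app = gr.TabbedInterface([demo, textgen], ["Chat", "Text Generation"])
    app.launch(show_api=False, share=False)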