Nymbo committed on
Commit e4bb2d0 · verified · 1 Parent(s): e13eb1b

Update app.py

Files changed (1): app.py (+158 −113)

app.py CHANGED
@@ -22,7 +22,8 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
-    selected_model,
+    model,
+    custom_model
 ):
     """
     This function handles the chatbot response. It takes in:
@@ -32,17 +33,19 @@ def respond(
32
  - max_tokens: the maximum number of tokens to generate in the response
33
  - temperature: sampling temperature
34
  - top_p: top-p (nucleus) sampling
35
- - frequency_penalty: penalize repeated tokens in the output
36
  - seed: a fixed seed for reproducibility; -1 will mean 'random'
37
- - selected_model: the model to use for generating the response
 
38
  """
39
 
40
  print(f"Received message: {message}")
41
  print(f"History: {history}")
42
- print(f"System message: {system_message}")
43
- print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
44
  print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
45
- print(f"Selected model: {selected_model}")
 
46
 
47
  # Convert seed to None if -1 (meaning random)
48
  if seed == -1:
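The context cuts off at the `if seed == -1:` check, so the conversion body is not visible in this diff. Going by the comment above it, the intent is presumably the usual sentinel mapping, where `None` tells an OpenAI-style API to pick a random seed; a one-line sketch (assumed, not shown in the commit):

```python
# Assumed body of the truncated context above: map the -1 sentinel to None,
# which OpenAI-style APIs interpret as "no fixed seed" (i.e., random).
if seed == -1:
    seed = None
```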
@@ -58,7 +61,7 @@ def respond(
         if user_part:
             messages.append({"role": "user", "content": user_part})
             print(f"Added user message to context: {user_part}")
         if assistant_part:
             messages.append({"role": "assistant", "content": assistant_part})
             print(f"Added assistant message to context: {assistant_part}")
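For readers without the full file: `history` holds Gradio-style `(user, assistant)` pairs, and this loop flattens them into OpenAI-style role/content dicts, skipping empty halves. A self-contained sketch of the same pattern (the sample data is illustrative):

```python
# Illustrative: flattening (user, assistant) pairs into chat messages,
# mirroring the loop in the hunk above.
history = [("Hi there", "Hello! How can I help?"), ("Tell me a joke", None)]
messages = [{"role": "system", "content": "You are a helpful assistant."}]

for user_part, assistant_part in history:
    if user_part:
        messages.append({"role": "user", "content": user_part})
    if assistant_part:  # None/"" means no reply yet, so skip it
        messages.append({"role": "assistant", "content": assistant_part})

print(messages)  # system, user, assistant, user
```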
@@ -69,19 +72,19 @@ def respond(
     response = ""
     print("Sending request to OpenAI API.")

     # Make the streaming request to the HF Inference API via the OpenAI-like client
     for message_chunk in client.chat.completions.create(
-        model=selected_model,  # Use the selected model
+        model=custom_model if custom_model.strip() != "" else model,  # custom model wins when provided
         max_tokens=max_tokens,
         stream=True,  # Stream the response
         temperature=temperature,
         top_p=top_p,
         frequency_penalty=frequency_penalty,
         seed=seed,
         messages=messages,
     ):
         # Extract the token text from the response chunk (streamed chunks carry text on .delta)
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
         response += token_text
         yield response
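This hunk assumes a `client` object constructed earlier in app.py. A minimal sketch of what an OpenAI-compatible client pointed at the Hugging Face Inference API typically looks like; the `base_url` and the `HF_TOKEN` env-var name are assumptions, not taken from this commit:

```python
import os
from openai import OpenAI

# Assumed setup, not part of this diff: an OpenAI-compatible client
# targeting the HF Inference API's OpenAI-style endpoint.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=os.environ["HF_TOKEN"],  # assumed token variable
)
```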
@@ -92,116 +95,158 @@ def respond(
 chatbot = gr.Chatbot(height=600)
 print("Chatbot interface created.")

-# Define the list of featured models
-featured_models = [
-    "meta-llama/Llama-3.3-70B-Instruct",
-    "google/flan-t5-xl",
-    "facebook/bart-large-cnn",
-    "EleutherAI/gpt-neo-2.7B",
-    # Add more featured models here
-]
-
-# Create the Gradio Blocks interface
-with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-    # Tab for model selection
-    with gr.Tab("Models"):
-        with gr.Row():
-            with gr.Column():
-                with gr.Accordion("Featured Models", open=True):
-                    model_search = gr.Textbox(label="Filter Models", placeholder="Search for a featured model...", lines=1)
-                    model = gr.Dropdown(label="Select a model below", choices=featured_models, value="meta-llama/Llama-3.3-70B-Instruct", interactive=True)
-
-                    def filter_models(search_term):
-                        filtered_models = [m for m in featured_models if search_term.lower() in m.lower()]
-                        return gr.update(choices=filtered_models)
-
-                    model_search.change(filter_models, inputs=model_search, outputs=model)
-
-                custom_model = gr.Textbox(label="Custom Model", placeholder="Enter a custom model ID here", interactive=True)
-
-    # Tab for chat interface
-    with gr.Tab("Chat"):
-        with gr.Row():
-            with gr.Column():
-                txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter").style(container=False)
-
-        # Additional parameters
-        with gr.Row():
-            with gr.Column():
-                system_message = gr.Textbox(label="System Message", value="", lines=3)
-                max_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max New Tokens")
-                temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
-                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
-                frequency_penalty = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
-                seed = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
-
-        # Chatbot display
-        chatbot = gr.Chatbot(height=600)
-
-        # Submit button
-        submit_btn = gr.Button("Submit")
-
-    # Tab for information
-    with gr.Tab("Information"):
-        with gr.Row():
-            gr.Markdown(
-                """
-                # Featured Models
-
-                - **meta-llama/Llama-3.3-70B-Instruct**: A large language model from Meta.
-                - **google/flan-t5-xl**: A pretrained encoder-decoder model from Google.
-                - **facebook/bart-large-cnn**: A pretrained sequence-to-sequence model from Facebook.
-                - **EleutherAI/gpt-neo-2.7B**: A large autoregressive language model from EleutherAI.
-
-                # Parameters Overview
-
-                - **System Message**: Sets the behavior and context for the assistant.
-                - **Max New Tokens**: Limits the length of the generated response.
-                - **Temperature**: Controls the randomness of the output. Higher values make output more random.
-                - **Top-P**: Controls the diversity of text by selecting tokens that account for the top-p probability mass.
-                - **Frequency Penalty**: Decreases the model's likelihood of repeating the same lines.
-                - **Seed**: Ensures reproducibility of results; set to -1 for a random seed.
-                """
-            )
-
-    # Function to handle chat submission
-    def user(user_message, history):
-        return "", history + [[user_message, None]]
-
-    # Function to process the chat
-    def bot(history, system_message, max_tokens, temperature, top_p, frequency_penalty, seed, selected_model):
-        # Get the last user message
-        user_message = history[-1][0]
-        # Generate the response
-        response_iter = respond(
-            user_message,
-            history[:-1],  # Exclude the last user message, which doesn't have a response yet
-            system_message,
-            max_tokens,
-            temperature,
-            top_p,
-            frequency_penalty,
-            seed,
-            selected_model,
-        )
-        # Collect the entire response
-        full_response = ""
-        for resp in response_iter:
-            full_response = resp
-        # Update history with the bot's response
-        history[-1][1] = full_response
-        return history
-
-    # Set up the chat flow
-    txt.submit(user, [txt, chatbot], [txt, chatbot], queue=False).then(
-        bot, [chatbot, system_message, max_tokens, temperature, top_p, frequency_penalty, seed, model], chatbot
-    )
-    submit_btn.click(user, [txt, chatbot], [txt, chatbot], queue=False).then(
-        bot, [chatbot, system_message, max_tokens, temperature, top_p, frequency_penalty, seed, model], chatbot
-    )
+# Define the Gradio interface
+with gr.Blocks(theme='Nymbo/Nymbo_Theme') as demo:
+    # Tab for basic settings
+    with gr.Tab("Basic Settings"):
+        with gr.Column(elem_id="prompt-container"):
+            with gr.Row():
+                # Textbox for the user's message
+                text_prompt = gr.Textbox(label="Prompt", placeholder="Enter a prompt here", lines=3, elem_id="prompt-text-input")
+            with gr.Row():
+                # Textbox for custom model input
+                custom_model = gr.Textbox(label="Custom Model", info="HuggingFace model path (optional)", placeholder="meta-llama/Llama-3.3-70B-Instruct", lines=1, elem_id="model-search-input")
+            # Accordion for selecting the model
+            with gr.Accordion("Featured models", open=True):
+                # Textbox for searching models
+                model_search = gr.Textbox(label="Filter models", placeholder="Search for a featured model...", lines=1, elem_id="model-search-input")
+                # Radio buttons to select the desired model
+                model = gr.Radio(label="Select a model below", value="meta-llama/Llama-3.3-70B-Instruct", choices=[
+                    "meta-llama/Llama-3.3-70B-Instruct",
+                    "anthropic/claude-3",
+                    "anthropic/claude-instant-3",
+                    "anthropic/claude-2",
+                    "anthropic/claude-instant-2",
+                    "anthropic/claude-1.3",
+                    "anthropic/claude-instant-1.3",
+                    "anthropic/claude-1",
+                    "anthropic/claude-instant-1",
+                    "anthropic/claude-0.3",
+                    "anthropic/claude-instant-0.3",
+                    "anthropic/claude-0.1",
+                    "anthropic/claude-instant-0.1",
+                    "anthropic/claude-v2",
+                    "anthropic/claude-instant-v2",
+                    "anthropic/claude-v1",
+                    "anthropic/claude-instant-v1",
+                    "anthropic/claude-v0.3",
+                    "anthropic/claude-instant-v0.3",
+                    "anthropic/claude-v0.1",
+                    "anthropic/claude-instant-v0.1",
+                ], interactive=True, elem_id="model-radio")
+
+                # Filter the featured-model list based on the search input
+                def filter_models(search_term):
+                    # assumes the choices above are plain strings
+                    filtered_models = [m for m in model.choices if search_term.lower() in m.lower()]
+                    return gr.update(choices=filtered_models)
+
+                # Update the model list when the search box is used
+                model_search.change(filter_models, inputs=model_search, outputs=model)
+
+    # Tab for advanced settings
+    with gr.Tab("Advanced Settings"):
+        with gr.Row():
+            # Textbox for specifying the system message
+            system_message = gr.Textbox(value="", label="System message")
+        with gr.Row():
+            # Slider for setting the maximum new tokens
+            max_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens")
+        with gr.Row():
+            # Slider for setting the temperature
+            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
+        with gr.Row():
+            # Slider for setting top-p
+            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-P")
+        with gr.Row():
+            # Slider for setting the frequency penalty
+            frequency_penalty = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
+        with gr.Row():
+            # Slider for setting the seed
+            seed = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
+
+    # Tab for information
+    with gr.Tab("Information"):
+        with gr.Row():
+            # Display a sample prompt
+            gr.Textbox(label="Sample prompt", value="Enter a prompt | ultra detail, ultra elaboration, ultra quality, perfect.")
+        with gr.Accordion("Featured Models (WiP)", open=False):
+            gr.HTML(
+                """
+                <p><a href="https://huggingface.co/models?inferences=warm&pipeline_tag=text-to-text&sort=trending">View more models</a></p>
+                <table style="width:100%; text-align:center; margin:auto;">
+                    <tr>
+                        <th>Model</th>
+                        <th>Description</th>
+                    </tr>
+                    <tr>
+                        <td>meta-llama/Llama-3.3-70B-Instruct</td>
+                        <td>High-quality, large-scale language model</td>
+                    </tr>
+                    <tr>
+                        <td>anthropic/claude-3</td>
+                        <td>Advanced conversational AI model</td>
+                    </tr>
+                    <tr>
+                        <td>anthropic/claude-instant-3</td>
+                        <td>Fast and efficient conversational AI model</td>
+                    </tr>
+                </table>
+                """
+            )
+        with gr.Accordion("Parameters Overview", open=False):
+            gr.Markdown(
+                """
+                ## System Message
+                - **Description**: The system message provides context and instructions to the model.
+                - **Default**: ""
+
+                ## Max New Tokens
+                - **Description**: The maximum number of tokens to generate in the response.
+                - **Default**: 512
+                - **Range**: 1 to 4096
+
+                ## Temperature
+                - **Description**: Controls the randomness of the output. Lower values make the output more deterministic; higher values make it more varied.
+                - **Default**: 0.7
+                - **Range**: 0.1 to 4.0
+
+                ## Top-P
+                - **Description**: Controls the diversity of the output. Lower values make the output more focused; higher values make it more varied.
+                - **Default**: 0.9
+                - **Range**: 0.1 to 1.0
+
+                ## Frequency Penalty
+                - **Description**: Penalizes repeated tokens in the response. Higher values make the output less repetitive.
+                - **Default**: 0.0
+                - **Range**: -2.0 to 2.0
+
+                ## Seed
+                - **Description**: A fixed seed for reproducibility; -1 for random.
+                - **Default**: -1
+                - **Range**: -1 to 65535
+                """
+            )
+
+    # Row containing the 'Run' button to trigger the query function
+    with gr.Row():
+        text_button = gr.Button("Run", variant='primary', elem_id="gen-button")
+    # Row for displaying the generated response
+    with gr.Row():
+        response_output = gr.Textbox(label="Response Output", elem_id="response-output")
+
+    # Hidden state standing in for respond()'s history argument (single-turn UI)
+    history_state = gr.State([])
+
+    # Set up the button to call the respond function; the input order matches
+    # respond(message, history, system_message, max_tokens, temperature,
+    #         top_p, frequency_penalty, seed, model, custom_model)
+    text_button.click(
+        respond,
+        inputs=[
+            text_prompt, history_state, system_message, max_tokens, temperature, top_p, frequency_penalty, seed, model, custom_model
+        ],
+        outputs=[response_output]
+    )

 print("Gradio interface initialized.")

 if __name__ == "__main__":
-    print("Launching the demo application.")
-    demo.launch()
+    demo.launch(show_api=False, share=False)
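Because `respond` is a generator, Gradio streams each yielded string into the bound output component as it arrives, so the wiring above gives incremental output with no extra code. A runnable toy of the same pattern (the names and the stub are illustrative, not from this commit):

```python
import time
import gradio as gr

# Toy stand-in for respond(): yields progressively longer strings,
# which Gradio streams into the output Textbox.
def respond_stub(prompt):
    text = ""
    for word in f"You said: {prompt}".split():
        text += word + " "
        time.sleep(0.05)  # simulate per-token latency
        yield text

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    output = gr.Textbox(label="Response Output")
    gr.Button("Run").click(respond_stub, inputs=prompt, outputs=output)

if __name__ == "__main__":
    demo.launch()
```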