Nymbo committed on
Commit
86297f5
·
verified ·
1 Parent(s): e4bb2d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -163
app.py CHANGED
@@ -33,19 +33,26 @@ def respond(
33
  - max_tokens: the maximum number of tokens to generate in the response
34
  - temperature: sampling temperature
35
  - top_p: top-p (nucleus) sampling
36
- - frequency_penalty: penalize repeated tokens in the response
37
  - seed: a fixed seed for reproducibility; -1 will mean 'random'
38
- - model: the selected model
39
- - custom_model: the custom model path
40
  """
41
 
42
  print(f"Received message: {message}")
43
  print(f"History: {history}")
44
- print(f"system message: {system_message}")
45
- print(f"max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
46
  print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
47
- print(f"Selected Model: {model}")
48
- print(f"Custom model: {custom_model}")
 
 
 
 
 
 
 
49
 
50
  # Convert seed to None if -1 (meaning random)
51
  if seed == -1:
@@ -61,7 +68,7 @@ def respond(
61
  if user_part:
62
  messages.append({"role": "user", "content": user_part})
63
  print(f"Added user message to context: {user_part}")
64
- ifassistant_part:
65
  messages.append({"role": "assistant", "content": assistant_part})
66
  print(f"Added assistant message to context: {assistant_part}")
67
 
@@ -70,24 +77,25 @@ def respond(
70
 
71
  # Start with an empty string to build the response as tokens stream in
72
  response = ""
73
- print("Sending request to OpenAI API.")
74
 
75
- # Make the request to the HF Inference API via openAI-like client
76
  for message_chunk in client.chat.completions.create(
77
- model=custom_model if custom_model.strip() != "" else model,
78
  max_tokens=max_tokens,
79
  stream=True, # Stream the response
80
  temperature=temperature,
81
  top_p=top_p,
82
- frequency_penalty=frequency_penalty, # <--
83
- seed=seed, # <--
84
- messages=messages
85
  ):
86
  # Extract the token text from the response chunk
87
- token_text = message_chunk.choices[0].message.content
88
- print(f"Received token: {token_text}")
89
- response += token_text
90
- yield response
 
91
 
92
  print("Completed response generation.")
93
 
@@ -95,158 +103,108 @@ def respond(
95
  chatbot = gr.Chatbot(height=600)
96
  print("Chatbot interface created.")
97
 
98
- # Define the Gradio interface
99
- with gr.Blocks(theme='Nymbo/Nymbo_Theme') as demo:
100
- # Tab for basic settings
101
- with gr.Tab("Basic Settings"):
102
- with gr.Column(elem_id="prompt-container"):
103
- with gr.Row():
104
- # Textbox for user to input the message
105
- text_prompt = gr.Textbox(label="Prompt", placeholder="Enter a prompt here", lines=3, elem_id="prompt-text-input")
106
- with gr.Row():
107
- # Textbox for custom model input
108
- custom_model = gr.textbox(label="Custom Model", info="HuggingFace model path (optional)", placeholder="meta-llama/Llama-3.3-70B-Instruct", lines=1, elem_id="model-search-input")
109
- # Accordion for selecting the model
110
- with gr.Accordion("Featured models", open=True):
111
- # Textbox for searching models
112
- model_search = gr.textbox(Label="Filter models", placeholder="Search for a featured model...", lines=1, elem_id="model-search-input")
113
- # Radio buttons to select the desired model
114
- model = gr.Radio(label="Select a model below", value="meta-llama/Llama-3.3-70B-Instruct", choices=[
115
- "meta-llama/Llama-3.3-70B-Instruct",
116
- "anthropic/claude-3",
117
- "anthropic/claude-instant-3",
118
- "anthropic/claude-2",
119
- "anthropic/claude-2",
120
- "anthropic/claude-instant-2",
121
- "anthropic/claude-1.3",
122
- "anthropic/claude-instant-1.3",
123
- "anthropic/claude-1",
124
- "anthropic/claude-instant-1",
125
- "anthropic/claude-0.3",
126
- "anthropic/claude-instant-0.3",
127
- "anthropic/claude-0.1",
128
- "anthropic/claude-instant-0.1",
129
- "anthropic/claude-v2",
130
- "anthropic/claude-instant-v2",
131
- "anthropic/claude-v1",
132
- "anthropic/claude-instant-v1",
133
- "anthropic/claude-v0.3",
134
- "anthropic/claude-instant-v0.3",
135
- "anthropic/claude-v0.1",
136
- "anthropic/claude-instant-v0.1",
137
- ], interactive=True, elem_id="model-radio")
138
-
139
- # Filtering models based on search input
140
- def filter_models(search_term):
141
- filtered_models = [m for m in model.choices if search_term.lower() in m.lower()]
142
- return gr.update(choices=filtered_models)
143
-
144
- # Update model list when search box is used
145
- model_search.change(filter_models, inputs=model, outputs=model)
146
-
147
- # Tab for advanced settings
148
- with gr.Tab("Advanced Settings"):
149
- with gr.Row():
150
- # Text box for specifying the system message
151
- system_message = gr.text box(value="", label="System message")
152
- with gr.Row():
153
- # Slider for setting the maximum new tokens
154
- max_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens")
155
- with gr.Row():
156
- # Slider for setting the temperature
157
- temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
158
- with gr.Row():
159
- #Slider for setting top-p
160
- top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-P")
161
- with gr.Row():
162
- #Slider for setting frequency penalty
163
- frequency_penalty = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
164
- with gr.Row():
165
- #Slider for setting the seed
166
- seed = gr.SLider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
167
 
168
- # Tab for information
169
- with gr.tab("Information"):
 
170
  with gr.Row():
171
- # Display a sample prompt
172
- gr.textbox(label="Sample prompt", value="Enter a prompt | ultra detail, ultra elaboration, ultra quality, perfect.")
173
- with gr.Accordion("Featured Models (WiP)", open=False):
174
- gr.html(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  """
176
- <p><a href="https://huggingface.co/models?inferences=warm&pipeline_tag=text-to-text&sort=trending">View more models</a></p>
177
- <table style="width:100%; text-align:center; margin:auto;">
178
- <tr>
179
- <th>Model</th>
180
- <th>Description</th>
181
- </tr>
182
- <tr>
183
- <td>meta-llama/Llama-3.3-70B-Instruct</td>
184
- <td>High-quality, large-scale language model</td>
185
- </tr>
186
- <tr>
187
- <td>anthropic/claude-3</td>
188
- <td> Advanced conversational AI model</td>
189
- </tr>
190
- <tr>
191
- <td>anthropic/claude-instant-3</td>
192
- <td> Fast and efficient conversational AI model</td>
193
- </tr>
194
- </table>
195
- """
196
  )
 
197
  with gr.Accordion("Parameters Overview", open=False):
198
- gr.markdown(
199
- """
200
- ## System Message
201
- - **Description**: The system message provides context and instructions to the model.
202
- - **Default**: ""
203
-
204
- ## Max New Tokens
205
- - **Description**: The maximum number of tokens to generate in the response.
206
- - **Default**: 512
207
- - **Range**: 1 to 4096
208
-
209
- ## Temperature
210
- - **Description**: Controls the randomness of the output. Lower values make the output more deterministic, higher values make it output more varied.
211
- - **Default**: 0.7
212
- - **Range**: 0.1 to 4.0
213
-
214
- ## Top-P
215
- - **Description**: Controls the diversity of the output. Lower values make the output more focused, higher values make it more varied.
216
- - **Default**: 0.7
217
- - **Range**: 0.1 to 1.0
218
-
219
- ## Frequency Penalty
220
- - **Description**: Penalizes repeated tokens in the response. Higher values makes the output less repetitive.
221
- - **Default**: 0.0
222
- - **Range**: -2.0 to 2.0
223
-
224
- ## Seed
225
- - **Description**: A fixed seed for reproducibility. -1 for random.
226
- - **Default**: -1
227
- - **Range**: -1 to 65535
228
-
229
- """
230
  )
231
- """
232
-
233
- # Row containing the 'Run' button to trigger the query function
234
- with gr.Row():
235
- text_button = gr.Button("Run", variant='primary', elem_id="gen-button")
236
- # Row for displaying the generated response
237
- with gr.Row():
238
- response_output = gr.Textbox(label="Response Output", elem_id="response-output")
239
-
240
- # Set up button to call the respond function
241
- text_button.click(
242
- respond,
243
- inputs=[
244
- text_prompt, model, custom_model, system_message, max_tokens, temperature, top_p, frequency_penalty, seed
245
- ],
246
- outputs=[response_output]
247
- )
248
 
249
  print("Gradio interface initialized.")
250
 
251
  if __name__ == "__main__":
252
- demo.launch(show_api=False, share=False)
 
 
33
  - max_tokens: the maximum number of tokens to generate in the response
34
  - temperature: sampling temperature
35
  - top_p: top-p (nucleus) sampling
36
+ - frequency_penalty: penalize repeated tokens in the output
37
  - seed: a fixed seed for reproducibility; -1 will mean 'random'
38
+ - model: the selected model from the featured list
39
+ - custom_model: a custom model specified by the user
40
  """
41
 
42
  print(f"Received message: {message}")
43
  print(f"History: {history}")
44
+ print(f"System message: {system_message}")
45
+ print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
46
  print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
47
+ print(f"Model: {model}, Custom Model: {custom_model}")
48
+
49
+ # Determine the model to use
50
+ if custom_model.strip() != "":
51
+ selected_model = custom_model.strip()
52
+ else:
53
+ selected_model = model
54
+
55
+ print(f"Selected model for inference: {selected_model}")
56
 
57
  # Convert seed to None if -1 (meaning random)
58
  if seed == -1:
 
68
  if user_part:
69
  messages.append({"role": "user", "content": user_part})
70
  print(f"Added user message to context: {user_part}")
71
+ if assistant_part:
72
  messages.append({"role": "assistant", "content": assistant_part})
73
  print(f"Added assistant message to context: {assistant_part}")
74
 
 
77
 
78
  # Start with an empty string to build the response as tokens stream in
79
  response = ""
80
+ print(f"Sending request to OpenAI API using model: {selected_model}.")
81
 
82
+ # Make the streaming request to the HF Inference API via openai-like client
83
  for message_chunk in client.chat.completions.create(
84
+ model=selected_model,
85
  max_tokens=max_tokens,
86
  stream=True, # Stream the response
87
  temperature=temperature,
88
  top_p=top_p,
89
+ frequency_penalty=frequency_penalty,
90
+ seed=seed,
91
+ messages=messages,
92
  ):
93
  # Extract the token text from the response chunk
94
+ token_text = message_chunk.choices[0].delta.content
95
+ if token_text is not None:
96
+ print(f"Received token: {token_text}")
97
+ response += token_text
98
+ yield response
99
 
100
  print("Completed response generation.")
101
 
 
103
  chatbot = gr.Chatbot(height=600)
104
  print("Chatbot interface created.")
105
 
106
+ # Define featured models
107
+ featured_models_list = [
108
+ "meta-llama/Llama-3.3-70B-Instruct",
109
+ "mistralai/Mistral-7B-v0.1",
110
+ "google/gemma-7b",
111
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
+ # Create the Gradio ChatInterface
114
+ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
115
+ with gr.Tab("Chat"):
116
  with gr.Row():
117
+ with gr.Column():
118
+ # Chat interface
119
+ gr.ChatInterface(
120
+ respond,
121
+ additional_inputs=[
122
+ gr.Textbox(value="", label="System message"),
123
+ gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
124
+ gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
125
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
126
+ gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty"),
127
+ gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)"),
128
+ gr.Dropdown(label="Featured Models", choices=featured_models_list, value="meta-llama/Llama-3.3-70B-Instruct", interactive=True),
129
+ gr.Textbox(value="", label="Custom Model (Optional)"),
130
+ ],
131
+ fill_height=True,
132
+ chatbot=chatbot,
133
+ )
134
+ with gr.Column():
135
+ # Featured models accordion
136
+ with gr.Accordion("Featured Models", open=True):
137
+ model_search = gr.Textbox(label="Filter Models", placeholder="Search for a featured model...", lines=1)
138
+ model_radio = gr.Radio(label="Select a model below", choices=featured_models_list, value="meta-llama/Llama-3.3-70B-Instruct", interactive=True)
139
+
140
+ def filter_models(search_term):
141
+ filtered_models = [m for m in featured_models_list if search_term.lower() in m.lower()]
142
+ return gr.update(choices=filtered_models)
143
+
144
+ model_search.change(filter_models, inputs=model_search, outputs=model_radio)
145
+
146
+ # Custom model textbox
147
+ custom_model_textbox = gr.Textbox(label="Custom Model", placeholder="Enter a custom model path here (optional)", lines=1)
148
+
149
+ with gr.Tab("Information"):
150
+ with gr.Accordion("Featured Models", open=False):
151
+ gr.HTML(
152
+ """
153
+ <p><a href="https://huggingface.co/models?pipeline_tag=text-generation&sort=trending">See all available models</a></p>
154
+ <table style="width:100%; text-align:center; margin:auto;">
155
+ <tr>
156
+ <th>Model Name</th>
157
+ <th>Notes</th>
158
+ </tr>
159
+ <tr>
160
+ <td>meta-llama/Llama-3.3-70B-Instruct</td>
161
+ <td>Powerful large language model.</td>
162
+ </tr>
163
+ <tr>
164
+ <td>mistralai/Mistral-7B-v0.1</td>
165
+ <td>A smaller, efficient model.</td>
166
+ </tr>
167
+ <tr>
168
+ <td>google/gemma-7b</td>
169
+ <td>Google's language model.</td>
170
+ </tr>
171
+ </table>
172
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  )
174
+
175
  with gr.Accordion("Parameters Overview", open=False):
176
+ gr.Markdown(
177
+ """
178
+ ## Parameters Overview
179
+
180
+ ### System Message
181
+ The system message is an initial instruction or context that you provide to the chatbot. It sets the stage for the conversation and can be used to guide the chatbot's behavior or persona.
182
+
183
+ ### Max New Tokens
184
+ This parameter limits the length of the chatbot's response. It specifies the maximum number of tokens (words or subwords) that the chatbot can generate in a single response.
185
+
186
+ ### Temperature
187
+ Temperature controls the randomness of the chatbot's responses. A higher temperature (e.g., 1.0) makes the output more random and creative, while a lower temperature (e.g., 0.2) makes the output more focused and deterministic.
188
+
189
+ ### Top-P
190
+ Top-P, also known as nucleus sampling, is another way to control the randomness of the responses. It sets a threshold for the cumulative probability of the most likely tokens. The chatbot will only consider tokens whose cumulative probability is below this threshold.
191
+
192
+ ### Frequency Penalty
193
+ This parameter discourages the chatbot from repeating the same tokens or phrases too often. A higher value (e.g., 1.0) penalizes repetition more strongly, while a lower value (e.g., 0.0) has no penalty.
194
+
195
+ ### Seed
196
+ The seed is a number that initializes the random number generator used by the chatbot. If you set a specific seed, you will get the same response every time you run the chatbot with the same parameters. If you set the seed to -1, a random seed will be used, resulting in different responses each time.
197
+
198
+ ### Featured Models
199
+ You can select a featured model from the dropdown list. These models have been pre-selected for their performance and capabilities.
200
+
201
+ ### Custom Model
202
+ If you have a specific model that you want to use, you can enter its path in the Custom Model textbox. This allows you to use models that are not included in the featured list.
203
+ """
 
 
 
 
204
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
  print("Gradio interface initialized.")
207
 
208
  if __name__ == "__main__":
209
+ print("Launching the demo application.")
210
+ demo.launch()