Nymbo committed
Commit c20c4dd · verified · 1 Parent(s): b56d11c

Update app.py

Files changed (1): app.py +78 -180
app.py CHANGED
@@ -22,31 +22,20 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
-    model
+    model,
+    custom_model
 ):
     """
-    This function handles the chatbot response. It takes in:
-    - message: the user's new message
-    - history: the list of previous messages, each as a tuple (user_msg, assistant_msg)
-    - system_message: the system prompt
-    - max_tokens: the maximum number of tokens to generate in the response
-    - temperature: sampling temperature
-    - top_p: top-p (nucleus) sampling
-    - frequency_penalty: penalize repeated tokens in the output
-    - seed: a fixed seed for reproducibility; -1 will mean 'random'
-    - model: the selected model for text generation
+    Handles the chatbot response with given parameters.
     """
-
     print(f"Received message: {message}")
     print(f"History: {history}")
     print(f"System message: {system_message}")
-    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
-    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
-    print(f"Model: {model}")
+    print(f"Model: {model}, Custom Model: {custom_model}")

-    # Convert seed to None if -1 (meaning random)
-    if seed == -1:
-        seed = None
+    # Use custom model if provided, else use selected model
+    selected_model = custom_model.strip() if custom_model.strip() else model
+    print(f"Selected model: {selected_model}")

     # Construct the messages array required by the API
     messages = [{"role": "system", "content": system_message}]
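Note on the new fallback in this hunk: any non-whitespace value in the Custom Model box silently overrides the dropdown selection. A minimal sketch of that rule in isolation (the `pick_model` helper is hypothetical, not part of the commit):

```python
# Sketch of the fallback rule above: a custom model path wins only when
# the textbox contains something other than whitespace.
def pick_model(custom_model: str, model: str) -> str:
    return custom_model.strip() if custom_model.strip() else model

assert pick_model("   ", "meta-llama/Llama-3.3-70B-Instruct") == "meta-llama/Llama-3.3-70B-Instruct"
assert pick_model("org/my-model", "meta-llama/Llama-3.3-70B-Instruct") == "org/my-model"
```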
@@ -69,15 +58,15 @@ def respond(
     response = ""
     print("Sending request to OpenAI API.")

-    # Make the streaming request to the HF Inference API via openai-like client
+    # Make the streaming request to the HF Inference API via OpenAI-like client
     for message_chunk in client.chat.completions.create(
-        model=model,  # Use the selected model
+        model=selected_model,
         max_tokens=max_tokens,
-        stream=True,  # Stream the response
+        stream=True,
         temperature=temperature,
         top_p=top_p,
-        frequency_penalty=frequency_penalty,  # <-- NEW
-        seed=seed,  # <-- NEW
+        frequency_penalty=frequency_penalty,
+        seed=seed if seed != -1 else None,
         messages=messages,
     ):
         # Extract the token text from the response chunk
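For reference, the streaming loop above follows the standard OpenAI-client pattern: iterate the generator returned by `chat.completions.create(stream=True)` and accumulate each chunk's delta. A self-contained sketch of the same pattern (the base URL, token, and model name are placeholders, not values from this repo):

```python
from openai import OpenAI

# Hypothetical client setup; app.py creates `client` elsewhere.
client = OpenAI(base_url="https://api.example.com/v1", api_key="YOUR_TOKEN")

response = ""
for chunk in client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    messages=[{"role": "user", "content": "Hello!"}],
    stream=True,
    seed=None,  # None asks the server for a random seed, mirroring seed == -1 above
):
    token = chunk.choices[0].delta.content or ""  # delta.content can be None on some chunks
    response += token
```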
@@ -88,179 +77,88 @@ def respond(

     print("Completed response generation.")

-# Create a Chatbot component with a specified height
+# Create a Chatbot component
 chatbot = gr.Chatbot(height=600)
 print("Chatbot interface created.")

-# Create the Gradio ChatInterface
-# We add two new sliders for Frequency Penalty and Seed
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="", label="System message"),
-        gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
-        gr.Slider(
-            minimum=-2.0,
-            maximum=2.0,
-            value=0.0,
-            step=0.1,
-            label="Frequency Penalty"
-        ),
-        gr.Slider(
-            minimum=-1,
-            maximum=65535,  # Arbitrary upper limit for demonstration
-            value=-1,
-            step=1,
-            label="Seed (-1 for random)"
-        ),
-        gr.Textbox(label="Custom Model", info="Model Hugging Face path (optional)", placeholder="meta-llama/Llama-3.3-70B-Instruct"),
-    ],
-    fill_height=True,
-    chatbot=chatbot,
-    theme="Nymbo/Nymbo_Theme",
-)
-print("Gradio interface initialized.")
-
-# Custom CSS to hide the footer in the interface
-css = """
-* {}
-footer {visibility: hidden !important;}
-"""
-
-print("Initializing Gradio interface...")  # Debug log
+# Define the featured models for the dropdown
+models_list = [
+    "meta-llama/Llama-3.3-70B-Instruct",
+    "bigscience/bloom-176b",
+    "gpt-j-6b",
+    "opt-30b",
+    "flan-t5-xxl",
+]
+
+# Function to filter models based on user input
+def filter_models(search_term):
+    return [m for m in models_list if search_term.lower() in m.lower()]
+
+# Gradio interface
+with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
+    with gr.Row():
+        chatbot = gr.Chatbot(height=600)

-# Define the Gradio interface
-with gr.Blocks(theme='Nymbo/Nymbo_Theme_5') as textgen:
-    # Tab for basic settings
-    with gr.Tab("Basic Settings"):
+    with gr.Tab("Chat Interface"):
         with gr.Row():
-            with gr.Column(elem_id="prompt-container"):
-                with gr.Row():
-                    # Textbox for user to input the prompt
-                    text_prompt = gr.Textbox(label="Prompt", placeholder="Enter a prompt here", lines=3, elem_id="prompt-text-input")
-                with gr.Row():
-                    # Textbox for custom model input
-                    custom_model = gr.Textbox(label="Custom Model", info="Model Hugging Face path (optional)", placeholder="meta-llama/Llama-3.3-70B-Instruct")
-                with gr.Row():
-                    # Accordion for selecting the model
-                    with gr.Accordion("Featured Models", open=True):
-                        # Textbox for searching models
-                        model_search = gr.Textbox(label="Filter Models", placeholder="Search for a featured model...", lines=1, elem_id="model-search-input")
-                        models_list = (
-                            "meta-llama/Llama-3.3-70B-Instruct",
-                            "meta-llama/Llama-3.3-30B-Instruct",
-                            "meta-llama/Llama-3.3-13B-Instruct",
-                            "meta-llama/Llama-3.3-7B-Instruct",
-                        )
-
-                        # Radio buttons to select the desired model
-                        model = gr.Radio(label="Select a model below", value="meta-llama/Llama-3.3-70B-Instruct", choices=models_list, interactive=True, elem_id="model-radio")
-
-                        # Filtering models based on search input
-                        def filter_models(search_term):
-                            filtered_models = [m for m in models_list if search_term.lower() in m.lower()]
-                            return gr.update(choices=filtered_models)
-
-                        # Update model list when search box is used
-                        model_search.change(filter_models, inputs=model_search, outputs=model)
-
-    # Tab for advanced settings
-    with gr.Tab("Advanced Settings"):
+            user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
         with gr.Row():
-            # Slider for setting the maximum number of new tokens
-            max_tokens = gr.Slider(label="Max new tokens", value=512, minimum=1, maximum=4096, step=1)
+            system_message = gr.Textbox(value="", label="System Message")
         with gr.Row():
-            # Slider for adjusting the temperature
-            temperature = gr.Slider(label="Temperature", value=0.7, minimum=0.1, maximum=4.0, step=0.1)
+            max_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max Tokens")
+            temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
         with gr.Row():
-            # Slider for adjusting the top-p (nucleus) sampling
-            top_p = gr.Slider(label="Top-P", value=0.95, minimum=0.1, maximum=1.0, step=0.05)
+            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-P")
+            frequency_penalty = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
+            seed = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
         with gr.Row():
-            # Slider for adjusting the frequency penalty
-            frequency_penalty = gr.Slider(label="Frequency Penalty", value=0.0, minimum=-2.0, maximum=2.0, step=0.1)
+            model = gr.Dropdown(label="Select a Model", choices=models_list, value="meta-llama/Llama-3.3-70B-Instruct")
+            custom_model = gr.Textbox(label="Custom Model", placeholder="Enter custom model path")
         with gr.Row():
-            # Slider for setting the seed for reproducibility
-            seed = gr.Slider(label="Seed", value=-1, minimum=-1, maximum=65535, step=1)
+            run_button = gr.Button("Generate Response")

-    # Tab to provide information to the user
     with gr.Tab("Information"):
-        with gr.Row():
-            # Display a sample prompt for guidance
-            gr.Textbox(label="Sample prompt", value="{prompt} | ultra detail, ultra elaboration, ultra quality, perfect.")
-
-        # Accordion displaying featured models
-        with gr.Accordion("Featured Models (WiP)", open=False):
+        with gr.Accordion("Featured Models", open=False):
             gr.HTML(
                 """
-                <p><a href="https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending">See all available models</a></p>
-                <table style="width:100%; text-align:center; margin:auto;">
-                    <tr>
-                        <th>Model Name</th>
-                        <th>Typography</th>
-                        <th>Notes</th>
-                    </tr>
-                    <tr>
-                        <td>meta-llama/Llama-3.3-70B-Instruct</td>
-                        <td>✅</td>
-                        <td></td>
-                    </tr>
-                    <tr>
-                        <td>meta-llama/Llama-3.3-30B-Instruct</td>
-                        <td>✅</td>
-                        <td></td>
-                    </tr>
-                    <tr>
-                        <td>meta-llama/Llama-3.3-13B-Instruct</td>
-                        <td>✅</td>
-                        <td></td>
-                    </tr>
-                    <tr>
-                        <td>meta-llama/Llama-3.3-7B-Instruct</td>
-                        <td>✅</td>
-                        <td></td>
-                    </tr>
-                </table>
-                """
+                <table>
+                    <tr><th>Model Name</th><th>Description</th></tr>
+                    <tr><td>meta-llama/Llama-3.3-70B-Instruct</td><td>Instruction-tuned LLaMA model</td></tr>
+                    <tr><td>bigscience/bloom-176b</td><td>Multilingual large language model</td></tr>
+                    <tr><td>gpt-j-6b</td><td>Open-source GPT model</td></tr>
+                    <tr><td>opt-30b</td><td>Meta's OPT model</td></tr>
+                    <tr><td>flan-t5-xxl</td><td>Google's Flan-tuned T5 XXL</td></tr>
+                </table>
+                """
             )
-
-        # Accordion providing an overview of advanced settings
         with gr.Accordion("Parameters Overview", open=False):
             gr.Markdown(
-                """
-                ## System Message
-                ###### This box is for setting the system prompt, which guides the AI's behavior and context.
-
-                ## Max New Tokens
-                ###### This slider allows you to specify the maximum number of tokens (words or parts of words) the AI will generate in response to your prompt. The default value is 512.
-
-                ## Temperature
-                ###### Temperature controls the randomness of the AI's output. A higher temperature makes the output more random and creative, while a lower temperature makes it more predictable and focused.
-
-                ## Top-P (Nucleus Sampling)
-                ###### Top-P sampling is a technique that selects the smallest set of top tokens whose cumulative probability exceeds a threshold (p). This helps in generating more coherent and relevant responses.
-
-                ## Frequency Penalty
-                ###### This parameter penalizes repeated tokens in the output, encouraging the AI to generate more diverse responses. A higher value means more penalty for repetition.
-
-                ## Seed
-                ###### The seed is a value that ensures reproducibility. If you set a specific seed, the AI will generate the same output for the same input. Setting it to -1 means the seed will be random.
-
-                ### Remember, these settings are all about giving you control over the text generation process. Feel free to experiment and see what each one does. And if you're ever in doubt, the default settings are a great place to start. Happy creating!
-                """
+                """
+                ### Parameters Overview
+                - **Max Tokens**: Maximum number of tokens in the response.
+                - **Temperature**: Controls the randomness of responses. Lower values make the output more deterministic.
+                - **Top-P**: Controls the diversity of responses by limiting the token selection to a probability mass.
+                - **Frequency Penalty**: Penalizes repeated tokens in the output.
+                - **Seed**: Fixes randomness for reproducibility. Use -1 for a random seed.
+                """
             )

-    # Row containing the 'Run' button to trigger the text generation
-    with gr.Row():
-        text_button = gr.Button("Run", variant='primary', elem_id="gen-button")
-    # Row for displaying the generated text output
-    with gr.Row():
-        text_output = gr.Textbox(label="Text Output", elem_id="text-output")
-
-    # Set up button click event to call the respond function
-    text_button.click(respond, inputs=[text_prompt, chatbot, gr.Textbox(value="", label="System message"), max_tokens, temperature, top_p, frequency_penalty, seed, model], outputs=text_output)
-
-    print("Launching Gradio interface...")  # Debug log
-    # Launch the Gradio interface without showing the API or sharing externally
-    textgen.launch(show_api=False, share=False)
+    run_button.click(
+        respond,
+        inputs=[
+            user_input,
+            chatbot,  # pass the Chatbot component itself; gr.Chatbot has no .state attribute
+            system_message,
+            max_tokens,
+            temperature,
+            top_p,
+            frequency_penalty,
+            seed,
+            model,
+            custom_model
+        ],
+        outputs=chatbot
+    )
+
+print("Launching the demo application.")
+demo.launch()
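Review note: `filter_models` survives the rewrite, but nothing in the new UI wires it to the `gr.Dropdown`, and returning a bare list from a change handler would set the dropdown's value rather than its choices. A sketch of one way to hook it up (the `model_search` textbox is hypothetical, not part of this commit):

```python
import gradio as gr

models_list = ["meta-llama/Llama-3.3-70B-Instruct", "bigscience/bloom-176b"]

def filter_models(search_term):
    # gr.update(choices=...) replaces the option list instead of the selected value
    return gr.update(choices=[m for m in models_list if search_term.lower() in m.lower()])

with gr.Blocks() as demo:
    model_search = gr.Textbox(label="Filter Models")  # hypothetical search box
    model = gr.Dropdown(choices=models_list, label="Select a Model")
    model_search.change(filter_models, inputs=model_search, outputs=model)
```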
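A second review note: `respond` streams plain text, while `outputs=chatbot` expects a message history. If `respond` stays a generator of strings, a thin adapter can rebuild the history for the Chatbot; a sketch under that assumption (the adapter is hypothetical, not in this commit):

```python
# Hypothetical adapter: converts respond's streamed text into
# (user, assistant) tuples that gr.Chatbot can render.
def respond_to_history(message, history, *args):
    history = history or []
    for partial in respond(message, history, *args):
        yield history + [(message, partial)]
```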