Nymbo committed on
Commit fde397b · verified · 1 Parent(s): f7c4208

Update app.py

Files changed (1)
  1. app.py +118 -96
app.py CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 from openai import OpenAI
 import os
+import time
 
 # Retrieve the access token from the environment variable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
@@ -22,6 +23,7 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
+    model_filter,
     model,
     custom_model
 ):
@@ -35,8 +37,9 @@ def respond(
     - top_p: top-p (nucleus) sampling
     - frequency_penalty: penalize repeated tokens in the output
     - seed: a fixed seed for reproducibility; -1 will mean 'random'
-    - model: the selected model
-    - custom_model: a custom model provided by the user
+    - model_filter: search term to filter available models
+    - model: the selected model from the radio choices
+    - custom_model: manually entered HF model path
     """
 
     print(f"Received message: {message}")
@@ -44,18 +47,12 @@
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
-    print(f"Model: {model}, Custom Model: {custom_model}")
+    print(f"Model Filter: {model_filter}, Selected Model: {model}, Custom Model: {custom_model}")
 
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
 
-    # Use custom model if provided, otherwise use selected model
-    if custom_model.strip() != "":
-        model_to_use = custom_model.strip()
-    else:
-        model_to_use = model
-
     # Construct the messages array required by the API
     messages = [{"role": "system", "content": system_message}]
 
@@ -73,13 +70,30 @@
     # Append the latest user message
     messages.append({"role": "user", "content": message})
 
+    # Determine the model to use
+    # Set the API URL based on the selected model or custom model
+    if custom_model.strip() != "":
+        api_model = custom_model.strip()
+    else:
+        if model == "Llama-3-70B-Instruct":
+            api_model = "meta-llama/Llama-3.3-70B-Instruct"
+        elif model == "Mistral-7B-Instruct-v0.2":
+            api_model = "mistralai/Mistral-7B-Instruct-v0.2"
+        elif model == "OpenHermes-2.5-Mistral-7B":
+            api_model = "teknium/OpenHermes-2.5-Mistral-7B"
+        elif model == "Phi-2":
+            api_model = "microsoft/Phi-2"
+        else:
+            api_model = "meta-llama/Llama-3.3-70B-Instruct"
+    print(f"Using model: {api_model}")
+
     # Start with an empty string to build the response as tokens stream in
     response = ""
-    print("Sending request to OpenAI API.")
+    print(f"Sending request to OpenAI API, using model {api_model}.")
 
     # Make the streaming request to the HF Inference API via openai-like client
     for message_chunk in client.chat.completions.create(
-        model=model_to_use,  # Use the selected or custom model
+        model=api_model,
         max_tokens=max_tokens,
         stream=True,  # Stream the response
         temperature=temperature,
@@ -91,36 +105,34 @@
         # Extract the token text from the response chunk
         token_text = message_chunk.choices[0].delta.content
         print(f"Received token: {token_text}")
-        response += token_text
-        yield response
 
-    print("Completed response generation.")
+        # Check if token_text is None before appending
+        if token_text is not None:
+            response += token_text
+            yield response
 
-# Create a Chatbot component with a specified height
-chatbot = gr.Chatbot(height=600)
-print("Chatbot interface created.")
+    print("Completed response generation.")
 
-# List of placeholder models for demonstration
+# Placeholder list of models for the accordion
 models_list = [
-    "meta-llama/Llama-3.3-70B-Instruct",
-    "meta-llama/Llama-2-70B-chat",
-    "google/flan-t5-xl"
+    "Llama-3-70B-Instruct",
+    "Mistral-7B-Instruct-v0.2",
+    "OpenHermes-2.5-Mistral-7B",
+    "Phi-2",
 ]
 
-# Function to filter models based on search input
-def filter_models(search_term):
-    filtered_models = [m for m in models_list if search_term.lower() in m.lower()]
-    return gr.update(choices=filtered_models)
+# Create a Chatbot component with a specified height
+chatbot = gr.Chatbot(height=600)
+print("Chatbot interface created.")
 
 # Create the Gradio ChatInterface
-# Adding additional fields for model selection and parameters
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
         gr.Textbox(value="", label="System message"),
-        gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
+        gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
         gr.Slider(
             minimum=-2.0,
            maximum=2.0,
@@ -130,80 +142,90 @@ demo = gr.ChatInterface(
         ),
         gr.Slider(
             minimum=-1,
-            maximum=65535,  # Arbitrary upper limit for demonstration
+            maximum=65535,
             value=-1,
             step=1,
             label="Seed (-1 for random)"
         ),
-        gr.Textbox(label="Custom Model", placeholder="Enter custom model path here"),
-        gr.Accordion("Featured Models", open=True).update(
-            gr.Column([
-                gr.Textbox(label="Filter Models", placeholder="Search for a featured model...").change(
-                    filter_models, inputs="__self__", outputs="model"
-                ),
-                gr.Radio(label="Select a model below", value="meta-llama/Llama-3.3-70B-Instruct", choices=models_list, interactive=True, elem_id="model-radio")
-            ])
-        )
+        gr.Textbox(label="Filter Featured Models", placeholder="Search...", lines=1),
+        gr.Radio(label="Select a Featured Model", choices=models_list, value="Llama-3-70B-Instruct"),
+        gr.Textbox(label="Custom Model", placeholder="Enter Hugging Face model path", lines=1),
     ],
+    additional_inputs_accordion=gr.Accordion("Advanced Parameters", open=False),
     fill_height=True,
     chatbot=chatbot,
     theme="Nymbo/Nymbo_Theme",
 )
 
-# Adding an "Information" tab with accordions for "Featured Models" and "Parameters Overview"
-with gr.Blocks(theme='Nymbo/Nymbo_Theme') as demo:
-    with gr.Tab("Chat"):
-        gr.Markdown("## Chat with the Model")
-        chatbot.render()
-    with gr.Tab("Information"):
-        with gr.Accordion("Featured Models", open=False):
-            gr.HTML(
-                """
-                <p><a href="https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending">See all available models</a></p>
-                <table style="width:100%; text-align:center; margin:auto;">
-                    <tr>
-                        <th>Model Name</th>
-                        <th>Type</th>
-                        <th>Notes</th>
-                    </tr>
-                    <tr>
-                        <td>Llama-3.3-70B-Instruct</td>
-                        <td>Instruction</td>
-                        <td>High performance</td>
-                    </tr>
-                    <tr>
-                        <td>Llama-2-70B-chat</td>
-                        <td>Chat</td>
-                        <td>Conversational</td>
-                    </tr>
-                    <tr>
-                        <td>Flan-T5-XL</td>
-                        <td>General</td>
-                        <td>Versatile</td>
-                    </tr>
-                </table>
-                """
-            )
-        with gr.Accordion("Parameters Overview", open=False):
-            gr.Markdown(
-                """
-                ## Parameters Overview
-                ### Max new tokens
-                This slider controls the maximum number of tokens to generate in the response.
-
-                ### Temperature
-                Sampling temperature, which controls the randomness. A higher temperature makes the output more random.
-
-                ### Top-P
-                Top-p (nucleus) sampling, which controls the diversity. The model considers the smallest number of tokens whose cumulative probability exceeds the top-p threshold.
-
-                ### Frequency Penalty
-                Penalizes repeated tokens in the output, which helps to reduce repetition.
-
-                ### Seed
-                A fixed seed for reproducibility. Set to -1 for random seed.
-                """
-            )
-
-print("Launching the demo application.")
-demo.launch()
+# Add the "Information" tab to the demo
+with gr.Tab("Information", parent=demo):
+    with gr.Accordion("Featured Models", open=True):
+        gr.HTML(
+            """
+            <table style="width:100%; text-align:center; margin:auto;">
+                <tr>
+                    <th>Model Name</th>
+                    <th>Provider</th>
+                    <th>Notes</th>
+                </tr>
+                <tr>
+                    <td>Llama-3-70B-Instruct</td>
+                    <td>Meta</td>
+                    <td>Powerful large language model.</td>
+                </tr>
+                <tr>
+                    <td>Mistral-7B-Instruct-v0.2</td>
+                    <td>Mistral AI</td>
+                    <td>Efficient and versatile model.</td>
+                </tr>
+                <tr>
+                    <td>OpenHermes-2.5-Mistral-7B</td>
+                    <td>Teknium</td>
+                    <td>Community-driven, fine-tuned model.</td>
+                </tr>
+                <tr>
+                    <td>Phi-2</td>
+                    <td>Microsoft</td>
+                    <td>Compact yet powerful model.</td>
+                </tr>
+            </table>
+            """
+        )
+    with gr.Accordion("Parameters Overview", open=False):
+        gr.Markdown(
+            """
+            ## System Message
+            ###### The system message sets the behavior and persona of the chatbot. It's a way to provide context and instructions to the AI. For example, you can tell it to act as a helpful assistant, a storyteller, or any other role.
+            ## Max New Tokens
+            ###### This setting limits the length of the response generated by the AI. A higher number allows for longer, more detailed responses, while a lower number keeps the responses concise.
+            ## Temperature
+            ###### Temperature controls the randomness of the AI's output. A higher temperature makes the responses more creative and varied, while a lower temperature makes them more predictable and focused.
+            ## Top-P (Nucleus Sampling)
+            ###### Top-P sampling is a way to control the diversity of the AI's responses. It sets a threshold for the cumulative probability of the most likely next words. The AI then randomly selects from the words whose probabilities add up to this threshold. A lower Top-P value means less diversity.
+            ## Frequency Penalty
+            ###### Frequency penalty discourages the AI from repeating the same words or phrases too often in its responses. A higher penalty means the AI is less likely to repeat itself.
+            ## Seed
+            ###### The seed is a starting point for the random number generator that influences the AI's responses. If you set a specific seed, you'll get the same response every time you use that seed with the same prompt and settings. If you set it to -1, the AI will generate a new seed each time, leading to different responses.
+            ## Featured Models
+            ###### This section lists pre-selected models that are known to perform well. You can filter the list by typing in the search box.
+            ## Custom Model
+            ###### If you want to use a model that's not in the featured list, you can enter its Hugging Face model path here.
+            ### Feel free to experiment with these settings to see how they affect the AI's responses. Happy chatting!
+            """
+        )
+
+# Filter models function
+def filter_models(search_term, model_radio):
+    filtered_models = [m for m in models_list if search_term.lower() in m.lower()]
+    if not filtered_models:
+        filtered_models = ["No matching models"]  # Provide feedback
+    return gr.Radio.update(choices=filtered_models)
+
+# Update model list when search box is used
+demo.additional_inputs[6].change(filter_models, inputs=[demo.additional_inputs[6], demo.additional_inputs[7]], outputs=demo.additional_inputs[7])
+
+print("Gradio interface initialized.")
+
+if __name__ == "__main__":
+    print("Launching the demo application.")
+    demo.queue().launch()
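
A minimal standalone sketch of the streaming pattern this commit parameterizes, for readers following along. It is illustrative only: the client construction sits outside the visible hunks, so the base_url below is an assumption about how the OpenAI client is pointed at Hugging Face's OpenAI-compatible Inference API, and the model name is simply the fallback default this commit hardcodes.

import os
from openai import OpenAI

# Assumption: the Space constructs its client roughly like this;
# the base_url is not shown anywhere in the diff above.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=os.getenv("HF_TOKEN"),
)

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Say hello."},
]

response = ""
for chunk in client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",  # the commit's fallback model
    messages=messages,
    max_tokens=64,
    stream=True,
):
    token = chunk.choices[0].delta.content
    # Mirrors the None-check this commit adds: a stream's final chunk
    # can carry delta.content == None, and "str + None" raises TypeError.
    if token is not None:
        response += token
print(response)

That None guard is the behavioral fix in this commit: without it, the final empty delta of a stream crashes the generator mid-response.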