Tamim3 committed on
Commit 4c033fe · verified · 1 Parent(s): fae165e

Update app.py

Files changed (1):
  1. app.py (+194 -29)
app.py CHANGED
@@ -1,10 +1,16 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
+from openai import OpenAI
+import os
+
+ACCESS_TOKEN = os.getenv("HF_TOKEN")
+print("Access token loaded.")
+
+client = OpenAI(
+    base_url="https://api-inference.huggingface.co/v1/",
+    api_key=ACCESS_TOKEN,
+)
+print("OpenAI client initialized.")
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")
 
 def respond(
     message,
@@ -13,51 +19,210 @@ def respond(
     max_tokens,
     temperature,
     top_p,
+    frequency_penalty,
+    seed,
+    custom_model
 ):
+
+    print(f"Received message: {message}")
+    print(f"History: {history}")
+    print(f"System message: {system_message}")
+    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
+    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
+    print(f"Selected model (custom_model): {custom_model}")
+
+    # Convert seed to None if -1 (meaning random)
+    if seed == -1:
+        seed = None
+
     messages = [{"role": "system", "content": system_message}]
+    print("Initial messages array constructed.")
 
+    # Add conversation history to the context
     for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
+        user_part = val[0]
+        assistant_part = val[1]
+        if user_part:
+            messages.append({"role": "user", "content": user_part})
+            print(f"Added user message to context: {user_part}")
+        if assistant_part:
+            messages.append({"role": "assistant", "content": assistant_part})
+            print(f"Added assistant message to context: {assistant_part}")
 
+    # Append the latest user message
     messages.append({"role": "user", "content": message})
+    print("Latest user message appended.")
+
+    # If user provided a model, use that; otherwise, fall back to a default model
+    model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
+    print(f"Model selected for inference: {model_to_use}")
 
+    # Start with an empty string to build the response as tokens stream in
     response = ""
+    print("Sending request to OpenAI API.")
 
-    for message in client.chat_completion(
-        messages,
+    for message_chunk in client.chat.completions.create(
+        model=model_to_use,
         max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
+        frequency_penalty=frequency_penalty,
+        seed=seed,
+        messages=messages,
    ):
-        token = message.choices[0].delta.content
-
-        response += token
+        token_text = message_chunk.choices[0].delta.content
+        print(f"Received token: {token_text}")
+        response += token_text
         yield response
 
+    print("Completed response generation.")
+
+# GRADIO UI
+
+chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", likeable=True, layout="panel")
+print("Chatbot interface created.")
+
+system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")
+
+max_tokens_slider = gr.Slider(
+    minimum=1,
+    maximum=4096,
+    value=512,
+    step=1,
+    label="Max new tokens"
+)
+temperature_slider = gr.Slider(
+    minimum=0.1,
+    maximum=4.0,
+    value=0.7,
+    step=0.1,
+    label="Temperature"
+)
+top_p_slider = gr.Slider(
+    minimum=0.1,
+    maximum=1.0,
+    value=0.95,
+    step=0.05,
+    label="Top-P"
+)
+frequency_penalty_slider = gr.Slider(
+    minimum=-2.0,
+    maximum=2.0,
+    value=0.0,
+    step=0.1,
+    label="Frequency Penalty"
+)
+seed_slider = gr.Slider(
+    minimum=-1,
+    maximum=65535,
+    value=-1,
+    step=1,
+    label="Seed (-1 for random)"
+)
+
+# The custom_model_box is what the respond function sees as "custom_model"
+custom_model_box = gr.Textbox(
+    value="",
+    label="Custom Model",
+    info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
+    placeholder="meta-llama/Llama-3.3-70B-Instruct"
+)
+
+def set_custom_model_from_radio(selected):
+    """
+    This function will get triggered whenever someone picks a model from the 'Featured Models' radio.
+    We will update the Custom Model text box with that selection automatically.
+    """
+    print(f"Featured model selected: {selected}")
+    return selected
 
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
-    respond,
+    fn=respond,
     additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
+        system_message_box,
+        max_tokens_slider,
+        temperature_slider,
+        top_p_slider,
+        frequency_penalty_slider,
+        seed_slider,
+        custom_model_box,
     ],
+    fill_height=True,
+    chatbot=chatbot,
+    theme="Nymbo/Nymbo_Theme",
 )
+print("ChatInterface object created.")
+
+with demo:
+    with gr.Accordion("Model Selection", open=False):
+        model_search_box = gr.Textbox(
+            label="Filter Models",
+            placeholder="Search for a featured model...",
+            lines=1
+        )
+        print("Model search box created.")
+
+        models_list = [
+            "meta-llama/Llama-3.3-70B-Instruct",
+            "meta-llama/Llama-3.1-70B-Instruct",
+            "meta-llama/Llama-3.0-70B-Instruct",
+            "meta-llama/Llama-3.2-3B-Instruct",
+            "meta-llama/Llama-3.2-1B-Instruct",
+            "meta-llama/Llama-3.1-8B-Instruct",
+            "NousResearch/Hermes-3-Llama-3.1-8B",
+            "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
+            "mistralai/Mistral-Nemo-Instruct-2407",
+            "mistralai/Mixtral-8x7B-Instruct-v0.1",
+            "mistralai/Mistral-7B-Instruct-v0.3",
+            "mistralai/Mistral-7B-Instruct-v0.2",
+            "Qwen/Qwen2.5-72B-Instruct",
+            "Qwen/Qwen2.5-3B-Instruct",
+            "Qwen/Qwen2.5-0.5B-Instruct",
+            "Qwen/QwQ-32B-Preview",
+            "Qwen/Qwen2.5-Coder-32B-Instruct",
+            "microsoft/Phi-3.5-mini-instruct",
+            "microsoft/Phi-3-mini-128k-instruct",
+            "microsoft/Phi-3-mini-4k-instruct",
+            "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+            "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+            "HuggingFaceH4/zephyr-7b-beta",
+            "HuggingFaceTB/SmolLM2-360M-Instruct",
+            "tiiuae/falcon-7b-instruct",
+            "01-ai/Yi-1.5-34B-Chat",
+        ]
+        print("Models list initialized.")
+
+        featured_model_radio = gr.Radio(
+            label="Select a model below",
+            choices=models_list,
+            value="meta-llama/Llama-3.3-70B-Instruct",
+            interactive=True
+        )
+        print("Featured models radio button created.")
+
+        def filter_models(search_term):
+            print(f"Filtering models with search term: {search_term}")
+            filtered = [m for m in models_list if search_term.lower() in m.lower()]
+            print(f"Filtered models: {filtered}")
+            return gr.update(choices=filtered)
+
+        model_search_box.change(
+            fn=filter_models,
+            inputs=model_search_box,
+            outputs=featured_model_radio
+        )
+        print("Model search box change event linked.")
+
+        featured_model_radio.change(
+            fn=set_custom_model_from_radio,
+            inputs=featured_model_radio,
+            outputs=custom_model_box
+        )
+        print("Featured model radio button change event linked.")
 
+    print("Gradio interface initialized.")
 
 if __name__ == "__main__":
-    demo.launch()
+    print("Launching the demo application.")
+    demo.launch()
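
Note on the streaming pattern this commit adopts: the old version called client.chat_completion(...) from huggingface_hub, while the new version streams through the openai client pointed at Hugging Face's OpenAI-compatible endpoint, with respond() yielding the growing response string so gr.ChatInterface can render it progressively. Below is a minimal standalone sketch of that loop, not part of the commit: it assumes HF_TOKEN is set in the environment and that the default model is served by the serverless endpoint, and it adds a None guard that the committed respond() omits (delta.content can be None on some stream chunks, which would make response += token_text raise a TypeError).

import os
from openai import OpenAI

# OpenAI-compatible client against the Hugging Face serverless Inference API,
# constructed the same way as in app.py.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=os.getenv("HF_TOKEN"),
)

response = ""
# Stream a short completion chunk by chunk, mirroring respond() in app.py.
for chunk in client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",  # app.py's fallback default
    messages=[{"role": "user", "content": "Say hello."}],
    max_tokens=64,
    stream=True,
):
    token = chunk.choices[0].delta.content
    if token is not None:  # some chunks carry no text (e.g. role-only or final chunk)
        response += token
print(response)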