davidberenstein1957 committed on
Commit
dedc4f1
·
1 Parent(s): 85e41fb

Refactor app.py: Enhance language support, client management, and conversation handling

Browse files

- Improve language system messages to be more specific and consistent
- Create a function to dynamically generate inference clients for each language
- Add a LANGUAGES_TO_CLIENT dictionary for per-language client management
- Update conversation submission and tracking with conversation_id
- Modify input handling and response generation to support multiple languages
- Simplify chat input and feedback mechanisms
- Add more detailed project explanation in the UI

Files changed (1) hide show
  1. app/app.py +150 -55
app/app.py CHANGED
@@ -14,37 +14,65 @@ from huggingface_hub import InferenceClient
14
  from pandas import DataFrame
15
 
16
  LANGUAGES: dict[str, str] = {
17
- "English": "You are a helpful assistant that speaks English.",
18
- "Spanish": "Tu eres un asistente útil que habla español.",
19
- "Hebrew": "אתה עוזר טוב שמפגש בעברית.",
20
- "Dutch": "Je bent een handige assistent die Nederlands spreekt.",
21
- "Italian": "Tu sei un assistente utile che parla italiano.",
22
- "French": "Tu es un assistant utile qui parle français.",
23
- "German": "Du bist ein hilfreicher Assistent, der Deutsch spricht.",
24
- "Portuguese": "Você é um assistente útil que fala português.",
25
- "Russian": "Ты полезный помощник, который говорит по-русски.",
26
- "Chinese": "你是一个有用的助手,会说中文。",
27
- "Japanese": "あなたは役立つ助け役で、日本語を話します。",
28
- "Korean": "당신은 유용한 도우미이며 한국어를 말합니다.",
29
  }
30
 
31
- client = InferenceClient(
32
- token=os.getenv("HF_TOKEN"),
33
- model=(
34
- os.getenv("MODEL", "meta-llama/Llama-3.2-11B-Vision-Instruct")
35
- if not os.getenv("BASE_URL")
36
- else None
37
- ),
38
- base_url=os.getenv("BASE_URL"),
39
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
 
42
  def add_user_message(history, message):
43
- for x in message["files"]:
44
- history.append({"role": "user", "content": {"path": x}})
45
- if message["text"] is not None:
46
- history.append({"role": "user", "content": message["text"]})
47
- return history, gr.MultimodalTextbox(value=None, interactive=False)
 
 
 
48
 
49
 
50
  def format_system_message(language: str, history: list):
@@ -128,7 +156,11 @@ def _process_rating(rating) -> int:
128
 
129
 
130
  def add_fake_like_data(
131
- history: list, session_id: str, language: str, liked: bool = False
 
 
 
 
132
  ) -> None:
133
  data = {
134
  "index": len(history) - 1,
@@ -138,19 +170,27 @@ def add_fake_like_data(
138
  _, dataframe = wrangle_like_data(
139
  gr.LikeData(target=None, data=data), history.copy()
140
  )
141
- submit_conversation(dataframe, session_id, language)
 
 
 
 
 
142
 
143
 
144
- def respond_system_message(
145
- history: list, temperature: Optional[float] = None, seed: Optional[int] = None
 
 
 
146
  ) -> list: # -> list:
147
  """Respond to the user message with a system message
148
 
149
  Return the history with the new message"""
150
  messages = format_history_as_messages(history)
151
- response = client.chat.completions.create(
152
  messages=messages,
153
- max_tokens=2000,
154
  stream=False,
155
  seed=seed,
156
  temperature=temperature,
@@ -221,7 +261,12 @@ def wrangle_like_data(x: gr.LikeData, history) -> DataFrame:
221
 
222
 
223
  def wrangle_edit_data(
224
- x: gr.EditData, history: list, dataframe: DataFrame, session_id: str, language: str
 
 
 
 
 
225
  ) -> list:
226
  """Edit the conversation and add negative feedback if assistant message is edited, otherwise regenerate the message
227
 
@@ -237,20 +282,41 @@ def wrangle_edit_data(
237
 
238
  if history[index]["role"] == "user":
239
  # Add feedback on original and corrected message
240
- add_fake_like_data(history[: index + 2], session_id, language, liked=True)
241
  add_fake_like_data(
242
- history[: index + 1] + [original_message], session_id, language
 
 
 
 
 
 
 
 
 
 
243
  )
244
- history = respond_system_message(
245
- history[: index + 1],
 
246
  temperature=random.randint(1, 100) / 100,
247
  seed=random.randint(0, 1000000),
248
  )
249
  return history
250
  else:
251
  # Add feedback on original and corrected message
252
- add_fake_like_data(history[: index + 1], session_id, language, liked=True)
253
- add_fake_like_data(history[:index] + [original_message], session_id, language)
 
 
 
 
 
 
 
 
 
 
 
254
  history = history[: index + 1]
255
  # add chosen and rejected options
256
  history[-1]["options"] = [
@@ -261,23 +327,34 @@ def wrangle_edit_data(
261
 
262
 
263
  def wrangle_retry_data(
264
- x: gr.RetryData, history: list, dataframe: DataFrame, session_id: str, language: str
 
 
 
 
 
265
  ) -> list:
266
  """Respond to the user message with a system message and add negative feedback on the original message
267
 
268
  Return the history with the new message"""
269
- add_fake_like_data(history, session_id, language)
 
 
 
 
 
270
 
271
  # Return the history without a new message
272
- history = respond_system_message(
273
- history[:-1],
 
274
  temperature=random.randint(1, 100) / 100,
275
  seed=random.randint(0, 1000000),
276
  )
277
  return history, update_dataframe(dataframe, history)
278
 
279
 
280
- def submit_conversation(dataframe, session_id, language):
281
  """ "Submit the conversation to dataset repo"""
282
  if dataframe.empty or len(dataframe) < 2:
283
  gr.Info("No feedback to submit.")
@@ -290,7 +367,7 @@ def submit_conversation(dataframe, session_id, language):
290
  "conversation": conversation,
291
  "timestamp": datetime.now().isoformat(),
292
  "session_id": session_id,
293
- "conversation_id": str(uuid.uuid4()),
294
  "language": language,
295
  }
296
  save_feedback(input_object=conversation_data)
@@ -317,7 +394,9 @@ with gr.Blocks(css=css) as demo:
317
 
318
  with gr.Accordion("Explanation") as explanation:
319
  gr.Markdown(f"""
320
- FeeL is a collaboration between Hugging Face and MIT. It is a community-driven project to provide a real-time feedback loop for VLMs, where your feedback is continuously used to train the model. The [dataset](https://huggingface.co/datasets/{scheduler.repo_id}) and [code](https://github.com/huggingface/feel) are public.
 
 
321
 
322
  Start by selecting your language, chat with the model with text and images and provide feedback in different ways.
323
 
@@ -325,7 +404,7 @@ with gr.Blocks(css=css) as demo:
325
  - 👍/👎 Like or dislike a message
326
  - 🔄 Regenerate a message
327
 
328
- Some feedback is automatically submitted allowing you to continue chatting, but you can also submit and reset the conversation by clicking "💾 Submit conversation" (under the chat) or trash the conversation by clicking "🗑️" (upper right corner).
329
  """)
330
  language = gr.Dropdown(
331
  choices=list(LANGUAGES.keys()), label="Language", interactive=True
@@ -337,6 +416,12 @@ with gr.Blocks(css=css) as demo:
337
  visible=False,
338
  )
339
 
 
 
 
 
 
 
340
  chatbot = gr.Chatbot(
341
  elem_id="chatbot",
342
  editable="all",
@@ -351,15 +436,15 @@ with gr.Blocks(css=css) as demo:
351
  feedback_options=["Like", "Dislike"],
352
  )
353
 
354
- chat_input = gr.MultimodalTextbox(
355
  interactive=True,
356
- file_count="multiple",
357
  placeholder="Enter message or upload file...",
358
  show_label=False,
359
  submit_btn=True,
360
  )
361
 
362
- dataframe = gr.Dataframe(wrap=True, label="Collected feedback")
 
363
 
364
  submit_btn = gr.Button(
365
  value="💾 Submit conversation",
@@ -379,7 +464,7 @@ with gr.Blocks(css=css) as demo:
379
  fn=add_user_message,
380
  inputs=[chatbot, chat_input],
381
  outputs=[chatbot, chat_input],
382
- ).then(respond_system_message, chatbot, chatbot, api_name="bot_response").then(
383
  lambda: gr.Textbox(interactive=True), None, [chat_input]
384
  ).then(update_dataframe, inputs=[dataframe, chatbot], outputs=[dataframe])
385
 
@@ -388,25 +473,35 @@ with gr.Blocks(css=css) as demo:
388
  inputs=[chatbot],
389
  outputs=[chatbot, dataframe],
390
  like_user_message=False,
 
 
 
 
391
  )
392
 
393
  chatbot.retry(
394
  fn=wrangle_retry_data,
395
- inputs=[chatbot, dataframe, session_id, language],
396
  outputs=[chatbot, dataframe],
397
  )
398
 
399
  chatbot.edit(
400
  fn=wrangle_edit_data,
401
- inputs=[chatbot, dataframe, session_id, language],
402
  outputs=[chatbot],
403
  ).then(update_dataframe, inputs=[dataframe, chatbot], outputs=[dataframe])
404
 
405
- submit_btn.click(
 
406
  fn=submit_conversation,
407
- inputs=[dataframe, session_id, language],
408
  outputs=[dataframe, chatbot],
 
 
 
 
409
  )
 
410
  demo.load(
411
  lambda: str(uuid.uuid4()),
412
  inputs=[],
 
14
  from pandas import DataFrame
15
 
16
  LANGUAGES: dict[str, str] = {
17
+ "English": "You are a helpful assistant. Always respond to requests in fluent and natural English, regardless of the language used by the user.",
18
+ "Dutch": "Je bent een behulpzame assistent die uitsluitend in het Nederlands communiceert. Beantwoord alle vragen en verzoeken in vloeiend en natuurlijk Nederlands, ongeacht de taal waarin de gebruiker schrijft.",
19
+ "Italian": "Sei un assistente utile e rispondi sempre in italiano in modo naturale e fluente, indipendentemente dalla lingua utilizzata dall'utente.",
20
+ "Spanish": "Eres un asistente útil que siempre responde en español de manera fluida y natural, independientemente del idioma utilizado por el usuario.",
21
+ "French": "Tu es un assistant utile qui répond toujours en français de manière fluide et naturelle, quelle que soit la langue utilisée par l'utilisateur.",
22
+ "German": "Du bist ein hilfreicher Assistent, der stets auf Deutsch in einer natürlichen und fließenden Weise antwortet, unabhängig von der Sprache des Benutzers.",
23
+ "Portuguese": "Você é um assistente útil que sempre responde em português de forma natural e fluente, independentemente do idioma utilizado pelo usuário.",
24
+ "Russian": "Ты полезный помощник, который всегда отвечает на русском языке плавно и естественно, независимо от языка пользователя.",
25
+ "Chinese": "你是一个有用的助手,总是用流畅自然的中文回答问题,无论用户使用哪种语言。",
26
+ "Japanese": "あなたは役に立つアシスタントであり、常に流暢で自然な日本語で応答します。ユーザーが使用する言語に関係なく、日本語で対応してください。",
27
+ "Korean": "당신은 유용한 도우미이며, 항상 유창하고 자연스러운 한국어로 응답합니다. 사용자가 어떤 언어를 사용하든 한국어로 대답하세요.",
 
28
  }
29
 
30
+
31
+ BASE_MODEL = os.getenv("MODEL", "meta-llama/Llama-3.2-11B-Vision-Instruct")
32
+
33
+
34
+ def create_inference_client(
35
+ model: Optional[str] = None, base_url: Optional[str] = None
36
+ ) -> InferenceClient:
37
+ """Create an InferenceClient instance with the given model or environment settings.
38
+
39
+ Args:
40
+ model: Optional model identifier to use. If not provided, will use environment settings.
41
+
42
+ Returns:
43
+ InferenceClient: Configured client instance
44
+ """
45
+ return InferenceClient(
46
+ token=os.getenv("HF_TOKEN"),
47
+ model=model if model else (BASE_MODEL if not base_url else None),
48
+ base_url=base_url,
49
+ )
50
+
51
+
52
+ LANGUAGES_TO_CLIENT = {
53
+ "English": create_inference_client(),
54
+ "Dutch": create_inference_client(),
55
+ "Italian": create_inference_client(),
56
+ "Spanish": create_inference_client(),
57
+ "French": create_inference_client(),
58
+ "German": create_inference_client(),
59
+ "Portuguese": create_inference_client(),
60
+ "Russian": create_inference_client(),
61
+ "Chinese": create_inference_client(),
62
+ "Japanese": create_inference_client(),
63
+ "Korean": create_inference_client(),
64
+ }
65
 
66
 
67
  def add_user_message(history, message):
68
+ if isinstance(message, dict) and "files" in message:
69
+ for x in message["files"]:
70
+ history.append({"role": "user", "content": {"path": x}})
71
+ if message["text"] is not None:
72
+ history.append({"role": "user", "content": message["text"]})
73
+ else:
74
+ history.append({"role": "user", "content": message})
75
+ return history, gr.Textbox(value=None, interactive=False)
76
 
77
 
78
  def format_system_message(language: str, history: list):
 
156
 
157
 
158
  def add_fake_like_data(
159
+ history: list,
160
+ conversation_id: str,
161
+ session_id: str,
162
+ language: str,
163
+ liked: bool = False,
164
  ) -> None:
165
  data = {
166
  "index": len(history) - 1,
 
170
  _, dataframe = wrangle_like_data(
171
  gr.LikeData(target=None, data=data), history.copy()
172
  )
173
+ submit_conversation(
174
+ dataframe=dataframe,
175
+ conversation_id=conversation_id,
176
+ session_id=session_id,
177
+ language=language,
178
+ )
179
 
180
 
181
+ def respond(
182
+ history: list,
183
+ language: str,
184
+ temperature: Optional[float] = None,
185
+ seed: Optional[int] = None,
186
  ) -> list: # -> list:
187
  """Respond to the user message with a system message
188
 
189
  Return the history with the new message"""
190
  messages = format_history_as_messages(history)
191
+ response = LANGUAGES_TO_CLIENT[language].chat.completions.create(
192
  messages=messages,
193
+ max_tokens=4000,
194
  stream=False,
195
  seed=seed,
196
  temperature=temperature,
 
261
 
262
 
263
  def wrangle_edit_data(
264
+ x: gr.EditData,
265
+ history: list,
266
+ dataframe: DataFrame,
267
+ conversation_id: str,
268
+ session_id: str,
269
+ language: str,
270
  ) -> list:
271
  """Edit the conversation and add negative feedback if assistant message is edited, otherwise regenerate the message
272
 
 
282
 
283
  if history[index]["role"] == "user":
284
  # Add feedback on original and corrected message
 
285
  add_fake_like_data(
286
+ history=history[: index + 2],
287
+ conversation_id=conversation_id,
288
+ session_id=session_id,
289
+ language=language,
290
+ liked=True,
291
+ )
292
+ add_fake_like_data(
293
+ history=history[: index + 1] + [original_message],
294
+ conversation_id=conversation_id,
295
+ session_id=session_id,
296
+ language=language,
297
  )
298
+ history = respond(
299
+ history=history[: index + 1],
300
+ language=language,
301
  temperature=random.randint(1, 100) / 100,
302
  seed=random.randint(0, 1000000),
303
  )
304
  return history
305
  else:
306
  # Add feedback on original and corrected message
307
+ add_fake_like_data(
308
+ history=history[: index + 1],
309
+ conversation_id=conversation_id,
310
+ session_id=session_id,
311
+ language=language,
312
+ liked=True,
313
+ )
314
+ add_fake_like_data(
315
+ history=history[:index] + [original_message],
316
+ conversation_id=conversation_id,
317
+ session_id=session_id,
318
+ language=language,
319
+ )
320
  history = history[: index + 1]
321
  # add chosen and rejected options
322
  history[-1]["options"] = [
 
327
 
328
 
329
  def wrangle_retry_data(
330
+ x: gr.RetryData,
331
+ history: list,
332
+ dataframe: DataFrame,
333
+ conversation_id: str,
334
+ session_id: str,
335
+ language: str,
336
  ) -> list:
337
  """Respond to the user message with a system message and add negative feedback on the original message
338
 
339
  Return the history with the new message"""
340
+ add_fake_like_data(
341
+ history=history,
342
+ conversation_id=conversation_id,
343
+ session_id=session_id,
344
+ language=language,
345
+ )
346
 
347
  # Return the history without a new message
348
+ history = respond(
349
+ history=history[:-1],
350
+ language=language,
351
  temperature=random.randint(1, 100) / 100,
352
  seed=random.randint(0, 1000000),
353
  )
354
  return history, update_dataframe(dataframe, history)
355
 
356
 
357
+ def submit_conversation(dataframe, conversation_id, session_id, language):
358
  """ "Submit the conversation to dataset repo"""
359
  if dataframe.empty or len(dataframe) < 2:
360
  gr.Info("No feedback to submit.")
 
367
  "conversation": conversation,
368
  "timestamp": datetime.now().isoformat(),
369
  "session_id": session_id,
370
+ "conversation_id": conversation_id,
371
  "language": language,
372
  }
373
  save_feedback(input_object=conversation_data)
 
394
 
395
  with gr.Accordion("Explanation") as explanation:
396
  gr.Markdown(f"""
397
+ FeeL is a collaboration between Hugging Face and MIT.
398
+ It is a community-driven project to provide a real-time feedback loop for VLMs, where your feedback is continuously used to fine-tune the underlying models.
399
+ The [dataset](https://huggingface.co/datasets/{scheduler.repo_id}), [code](https://github.com/huggingface/feel) and [models](https://huggingface.co/collections/feel-fl/feel-models-67a9b6ef0fdd554315e295e8) are public.
400
 
401
  Start by selecting your language, chat with the model with text and images and provide feedback in different ways.
402
 
 
404
  - 👍/👎 Like or dislike a message
405
  - 🔄 Regenerate a message
406
 
407
+ Feedback is automatically submitted allowing you to continue chatting, but you can also submit and reset the conversation by clicking "💾 Submit conversation" (under the chat) or trash the conversation by clicking "🗑️" (upper right corner).
408
  """)
409
  language = gr.Dropdown(
410
  choices=list(LANGUAGES.keys()), label="Language", interactive=True
 
416
  visible=False,
417
  )
418
 
419
+ conversation_id = gr.Textbox(
420
+ interactive=False,
421
+ value=str(uuid.uuid4()),
422
+ visible=False,
423
+ )
424
+
425
  chatbot = gr.Chatbot(
426
  elem_id="chatbot",
427
  editable="all",
 
436
  feedback_options=["Like", "Dislike"],
437
  )
438
 
439
+ chat_input = gr.Textbox(
440
  interactive=True,
 
441
  placeholder="Enter message or upload file...",
442
  show_label=False,
443
  submit_btn=True,
444
  )
445
 
446
+ with gr.Accordion("Collected feedback", open=False):
447
+ dataframe = gr.Dataframe(wrap=True, label="Collected feedback")
448
 
449
  submit_btn = gr.Button(
450
  value="💾 Submit conversation",
 
464
  fn=add_user_message,
465
  inputs=[chatbot, chat_input],
466
  outputs=[chatbot, chat_input],
467
+ ).then(respond, inputs=[chatbot, language], outputs=[chatbot]).then(
468
  lambda: gr.Textbox(interactive=True), None, [chat_input]
469
  ).then(update_dataframe, inputs=[dataframe, chatbot], outputs=[dataframe])
470
 
 
473
  inputs=[chatbot],
474
  outputs=[chatbot, dataframe],
475
  like_user_message=False,
476
+ ).then(
477
+ submit_conversation,
478
+ inputs=[dataframe, conversation_id, session_id, language],
479
+ outputs=[dataframe, chatbot],
480
  )
481
 
482
  chatbot.retry(
483
  fn=wrangle_retry_data,
484
+ inputs=[chatbot, dataframe, conversation_id, session_id, language],
485
  outputs=[chatbot, dataframe],
486
  )
487
 
488
  chatbot.edit(
489
  fn=wrangle_edit_data,
490
+ inputs=[chatbot, dataframe, conversation_id, session_id, language],
491
  outputs=[chatbot],
492
  ).then(update_dataframe, inputs=[dataframe, chatbot], outputs=[dataframe])
493
 
494
+ gr.on(
495
+ triggers=[submit_btn.click, chatbot.clear],
496
  fn=submit_conversation,
497
+ inputs=[dataframe, conversation_id, session_id, language],
498
  outputs=[dataframe, chatbot],
499
+ ).then(
500
+ fn=lambda x: str(uuid.uuid4()),
501
+ inputs=[conversation_id],
502
+ outputs=[conversation_id],
503
  )
504
+
505
  demo.load(
506
  lambda: str(uuid.uuid4()),
507
  inputs=[],