Garvitj committed
Commit b835d1f · verified · 1 Parent(s): c8185df

Update app.py

Files changed (1)
  1. app.py +99 -36
app.py CHANGED
@@ -259,60 +259,123 @@ def transcribe_and_predict_video(video):
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
 client = InferenceClient(MODEL_NAME)

-# Chatbot response function
-def respond(video, history, system_message, max_tokens, temperature, top_p):
-    video_path = video.name  # Get the uploaded video file path
-
-    # Process the video for emotions & text
-    result = transcribe_and_predict_video(video_path)
-
-    # Construct a system prompt with extracted emotions & text
-    system_prompt = (
-        f"{system_message}\n\n"
-        f"Detected Emotions:\n"
-        f"- Text Emotion: {result['text_emotion']}\n"
-        f"- Audio Emotion: {result['audio_emotion']}\n"
-        f"- Image Emotion: {result['image_emotion']}\n\n"
-        f"Extracted Speech: {result['extracted_text']}"
-    )
-
-    messages = [{"role": "system", "content": system_prompt}]
+# # Chatbot response function
+# def respond(video, history, system_message, max_tokens, temperature, top_p):
+#     video_path = video.name  # Get the uploaded video file path
+
+#     # Process the video for emotions & text
+#     result = transcribe_and_predict_video(video_path)
+
+#     # Construct a system prompt with extracted emotions & text
+#     system_prompt = (
+#         f"{system_message}\n\n"
+#         f"Detected Emotions:\n"
+#         f"- Text Emotion: {result['text_emotion']}\n"
+#         f"- Audio Emotion: {result['audio_emotion']}\n"
+#         f"- Image Emotion: {result['image_emotion']}\n\n"
+#         f"Extracted Speech: {result['extracted_text']}"
+#     )
+
+#     messages = [{"role": "system", "content": system_prompt}]
+
+#     for val in history:
+#         if val[0]:
+#             messages.append({"role": "user", "content": val[0]})
+#         if val[1]:
+#             messages.append({"role": "assistant", "content": val[1]})
+
+#     messages.append({"role": "user", "content": result['extracted_text']})
+
+#     response = ""
+
+#     try:
+#         for message in client.chat_completion(
+#             messages,
+#             max_tokens=max_tokens,
+#             stream=True,
+#             temperature=temperature,
+#             top_p=top_p,
+#         ):
+#             token = message.choices[0].delta.content if message.choices[0].delta else ""
+#             response += token
+#             yield response
+#     except Exception as e:
+#         yield f"Error: {str(e)}"
+
+# # Gradio UI for video chatbot
+# demo = gr.ChatInterface(
+#     respond,
+#     additional_inputs=[
+#         gr.Video(label="Upload a Video"),  # Video input
+#         gr.Textbox(value="You are a chatbot that analyzes emotions and responds accordingly.", label="System message"),
+#         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
+#         gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
+#         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
+#     ],
+# )
+
+# if __name__ == "__main__":
+#     demo.launch()
+
+
+
+def respond(video, text_input, history):
+    """Processes user input (video, text, or both) and generates a chatbot response."""
+    messages = []
+    system_prompt = "You are a chatbot that can analyze emotions from videos and respond accordingly."
+
+    if video is not None:
+        video_path = video.name  # Get uploaded video path
+        result = transcribe_and_predict_video(video_path)
+
+        system_prompt += f"\n\nDetected Emotions:\n"
+        system_prompt += f"- Text Emotion: {result['text_emotion']}\n"
+        system_prompt += f"- Audio Emotion: {result['audio_emotion']}\n"
+        system_prompt += f"- Image Emotion: {result['image_emotion']}\n\n"
+        system_prompt += f"Extracted Speech: {result['extracted_text']}\n"
+
+        messages.append({"role": "user", "content": result["extracted_text"]})  # Add extracted speech
+
+    if text_input:
+        messages.append({"role": "user", "content": text_input})  # Add text input if provided

+    # Append chat history
     for val in history:
         if val[0]:
             messages.append({"role": "user", "content": val[0]})
         if val[1]:
             messages.append({"role": "assistant", "content": val[1]})

-    messages.append({"role": "user", "content": result['extracted_text']})
+    # Include system prompt
+    messages.insert(0, {"role": "system", "content": system_prompt})

     response = ""
-
+
     try:
-        for message in client.chat_completion(
-            messages,
-            max_tokens=max_tokens,
-            stream=True,
-            temperature=temperature,
-            top_p=top_p,
-        ):
+        for message in client.chat_completion(messages, max_tokens=512, stream=True, temperature=0.7, top_p=0.95):
             token = message.choices[0].delta.content if message.choices[0].delta else ""
             response += token
             yield response
     except Exception as e:
         yield f"Error: {str(e)}"

-# Gradio UI for video chatbot
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Video(label="Upload a Video"),  # Video input
-        gr.Textbox(value="You are a chatbot that analyzes emotions and responds accordingly.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
-        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
-    ],
-)
+# Define ChatGPT-style UI
+with gr.Blocks(theme="soft") as demo:
+    gr.Markdown("<h2 align='center'>📹🎤💬 Multi-Modal Chatbot (Video + Text) </h2>")
+
+    chatbot = gr.Chatbot(label="ChatGPT-Like Chat")
+    video_input = gr.Video(label="Upload Video (Optional)")
+    text_input = gr.Textbox(label="Enter Text (Optional)", placeholder="Type your message here...")
+
+    clear_button = gr.Button("Clear Chat")
+
+    def clear_chat():
+        return [], None, None
+
+    video_input.change(clear_chat, outputs=[chatbot, video_input, text_input])
+    text_input.submit(respond, inputs=[video_input, text_input, chatbot], outputs=[chatbot])
+    clear_button.click(clear_chat, outputs=[chatbot, video_input, text_input])

+# Launch chatbot
 if __name__ == "__main__":
     demo.launch()
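
For reference, the streaming call pattern used in the new respond() can be exercised outside of Gradio. The snippet below is a minimal standalone sketch, not part of this commit: it assumes huggingface_hub is installed and the endpoint is reachable, and the result dict is a hypothetical placeholder standing in for the output of transcribe_and_predict_video().

# Minimal standalone sketch of the streaming chat_completion pattern used in respond().
# The emotion/transcript values below are placeholders, not output of the real pipeline.
from huggingface_hub import InferenceClient

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

result = {  # placeholder for the dict respond() expects from transcribe_and_predict_video()
    "text_emotion": "happy",
    "audio_emotion": "neutral",
    "image_emotion": "happy",
    "extracted_text": "I just got some great news today!",
}

system_prompt = (
    "You are a chatbot that can analyze emotions from videos and respond accordingly.\n\n"
    f"Detected Emotions:\n"
    f"- Text Emotion: {result['text_emotion']}\n"
    f"- Audio Emotion: {result['audio_emotion']}\n"
    f"- Image Emotion: {result['image_emotion']}\n\n"
    f"Extracted Speech: {result['extracted_text']}"
)

messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": result["extracted_text"]},
]

response = ""
# With stream=True, each chunk carries the next token in choices[0].delta.content.
for message in client.chat_completion(messages, max_tokens=512, stream=True, temperature=0.7, top_p=0.95):
    token = message.choices[0].delta.content if message.choices[0].delta else ""
    response += token or ""

print(response)

Depending on the deployment, calling the hosted zephyr-7b-beta endpoint may require an HF access token (for example via the HF_TOKEN environment variable or InferenceClient(token=...)).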