Garvitj committed on
Commit 18d4be8 · verified · 1 Parent(s): 915d8ef

Update app.py

Files changed (1)
  1. app.py +16 -15
app.py CHANGED
@@ -249,13 +249,17 @@ def process_audio_from_video(video_path):
 
 
 
-
+import torch
 import gradio as gr
 from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer, AutoModelForCausalLM
-from huggingface_hub import InferenceClient
 
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+# Hugging Face Inference Client (equivalent to the reference code's client)
+client = InferenceClient("TheBloke/Mistral-7B-Instruct-v0.1-GPTQ")
+
+# Tokenizer and model loading (still necessary if you want to process locally)
+tokenizer = AutoTokenizer.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.1-GPTQ")
+model = AutoModelForCausalLM.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.1-GPTQ")
 
 
 def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
@@ -286,10 +290,10 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
 
 
 # Function to handle video processing and interaction
-def transcribe_and_predict_video(video, chat_history=[]):
+def transcribe_and_predict_video(video, user_input, chat_history=[]):
     # Process the video for emotions (use your own emotion detection functions)
     image_emotion = process_video(video)
-    text_emotion, audio_emotion, user_input = process_audio_from_video(video)
+    text_emotion, audio_emotion = process_audio_from_video(video)
     em = [image_emotion, text_emotion, audio_emotion]
 
     # Format the conversation history
@@ -324,20 +328,17 @@ def transcribe_and_predict_video(video, chat_history=[]):
 
 
 # Gradio interface setup
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
-    ],
+iface = gr.Interface(
+    fn=transcribe_and_predict_video,
+    inputs=[gr.Video(), gr.Textbox(), gr.State()],  # Accepting video input, user text, and chat history
+    outputs=[gr.Textbox(), gr.State()],  # Output is the response and updated chat history
+    title="Multimodal Emotion Recognition from Video",
+    description="Upload a video to get text, audio, and image emotion predictions and interact with the chatbot."
 )
 
 # Launch the Gradio interface
 if __name__ == "__main__":
-    demo.launch()
-
+    iface.launch()
 
 
 
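
A note on the new model-loading lines: GPTQ checkpoints such as TheBloke/Mistral-7B-Instruct-v0.1-GPTQ generally need the optimum/auto-gptq backend installed and are meant to run on GPU, so the bare from_pretrained calls in this commit may fail or be very slow on CPU-only Spaces hardware. A minimal sketch of a more defensive loading path (device_map and torch_dtype are standard transformers arguments, not part of this commit):

# Sketch, not part of the commit: defensive loading of a GPTQ checkpoint.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",          # put the quantized weights on GPU when one is available
    torch_dtype=torch.float16,  # GPTQ kernels expect half-precision activations
)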
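
The body of respond is unchanged and not shown in these hunks. For context, the removed gr.ChatInterface block is the stock Gradio chat template, whose respond function streams tokens from the InferenceClient roughly as below; this is a sketch matching the signature in the context line, not the actual body in app.py, and whether the Inference API serves this particular GPTQ repo is worth verifying:

# Sketch of a streaming respond, following the stock Gradio chat template.
from huggingface_hub import InferenceClient

client = InferenceClient("TheBloke/Mistral-7B-Instruct-v0.1-GPTQ")

def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Rebuild the chat transcript in the OpenAI-style message format.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # Stream the completion back to Gradio token by token.
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        response += chunk.choices[0].delta.content or ""
        yield response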
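
One wiring detail worth flagging in the new interface: gr.State() passes None on the first call, while transcribe_and_predict_video declares chat_history=[] as a mutable default, which Python shares across calls. A sketch of history handling that matches the declared inputs=[gr.Video(), gr.Textbox(), gr.State()] and outputs=[gr.Textbox(), gr.State()] (generate_response is a hypothetical stand-in for the chat step in app.py):

# Sketch, not from the commit: State-safe history handling.
def transcribe_and_predict_video(video, user_input, chat_history=None):
    chat_history = chat_history or []  # gr.State() arrives as None on the first call
    image_emotion = process_video(video)                           # defined earlier in app.py
    text_emotion, audio_emotion = process_audio_from_video(video)  # defined earlier in app.py
    emotions = [image_emotion, text_emotion, audio_emotion]
    response = generate_response(user_input, emotions)  # hypothetical helper, not in the diff
    chat_history.append((user_input, response))
    return response, chat_history  # fills gr.Textbox() and the updated gr.State()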