Garvitj committed on
Commit 3a2b7bc · verified · 1 Parent(s): dac5e9b

Update app.py

Files changed (1): app.py +52 -6
app.py CHANGED

@@ -103,7 +103,7 @@ def find_emotion_using_text(sample_rate, audio_data, recognizer):
 
     os.remove(temp_audio_path)
     max_index = text_prediction.argmax()
-    return mapping[max_index]
+    return mapping[max_index],text
 
 # Predict emotion from audio
 def predict_emotion(audio_data):
@@ -244,14 +244,58 @@ def process_audio_from_video(video_path):
         print(f"Error processing audio features: {e}")
         audio_emotion = "Error in audio processing"
 
-    return text_emotion, audio_emotion
+    return text_emotion, audio_emotion,text
 
 
-# Main function to handle video emotion recognition
-def transcribe_and_predict_video(video):
+
+
+
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import gradio as gr
+
+# Load Mistral 7B
+model_name = "mistralai/Mistral-7B-Instruct"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
+
+
+
+def transcribe_and_predict_video(video, chat_history=[]):
+    # Process the video for emotions
     image_emotion = process_video(video)
-    text_emotion, audio_emotion = process_audio_from_video(video)
-    return f"Text Emotion: {text_emotion}, Audio Emotion: {audio_emotion}, Image Emotion: {image_emotion}"
+    text_emotion, audio_emotion, user_input = process_audio_from_video(video)
+    em = [image_emotion, text_emotion, audio_emotion]
+
+    # Format the conversation history
+    history_text = "".join([f"User ({msg[2]}): {msg[0]}\nBot: {msg[1]}\n" for msg in chat_history])
+
+    # Construct the prompt with emotion context and history
+    prompt = f"""
+    You are a helpful AI assistant. Respond like a human while considering the user's emotion.
+
+    User's Emotion: {em}
+
+    Conversation History:
+    {history_text}
+
+    User ({em}): {user_input}
+    Bot:"""
+
+    # Tokenize input
+    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+
+    # Generate response
+    output = model.generate(**inputs, max_length=512, temperature=0.7, top_p=0.9, do_sample=True)
+    response = tokenizer.decode(output[0], skip_special_tokens=True).split("Bot:")[-1].strip()
+
+    # Store the current emotion for the user input (You can modify the emotion detection based on your needs)
+    emotion = detect_emotion(user_input)  # Assuming `detect_emotion` is a function that returns the user's emotion
+
+    # Update the chat history with the current conversation and emotion
+    chat_history.append((user_input, response, emotion))
+
+    return response, chat_history
 
 # Create Gradio interface
 iface = gr.Interface(fn=transcribe_and_predict_video,
@@ -261,3 +305,5 @@ iface = gr.Interface(fn=transcribe_and_predict_video,
                      description="Upload a video to get text, audio, and image emotion predictions.")
 
 iface.launch()
+
+
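
Note on wiring: the updated transcribe_and_predict_video now takes and returns chat_history, while the gr.Interface call left unchanged by this commit still describes a single video input and text output. A minimal sketch of how the history could be threaded through the UI, assuming Gradio's gr.State component and a gr.Video input (this wiring is illustrative and not part of the commit):

import gradio as gr

# Sketch only (not from the commit): keep per-session chat history in gr.State so
# the function signature (video, chat_history) and the return value
# (response, chat_history) match the interface components one-to-one.
iface = gr.Interface(
    fn=transcribe_and_predict_video,
    inputs=[gr.Video(), gr.State([])],
    outputs=[gr.Textbox(label="Response"), gr.State()],
    description="Upload a video to get text, audio, and image emotion predictions.")

iface.launch()

Holding the history in gr.State also keeps conversations separate per session, which a module-level list or the mutable default argument chat_history=[] would not.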