Update app.py
app.py CHANGED
@@ -15,7 +15,7 @@ from collections import Counter
 import os
 
 # Load necessary models and files
-text_model = load_model('model_for_text_emotion_updated(1).keras')  # Load
+text_model = load_model('model_for_text_emotion_updated(1).keras')  # Load text emotion model
 with open('tokenizer.json') as json_file:
     tokenizer = tokenizer_from_json(json.load(json_file))  # Tokenizer for text emotion
 audio_model = load_model('my_model.h5')  # Load audio emotion model
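Note: the calls in this hunk depend on imports that live above it in app.py and are not shown in the diff; the sketch below is an assumption about that setup, based on the names used here and the `from collections import Counter` context in the hunk header.

    # Assumed imports (not visible in this diff):
    import json
    import numpy as np
    import librosa
    from collections import Counter
    from tensorflow.keras.models import load_model
    from tensorflow.keras.preprocessing.text import tokenizer_from_json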
@@ -44,6 +44,9 @@ def predict_text_emotion(text):
 
 # Extract audio features and predict emotion
 def extract_audio_features(audio_data, sample_rate):
+    if not isinstance(audio_data, np.ndarray):
+        audio_data = np.array(audio_data)  # Ensure it's a NumPy array
+
     mfcc = np.mean(librosa.feature.mfcc(y=audio_data, sr=sample_rate).T, axis=0)
     return np.expand_dims(mfcc, axis=0)
 
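For reference on the shapes here: librosa.feature.mfcc returns an (n_mfcc, frames) array, so transposing and averaging over axis 0 collapses the frames into a single coefficient vector, and expand_dims adds the batch axis a Keras model expects. A minimal sanity check on synthetic audio (assumes only numpy and librosa; the signal is random noise, not app data):

    import numpy as np
    import librosa

    audio_data = np.random.uniform(-1, 1, 22050).astype(np.float32)  # 1 s of noise at 22.05 kHz
    mfcc = np.mean(librosa.feature.mfcc(y=audio_data, sr=22050).T, axis=0)
    features = np.expand_dims(mfcc, axis=0)
    print(features.shape)  # (1, 20) with librosa's default n_mfcc=20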
@@ -111,7 +114,12 @@ def transcribe_and_predict_video(video_path):
     image_emotion = process_video(video_path)
 
     # Predict emotion from audio (sound-based)
-
+    audio_data, sample_rate = librosa.load(audio_file, sr=None)
+
+    # Debugging print statements
+    print(f"Type of audio_data: {type(audio_data)}")  # Ensure audio_data is numpy.ndarray
+    print(f"Sample rate: {sample_rate}")
+
     audio_emotion = predict_audio_emotion(audio_data, sample_rate)
 
     # Combine the detected emotions for final output (you could average them or choose the most common)
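One note on the added librosa.load call: it always returns a float32 numpy.ndarray plus the sample rate (the file's native rate here, since sr=None), so the debug prints should confirm numpy.ndarray; the isinstance guard added to extract_audio_features mainly protects other callers that pass plain Python lists. A small illustration (the filename is hypothetical):

    import librosa

    audio_data, sample_rate = librosa.load('extracted_audio.wav', sr=None)  # hypothetical path
    print(type(audio_data))  # <class 'numpy.ndarray'>, dtype float32
    print(sample_rate)       # native rate of the file, e.g. 44100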
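The closing comment leaves the combination step open. Since app.py already imports Counter (visible in the first hunk header), one way to "choose the most common" is a majority vote; combine_emotions below is a hypothetical helper sketch, not code from this commit:

    from collections import Counter

    def combine_emotions(*emotions):
        # Hypothetical helper: majority vote across modalities;
        # most_common(1) returns [(label, count)], so take the label.
        return Counter(emotions).most_common(1)[0][0]

    final_emotion = combine_emotions(image_emotion, audio_emotion)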