Update app.py
app.py CHANGED
@@ -44,15 +44,17 @@ def predict_text_emotion(text):
 # Extract audio features and predict emotion
 def extract_audio_features(audio_data, sample_rate):
     if not isinstance(audio_data, np.ndarray):
-        audio_data = np.array(audio_data)
-
-
+        audio_data = np.array(audio_data, dtype=np.float32)  # Ensure it is a NumPy array with float type
+    else:
+        audio_data = audio_data.astype(np.float32)  # Convert to float32
+
+    mfcc = np.mean(librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=40).T, axis=0)
     features = np.expand_dims(mfcc, axis=0)
-    features = np.reshape(features, (1, 704))
     return features

 def predict_audio_emotion(audio_data, sample_rate):
     features = extract_audio_features(audio_data, sample_rate)
+    features = np.reshape(features, (1, 40))  # Match model expected input
     prediction = audio_model.predict(features)
     emotion_index = np.argmax(prediction)
     return emotion_mapping[emotion_index]

@@ -115,7 +117,7 @@ def transcribe_and_predict_video(video_path):
     image_emotion = process_video(video_path)

     # Predict emotion from audio (sound-based)
-
+    audio_data, sample_rate = librosa.load(audio_file, sr=None)
     audio_emotion = predict_audio_emotion(audio_data, sample_rate)

     # Combine detected emotions for final output (majority voting can be implemented)
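For context on the first hunk: the updated pipeline already yields a (1, 40) feature array before the model sees it, so the added reshape in predict_audio_emotion acts as a shape safeguard rather than a transformation. Below is a minimal sketch of the shape flow using synthetic audio; the sine tone and sample rate are illustrative, not taken from app.py:

import numpy as np
import librosa

# Synthetic one-second 440 Hz tone, standing in for real audio input.
sample_rate = 22050
audio_data = np.sin(2 * np.pi * 440 * np.arange(sample_rate) / sample_rate).astype(np.float32)

# Same pipeline as the updated extract_audio_features:
# (40, T) MFCC matrix -> transpose to (T, 40) -> mean over frames -> (40,) -> (1, 40).
mfcc = np.mean(librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=40).T, axis=0)
features = np.expand_dims(mfcc, axis=0)
print(features.shape)  # (1, 40)

Because np.mean collapses the time axis, clips of any length map to the same 40-dimensional vector, which is what lets a fixed-input model accept arbitrary-length audio.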
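The second hunk references an audio_file variable whose creation falls outside the diff context. A hedged sketch of how the audio track might be extracted from the video before the new librosa.load call, assuming moviepy is available; the helper name and temp path are hypothetical, not from app.py:

import librosa
from moviepy.editor import VideoFileClip  # assumption: moviepy handles the video track

def load_audio_from_video(video_path, audio_file="temp_audio.wav"):
    # Write the video's audio track to a temporary WAV file,
    # then load it back at its native sample rate (sr=None).
    VideoFileClip(video_path).audio.write_audiofile(audio_file)
    audio_data, sample_rate = librosa.load(audio_file, sr=None)
    return audio_data, sample_rate

Passing sr=None keeps the file's native sample rate instead of resampling to librosa's default 22050 Hz, which matters here because predict_audio_emotion forwards sample_rate into the MFCC computation.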