Update app.py
app.py
CHANGED
@@ -44,15 +44,17 @@ def predict_text_emotion(text):
 # Extract audio features and predict emotion
 def extract_audio_features(audio_data, sample_rate):
     if not isinstance(audio_data, np.ndarray):
-        audio_data = np.array(audio_data)
-
-
+        audio_data = np.array(audio_data, dtype=np.float32)  # Ensure it is a NumPy array with float type
+    else:
+        audio_data = audio_data.astype(np.float32)  # Convert to float32
+
+    mfcc = np.mean(librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=40).T, axis=0)
     features = np.expand_dims(mfcc, axis=0)
-    features = np.reshape(features, (1, 704))
     return features
 
 def predict_audio_emotion(audio_data, sample_rate):
     features = extract_audio_features(audio_data, sample_rate)
+    features = np.reshape(features, (1, 40))  # Match model expected input
     prediction = audio_model.predict(features)
     emotion_index = np.argmax(prediction)
     return emotion_mapping[emotion_index]
@@ -115,7 +117,7 @@ def transcribe_and_predict_video(video_path):
     image_emotion = process_video(video_path)
 
     # Predict emotion from audio (sound-based)
-
+    audio_data, sample_rate = librosa.load(audio_file, sr=None)
     audio_emotion = predict_audio_emotion(audio_data, sample_rate)
 
     # Combine detected emotions for final output (majority voting can be implemented)
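For readability, here is a minimal sketch of how the audio path reads after this commit, assuming numpy and librosa are imported at the top of app.py and that audio_model and emotion_mapping are the globals already defined elsewhere in the file (as the surrounding context lines suggest):

import numpy as np
import librosa

def extract_audio_features(audio_data, sample_rate):
    # Normalize the input to a float32 NumPy array before feature extraction
    if not isinstance(audio_data, np.ndarray):
        audio_data = np.array(audio_data, dtype=np.float32)
    else:
        audio_data = audio_data.astype(np.float32)

    # 40 MFCC coefficients averaged over time -> vector of shape (40,)
    mfcc = np.mean(librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=40).T, axis=0)
    features = np.expand_dims(mfcc, axis=0)  # shape (1, 40)
    return features

def predict_audio_emotion(audio_data, sample_rate):
    features = extract_audio_features(audio_data, sample_rate)
    features = np.reshape(features, (1, 40))  # match the model's expected input shape
    prediction = audio_model.predict(features)   # audio_model defined elsewhere in app.py
    emotion_index = np.argmax(prediction)
    return emotion_mapping[emotion_index]        # emotion_mapping defined elsewhere in app.py

In transcribe_and_predict_video, the commit also loads the audio with librosa.load(audio_file, sr=None) immediately before calling predict_audio_emotion; audio_file is the audio track extracted earlier in that function (not shown in this hunk), so the exact variable it refers to is an assumption here.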