Update app.py
app.py CHANGED
@@ -15,7 +15,7 @@ from collections import Counter
 import os
 
 # Load necessary models and files
-text_model = load_model('model_for_text_emotion_updated(1).keras')  # Load
+text_model = load_model('model_for_text_emotion_updated(1).keras')  # Load text emotion model
 with open('tokenizer.json') as json_file:
     tokenizer = tokenizer_from_json(json.load(json_file))  # Tokenizer for text emotion
 audio_model = load_model('my_model.h5')  # Load audio emotion model
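Note: the calls in this hunk depend on imports that live above it in app.py and are not shown in the diff; the sketch below is an assumption about that setup, based on the names used here and the `from collections import Counter` context in the hunk header.

    # Assumed imports (not visible in this diff):
    import json
    import numpy as np
    import librosa
    from collections import Counter
    from tensorflow.keras.models import load_model
    from tensorflow.keras.preprocessing.text import tokenizer_from_json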
@@ -44,6 +44,9 @@ def predict_text_emotion(text):
 
 # Extract audio features and predict emotion
 def extract_audio_features(audio_data, sample_rate):
+    if not isinstance(audio_data, np.ndarray):
+        audio_data = np.array(audio_data)  # Ensure it's a NumPy array
+
     mfcc = np.mean(librosa.feature.mfcc(y=audio_data, sr=sample_rate).T, axis=0)
     return np.expand_dims(mfcc, axis=0)
 
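For reference on the shapes here: librosa.feature.mfcc returns an (n_mfcc, frames) array, so transposing and averaging over axis 0 collapses the frames into a single coefficient vector, and expand_dims adds the batch axis a Keras model expects. A minimal sanity check on synthetic audio (assumes only numpy and librosa; the signal is random noise, not app data):

    import numpy as np
    import librosa

    audio_data = np.random.uniform(-1, 1, 22050).astype(np.float32)  # 1 s of noise at 22.05 kHz
    mfcc = np.mean(librosa.feature.mfcc(y=audio_data, sr=22050).T, axis=0)
    features = np.expand_dims(mfcc, axis=0)
    print(features.shape)  # (1, 20) with librosa's default n_mfcc=20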
@@ -111,7 +114,12 @@ def transcribe_and_predict_video(video_path):
     image_emotion = process_video(video_path)
 
     # Predict emotion from audio (sound-based)
-
+    audio_data, sample_rate = librosa.load(audio_file, sr=None)
+
+    # Debugging print statements
+    print(f"Type of audio_data: {type(audio_data)}")  # Ensure audio_data is numpy.ndarray
+    print(f"Sample rate: {sample_rate}")
+
     audio_emotion = predict_audio_emotion(audio_data, sample_rate)
 
     # Combine the detected emotions for final output (you could average them or choose the most common)
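One note on the added librosa.load call: it always returns a float32 numpy.ndarray plus the sample rate (the file's native rate here, since sr=None), so the debug prints should confirm numpy.ndarray; the isinstance guard added to extract_audio_features mainly protects other callers that pass plain Python lists. A small illustration (the filename is hypothetical):

    import librosa

    audio_data, sample_rate = librosa.load('extracted_audio.wav', sr=None)  # hypothetical path
    print(type(audio_data))  # <class 'numpy.ndarray'>, dtype float32
    print(sample_rate)       # native rate of the file, e.g. 44100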
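The closing comment leaves the combination step open. Since app.py already imports Counter (visible in the first hunk header), one way to "choose the most common" is a majority vote; combine_emotions below is a hypothetical helper sketch, not code from this commit:

    from collections import Counter

    def combine_emotions(*emotions):
        # Hypothetical helper: majority vote across modalities;
        # most_common(1) returns [(label, count)], so take the label.
        return Counter(emotions).most_common(1)[0][0]

    final_emotion = combine_emotions(image_emotion, audio_emotion)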