import os
import numpy as np
import cv2
import librosa
import joblib
from deepface import DeepFace
import streamlit as st
from collections import Counter
from moviepy import VideoFileClip
# Class-index mapping the SVM was presumably trained with; it mirrors the
# emotion_labels list in main(). Currently unused here, kept for reference.
emotion_map = {
    'angry': 0,
    'disgust': 1,
    'fear': 2,
    'happy': 3,
    'neutral': 4,
    'sad': 5
}
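
# If an index-to-label lookup is ever needed, it can be derived directly
# from the mapping above rather than hard-coding a second list:
#   index_to_emotion = {v: k for k, v in emotion_map.items()}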

def split_video_into_frames_and_analyze_emotions(video_path, frame_rate=1):
    """Analyze every `frame_rate`-th frame with DeepFace and return the most
    frequent dominant emotion, or None if no face was ever detected."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        st.error("Error: Could not open video.")
        return None
    frame_count = 0
    success, frame = cap.read()
    emotion_counter = Counter()
    while success:
        if frame_count % frame_rate == 0:
            try:
                analysis = DeepFace.analyze(frame, actions=['emotion'])
                # Recent DeepFace versions return a list of results (one per
                # detected face); older versions return a single dict.
                if isinstance(analysis, list):
                    for result in analysis:
                        emotion_counter[result['dominant_emotion']] += 1
                else:
                    emotion_counter[analysis['dominant_emotion']] += 1
            except Exception:
                # DeepFace raises when no face is found; skip such frames.
                pass
        success, frame = cap.read()
        frame_count += 1
    cap.release()
    if emotion_counter:
        return emotion_counter.most_common(1)[0][0]
    return None
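
# Usage sketch (the path is hypothetical): sampling every 30th frame roughly
# corresponds to one analysis per second for a 30 fps clip, which keeps
# DeepFace inference cost manageable:
#   dominant = split_video_into_frames_and_analyze_emotions("clip.mp4", frame_rate=30)
#   # -> e.g. "happy", or None if no face was detected in any sampled frame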

def extract_audio_from_video(video_path):
    """Write the clip's audio track to a temporary WAV file and load it back
    as a waveform at its native sample rate."""
    video_clip = VideoFileClip(video_path)
    audio_path = "temp_audio.wav"
    video_clip.audio.write_audiofile(audio_path)
    video_clip.close()  # release the file handle before deleting the temp WAV
    audio_array, sr = librosa.load(audio_path, sr=None)
    os.remove(audio_path)
    return audio_array, sr
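
# Usage sketch (the path is hypothetical); sr=None above preserves the source
# sample rate instead of resampling to librosa's 22050 Hz default:
#   audio_array, sr = extract_audio_from_video("clip.mp4")
#   print(audio_array.shape, sr)  # e.g. (n_samples,), 44100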

def extract_features(audio_array, sr, max_length=100):
    """Stack MFCC, chroma, and spectral-contrast features, then pad or
    truncate along the time axis so the SVM input size is constant."""
    try:
        mfccs = librosa.feature.mfcc(y=audio_array, sr=sr, n_mfcc=13)
        chroma = librosa.feature.chroma_stft(y=audio_array, sr=sr)
        spectral_contrast = librosa.feature.spectral_contrast(y=audio_array, sr=sr)
        features = np.vstack([mfccs, chroma, spectral_contrast])
        # Zero-pad or truncate to exactly max_length time frames.
        if features.shape[1] < max_length:
            features = np.pad(features, ((0, 0), (0, max_length - features.shape[1])), mode='constant')
        elif features.shape[1] > max_length:
            features = features[:, :max_length]
        return features.T
    except Exception as e:
        st.error(f"Error extracting features from audio: {str(e)}")
        return None
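
# With librosa defaults the stack has 32 rows (13 MFCCs + 12 chroma bins +
# 7 spectral-contrast bands), so the transposed output is (max_length, 32)
# and main()'s reshape(1, -1) yields a 3200-dimensional vector; this layout
# must match what the SVM and scaler were fitted on:
#   feats = extract_features(audio_array, sr)  # shape (100, 32)
#   flat = feats.reshape(1, -1)                # shape (1, 3200)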

def main():
    # Apply custom styling if a stylesheet ships alongside the script.
    if os.path.exists("style.css"):
        with open("style.css") as f:
            st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
    st.title("Emotion Detection from Video")
    uploaded_file = st.file_uploader("Upload a video", type=["mp4"])
    if uploaded_file is not None:
        # Persist the upload to disk so OpenCV and MoviePy can open it by path.
        video_path = "uploaded_video.mp4"
        with open(video_path, "wb") as f:
            f.write(uploaded_file.read())
        st.write("Processing video... please wait")
        highest_emotion = split_video_into_frames_and_analyze_emotions(video_path)
        audio_array, sr = extract_audio_from_video(video_path)
        model_path = "SVMexec_modeltesting113.pkl"
        svm_model = joblib.load(model_path)
        scaler = joblib.load('scaler.pkl')
        features = extract_features(audio_array, sr)
        if features is not None:
            # Flatten (max_length, n_features) into a single row for the SVM.
            features_2d = features.reshape(1, -1)
            features_normalized = scaler.transform(features_2d)
            predicted_class = svm_model.predict(features_normalized)[0]
            emotion_labels = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad']
            predicted_emotion = emotion_labels[predicted_class]
            if highest_emotion == predicted_emotion:
                st.write(f"The person in the video is {predicted_emotion}.")
            else:
                st.write(f"The emotions from the frames and audio do not match, "
                         f"but the facial expression seems to be {highest_emotion}, "
                         f"while the audio emotion seems to be {predicted_emotion}.")
        else:
            st.write("Failed to extract features from the audio file.")

if __name__ == "__main__":
    main()