import os
import numpy as np
import cv2
import librosa
import joblib
from deepface import DeepFace
import streamlit as st
from collections import Counter
from moviepy import VideoFileClip
# Class-index mapping the SVM was presumably trained with; it mirrors the
# emotion_labels list in main(). Currently unused here, kept for reference.
emotion_map = {
    'angry': 0,
    'disgust': 1,
    'fear': 2,
    'happy': 3,
    'neutral': 4,
    'sad': 5
}
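
# If an index-to-label lookup is ever needed, it can be derived directly
# from the mapping above rather than hard-coding a second list:
#   index_to_emotion = {v: k for k, v in emotion_map.items()}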

def split_video_into_frames_and_analyze_emotions(video_path, frame_rate=1):
    """Analyze every `frame_rate`-th frame with DeepFace and return the most
    frequent dominant emotion, or None if no face was ever detected."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        st.error("Error: Could not open video.")
        return None
    frame_count = 0
    success, frame = cap.read()
    emotion_counter = Counter()
    while success:
        if frame_count % frame_rate == 0:
            try:
                analysis = DeepFace.analyze(frame, actions=['emotion'])
                # Recent DeepFace versions return a list of results (one per
                # detected face); older versions return a single dict.
                if isinstance(analysis, list):
                    for result in analysis:
                        emotion_counter[result['dominant_emotion']] += 1
                else:
                    emotion_counter[analysis['dominant_emotion']] += 1
            except Exception:
                # DeepFace raises when no face is found; skip such frames.
                pass
        success, frame = cap.read()
        frame_count += 1
    cap.release()
    if emotion_counter:
        return emotion_counter.most_common(1)[0][0]
    return None
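
# Usage sketch (the path is hypothetical): sampling every 30th frame roughly
# corresponds to one analysis per second for a 30 fps clip, which keeps
# DeepFace inference cost manageable:
#   dominant = split_video_into_frames_and_analyze_emotions("clip.mp4", frame_rate=30)
#   # -> e.g. "happy", or None if no face was detected in any sampled frame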

def extract_audio_from_video(video_path):
    """Write the clip's audio track to a temporary WAV file and load it back
    as a waveform at its native sample rate."""
    video_clip = VideoFileClip(video_path)
    audio_path = "temp_audio.wav"
    video_clip.audio.write_audiofile(audio_path)
    video_clip.close()  # release the file handle before deleting the temp WAV
    audio_array, sr = librosa.load(audio_path, sr=None)
    os.remove(audio_path)
    return audio_array, sr
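
# Usage sketch (the path is hypothetical); sr=None above preserves the source
# sample rate instead of resampling to librosa's 22050 Hz default:
#   audio_array, sr = extract_audio_from_video("clip.mp4")
#   print(audio_array.shape, sr)  # e.g. (n_samples,), 44100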

def extract_features(audio_array, sr, max_length=100):
    """Stack MFCC, chroma, and spectral-contrast features, then pad or
    truncate along the time axis so the SVM input size is constant."""
    try:
        mfccs = librosa.feature.mfcc(y=audio_array, sr=sr, n_mfcc=13)
        chroma = librosa.feature.chroma_stft(y=audio_array, sr=sr)
        spectral_contrast = librosa.feature.spectral_contrast(y=audio_array, sr=sr)
        features = np.vstack([mfccs, chroma, spectral_contrast])
        # Zero-pad or truncate to exactly max_length time frames.
        if features.shape[1] < max_length:
            features = np.pad(features, ((0, 0), (0, max_length - features.shape[1])), mode='constant')
        elif features.shape[1] > max_length:
            features = features[:, :max_length]
        return features.T
    except Exception as e:
        st.error(f"Error extracting features from audio: {str(e)}")
        return None
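
# With librosa defaults the stack has 32 rows (13 MFCCs + 12 chroma bins +
# 7 spectral-contrast bands), so the transposed output is (max_length, 32)
# and main()'s reshape(1, -1) yields a 3200-dimensional vector; this layout
# must match what the SVM and scaler were fitted on:
#   feats = extract_features(audio_array, sr)  # shape (100, 32)
#   flat = feats.reshape(1, -1)                # shape (1, 3200)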

def main():
    # Apply custom styling if a stylesheet ships alongside the script.
    if os.path.exists("style.css"):
        with open("style.css") as f:
            st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
    st.title("Emotion Detection from Video")
    uploaded_file = st.file_uploader("Upload a video", type=["mp4"])
    if uploaded_file is not None:
        # Persist the upload to disk so OpenCV and MoviePy can open it by path.
        video_path = "uploaded_video.mp4"
        with open(video_path, "wb") as f:
            f.write(uploaded_file.read())
        st.write("Processing video... please wait")
        highest_emotion = split_video_into_frames_and_analyze_emotions(video_path)
        audio_array, sr = extract_audio_from_video(video_path)
        model_path = "SVMexec_modeltesting113.pkl"
        svm_model = joblib.load(model_path)
        scaler = joblib.load('scaler.pkl')
        features = extract_features(audio_array, sr)
        if features is not None:
            # Flatten (max_length, n_features) into a single row for the SVM.
            features_2d = features.reshape(1, -1)
            features_normalized = scaler.transform(features_2d)
            predicted_class = svm_model.predict(features_normalized)[0]
            emotion_labels = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad']
            predicted_emotion = emotion_labels[predicted_class]
            if highest_emotion == predicted_emotion:
                st.write(f"The person in the video is {predicted_emotion}.")
            else:
                st.write(f"The emotions from the frames and audio do not match, "
                         f"but the facial expression seems to be {highest_emotion}, "
                         f"while the audio emotion seems to be {predicted_emotion}.")
        else:
            st.write("Failed to extract features from the audio file.")

if __name__ == "__main__":
    main()