Spaces:

reab5555
/

Multimodal-Behavioral-Anomalies-Detection

Running

App Files Community

reab5555 committed on Jul 28, 2024

Commit

8b78d8c

verified ·

1 Parent(s): b568300

Update video_processing.py

Browse files

Files changed (1) hide show

video_processing.py +30 -1

video_processing.py CHANGED Viewed

@@ -15,7 +15,8 @@ import pandas as pd
 from facenet_pytorch import MTCNN
 import torch
 import mediapipe as mp
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.9, 0.9, 0.9], min_face_size=50)
@@ -160,6 +161,21 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
             mse_embeddings, mse_posture = anomaly_detection(X_embeddings, X_posture)
             progress(0.95, "Generating plots")
             mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
                                                                       color=GRAPH_COLORS['facial_embeddings'],
@@ -179,6 +195,14 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
             mse_heatmap_embeddings = plot_mse_heatmap(mse_embeddings, "Facial Features MSE Heatmap", df)
         except Exception as e:
             print(f"Error details: {str(e)}")
             import traceback
@@ -241,6 +265,11 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
             mse_histogram_posture,
             mse_heatmap_embeddings,
             mse_heatmap_posture,
             face_samples["most_frequent"],
             anomaly_faces_embeddings,
             anomaly_frames_posture_images,

 from facenet_pytorch import MTCNN
 import torch
 import mediapipe as mp
+from voice_analysis import process_audio, cluster_voices, get_most_frequent_voice
+from pydub import AudioSegment
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.9, 0.9, 0.9], min_face_size=50)
             mse_embeddings, mse_posture = anomaly_detection(X_embeddings, X_posture)
+            # Extract audio from video
+            video = AudioSegment.from_file(video_path, "mp4")
+            audio_path = os.path.join(temp_dir, "audio.wav")
+            video.export(audio_path, format="wav")
+            # Process audio
+            voice_embeddings = process_audio(audio_path)
+            voice_clusters = cluster_voices(voice_embeddings)
+            most_frequent_voice = get_most_frequent_voice(voice_embeddings, voice_clusters)
+            # Perform anomaly detection on voice
+            X_voice = np.array(most_frequent_voice)
+            mse_voice = anomaly_detection(X_voice, X_voice)  # Using the same function as for facial features
             progress(0.95, "Generating plots")
             mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
                                                                       color=GRAPH_COLORS['facial_embeddings'],
             mse_heatmap_embeddings = plot_mse_heatmap(mse_embeddings, "Facial Features MSE Heatmap", df)
+            # Generate plots for voice
+            mse_plot_voice, anomaly_segments_voice = plot_mse(df, mse_voice, "Voice",
+                                                              color='green',
+                                                              anomaly_threshold=anomaly_threshold)
+            mse_histogram_voice = plot_mse_histogram(mse_voice, "MSE Distribution: Voice",
+                                                     anomaly_threshold, color='green')
+            mse_heatmap_voice = plot_mse_heatmap(mse_voice, "Voice MSE Heatmap", df)
         except Exception as e:
             print(f"Error details: {str(e)}")
             import traceback
             mse_histogram_posture,
             mse_heatmap_embeddings,
             mse_heatmap_posture,
+            mse_voice,
+            mse_plot_voice,
+            mse_histogram_voice,
+            mse_heatmap_voice,
+            anomaly_segments_voice,
             face_samples["most_frequent"],
             anomaly_faces_embeddings,
             anomaly_frames_posture_images,