Spaces:

reab5555
/

Multimodal-Behavioral-Anomalies-Detection

Sleeping

App Files Files Community

reab5555 committed on Jul 28, 2024

Commit

cdca32f

verified ·

1 Parent(s): aeb4947

Update video_processing.py

Browse files

Files changed (1) hide show

video_processing.py +36 -74

video_processing.py CHANGED Viewed

@@ -8,6 +8,7 @@ from PIL import Image, ImageDraw, ImageFont
 import math
 from face_analysis import get_face_embedding, cluster_faces, organize_faces_by_person, draw_facial_landmarks
 from pose_analysis import pose, calculate_posture_score, draw_pose_landmarks
 from anomaly_detection import anomaly_detection
 from visualization import plot_mse, plot_mse_histogram, plot_mse_heatmap, create_video_with_heatmap
 from utils import frame_to_timecode
@@ -15,6 +16,7 @@ import pandas as pd
 from facenet_pytorch import MTCNN
 import torch
 import mediapipe as mp
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.9, 0.9, 0.9], min_face_size=50)
@@ -22,71 +24,6 @@ mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.9, 0.9, 0.9], min_fac
 mp_face_mesh = mp.solutions.face_mesh
 face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.8)
-def extract_frames(video_path, output_folder, desired_fps, progress_callback=None):
-    os.makedirs(output_folder, exist_ok=True)
-    clip = VideoFileClip(video_path)
-    original_fps = clip.fps
-    duration = clip.duration
-    total_frames = int(duration * original_fps)
-    step = max(1, original_fps / desired_fps)
-    total_frames_to_extract = int(total_frames / step)
-    frame_count = 0
-    for t in np.arange(0, duration, step / original_fps):
-        frame = clip.get_frame(t)
-        cv2.imwrite(os.path.join(output_folder, f"frame_{frame_count:04d}.jpg"), cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
-        frame_count += 1
-        if progress_callback:
-            progress = min(100, (frame_count / total_frames_to_extract) * 100)
-            progress_callback(progress, f"Extracting frame")
-        if frame_count >= total_frames_to_extract:
-            break
-    clip.close()
-    return frame_count, original_fps
-def process_frames(frames_folder, aligned_faces_folder, frame_count, progress):
-    embeddings_by_frame = {}
-    posture_scores_by_frame = {}
-    posture_landmarks_by_frame = {}
-    facial_landmarks_by_frame = {}
-    aligned_face_paths = []
-    frame_files = sorted([f for f in os.listdir(frames_folder) if f.endswith('.jpg')])
-    for i, frame_file in enumerate(frame_files):
-        frame_num = int(frame_file.split('_')[1].split('.')[0])
-        frame_path = os.path.join(frames_folder, frame_file)
-        frame = cv2.imread(frame_path)
-        if frame is not None:
-            posture_score, posture_landmarks = calculate_posture_score(frame)
-            posture_scores_by_frame[frame_num] = posture_score
-            posture_landmarks_by_frame[frame_num] = posture_landmarks
-            boxes, probs = mtcnn.detect(frame)
-            if boxes is not None and len(boxes) > 0 and probs[0] >= 0.99:
-                x1, y1, x2, y2 = [int(b) for b in boxes[0]]
-                face = frame[y1:y2, x1:x2]
-                if face.size > 0:
-                    face_rgb = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
-                    results = face_mesh.process(face_rgb)
-                    if results.multi_face_landmarks:
-                        facial_landmarks_by_frame[frame_num] = results.multi_face_landmarks[0]
-                        if is_frontal_face(results.multi_face_landmarks[0].landmark):
-                            aligned_face = face
-                            if aligned_face is not None:
-                                aligned_face_resized = cv2.resize(aligned_face, (160, 160))
-                                output_path = os.path.join(aligned_faces_folder, f"frame_{frame_num}_face.jpg")
-                                cv2.imwrite(output_path, aligned_face_resized)
-                                aligned_face_paths.append(output_path)
-                                embedding = get_face_embedding(aligned_face_resized)
-                                embeddings_by_frame[frame_num] = embedding
-        progress((i + 1) / len(frame_files), f"Processing frame {i + 1} of {len(frame_files)}")
-    return embeddings_by_frame, posture_scores_by_frame, posture_landmarks_by_frame, aligned_face_paths, facial_landmarks_by_frame
 def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
     start_time = time.time()
     output_folder = "output"
@@ -94,7 +31,8 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
     GRAPH_COLORS = {
         'facial_embeddings': 'navy',
-        'body_posture': 'purple'
     }
     with tempfile.TemporaryDirectory() as temp_dir:
@@ -147,7 +85,13 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
         progress(0.75, "Getting face samples")
         face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
-        progress(0.8, "Performing anomaly detection")
         embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
         X_embeddings = df[embedding_columns].values
@@ -159,9 +103,11 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
             if len(X_posture) == 0:
                 raise ValueError("No valid posture data found")
-            mse_embeddings, mse_posture = anomaly_detection(X_embeddings, X_posture)
-            progress(0.85, "Generating graphs")
             mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
                                                                       color=GRAPH_COLORS['facial_embeddings'],
                                                                       anomaly_threshold=anomaly_threshold)
@@ -176,21 +122,28 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
             mse_histogram_posture = plot_mse_histogram(mse_posture, "MSE Distribution: Body Posture",
                                                        anomaly_threshold, color=GRAPH_COLORS['body_posture'])
-            mse_heatmap_posture = plot_mse_heatmap(mse_posture, "Body Posture MSE Heatmap", df)
             mse_heatmap_embeddings = plot_mse_heatmap(mse_embeddings, "Facial Features MSE Heatmap", df)
-            progress(0.9, "Generating video with heatmap")
             # Create video with heatmap
             heatmap_video_path = os.path.join(output_folder, "heatmap_video.mp4")
-            heatmap_video_path = create_video_with_heatmap(video_path, df, mse_embeddings, mse_posture, heatmap_video_path, original_fps, largest_cluster)
         except Exception as e:
             print(f"Error details: {str(e)}")
             import traceback
             traceback.print_exc()
-            return (f"Error in video processing: {str(e)}",) + (None,) * 16
         progress(1.0, "Preparing results")
         results = f"Number of persons detected: {num_clusters}\n\n"
@@ -242,12 +195,16 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
             df,
             mse_embeddings,
             mse_posture,
             mse_plot_embeddings,
-            mse_histogram_embeddings,
             mse_plot_posture,
             mse_histogram_posture,
             mse_heatmap_embeddings,
             mse_heatmap_posture,
             face_samples["most_frequent"],
             anomaly_faces_embeddings,
             anomaly_frames_posture_images,
@@ -256,6 +213,11 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
             heatmap_video_path
         )
 def is_frontal_face(landmarks, threshold=60):
     nose_tip = landmarks[4]
     left_chin = landmarks[234]

 import math
 from face_analysis import get_face_embedding, cluster_faces, organize_faces_by_person, draw_facial_landmarks
 from pose_analysis import pose, calculate_posture_score, draw_pose_landmarks
+from voice_analysis import extract_audio_from_video, diarize_speakers, get_speaker_embeddings
 from anomaly_detection import anomaly_detection
 from visualization import plot_mse, plot_mse_histogram, plot_mse_heatmap, create_video_with_heatmap
 from utils import frame_to_timecode
 from facenet_pytorch import MTCNN
 import torch
 import mediapipe as mp
+from pyannote.audio import Model
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.9, 0.9, 0.9], min_face_size=50)
 mp_face_mesh = mp.solutions.face_mesh
 face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.8)
 def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
     start_time = time.time()
     output_folder = "output"
     GRAPH_COLORS = {
         'facial_embeddings': 'navy',
+        'body_posture': 'purple',
+        'voice': 'green'
     }
     with tempfile.TemporaryDirectory() as temp_dir:
         progress(0.75, "Getting face samples")
         face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
+        progress(0.8, "Extracting audio and performing voice analysis")
+        audio_path = extract_audio_from_video(video_path)
+        diarization = diarize_speakers(audio_path)
+        voice_model = Model.from_pretrained("pyannote/embedding")
+        voice_embeddings = get_speaker_embeddings(audio_path, diarization, voice_model)
+        progress(0.85, "Performing anomaly detection")
         embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
         X_embeddings = df[embedding_columns].values
             if len(X_posture) == 0:
                 raise ValueError("No valid posture data found")
+            X_voice = np.array([emb['embedding'] for emb in voice_embeddings])
+            mse_embeddings, mse_posture, mse_voice = anomaly_detection(X_embeddings, X_posture, X_voice)
+            progress(0.9, "Generating graphs")
             mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
                                                                       color=GRAPH_COLORS['facial_embeddings'],
                                                                       anomaly_threshold=anomaly_threshold)
             mse_histogram_posture = plot_mse_histogram(mse_posture, "MSE Distribution: Body Posture",
                                                        anomaly_threshold, color=GRAPH_COLORS['body_posture'])
+            mse_plot_voice, anomaly_frames_voice = plot_mse(df, mse_voice, "Voice",
+                                                            color=GRAPH_COLORS['voice'],
+                                                            anomaly_threshold=anomaly_threshold)
+            mse_histogram_voice = plot_mse_histogram(mse_voice, "MSE Distribution: Voice",
+                                                     anomaly_threshold, color=GRAPH_COLORS['voice'])
             mse_heatmap_embeddings = plot_mse_heatmap(mse_embeddings, "Facial Features MSE Heatmap", df)
+            mse_heatmap_posture = plot_mse_heatmap(mse_posture, "Body Posture MSE Heatmap", df)
+            mse_heatmap_voice = plot_mse_heatmap(mse_voice, "Voice MSE Heatmap", df)
+            progress(0.95, "Generating video with heatmap")
             # Create video with heatmap
             heatmap_video_path = os.path.join(output_folder, "heatmap_video.mp4")
+            heatmap_video_path = create_video_with_heatmap(video_path, df, mse_embeddings, mse_posture, mse_voice, heatmap_video_path, original_fps, largest_cluster)
         except Exception as e:
             print(f"Error details: {str(e)}")
             import traceback
             traceback.print_exc()
+            return (f"Error in video processing: {str(e)}",) + (None,) * 21
         progress(1.0, "Preparing results")
         results = f"Number of persons detected: {num_clusters}\n\n"
             df,
             mse_embeddings,
             mse_posture,
+            mse_voice,
             mse_plot_embeddings,
             mse_plot_posture,
+            mse_plot_voice,
+            mse_histogram_embeddings,
             mse_histogram_posture,
+            mse_histogram_voice,
             mse_heatmap_embeddings,
             mse_heatmap_posture,
+            mse_heatmap_voice,
             face_samples["most_frequent"],
             anomaly_faces_embeddings,
             anomaly_frames_posture_images,
             heatmap_video_path
         )
 def is_frontal_face(landmarks, threshold=60):
     nose_tip = landmarks[4]
     left_chin = landmarks[234]