Spaces:

reab5555
/

Multimodal-Behavioral-Anomalies-Detection

Running

App Files Community

reab5555 committed on Jul 29, 2024

Commit

50833e2

verified ·

1 Parent(s): 2128c11

Update video_processing.py

Browse files

Files changed (1) hide show

video_processing.py +20 -51

video_processing.py CHANGED Viewed

@@ -94,12 +94,6 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
     output_folder = "output"
     os.makedirs(output_folder, exist_ok=True)
-    GRAPH_COLORS = {
-        'facial_embeddings': 'navy',
-        'body_posture': 'purple',
-        'voice': 'green'
-    }
     with tempfile.TemporaryDirectory() as temp_dir:
         aligned_faces_folder = os.path.join(temp_dir, 'aligned_faces')
         organized_faces_folder = os.path.join(temp_dir, 'organized_faces')
@@ -151,97 +145,71 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
         face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
         progress(0.8, "Extracting audio and performing voice analysis")
         audio_path = extract_audio_from_video(video_path)
         diarization, most_frequent_speaker = diarize_speakers(audio_path)
         voice_embeddings, audio_duration = get_speaker_embeddings(audio_path, diarization, most_frequent_speaker)
         aligned_voice_embeddings = align_voice_embeddings(voice_embeddings, frame_count, original_fps, audio_duration)
         progress(0.85, "Performing anomaly detection")
         embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
-        # Ensure X_voice has the same length as X_embeddings
         X_embeddings = df[embedding_columns].values
         X_posture = np.array([posture_scores_by_frame.get(frame, None) for frame in df['Frame']])
         X_posture = X_posture[X_posture != None].reshape(-1, 1)
         X_voice = np.array(aligned_voice_embeddings)
-        # Trim or pad X_voice to match X_embeddings length
         if len(X_voice) > len(X_embeddings):
             X_voice = X_voice[:len(X_embeddings)]
         elif len(X_voice) < len(X_embeddings):
             padding = np.zeros((len(X_embeddings) - len(X_voice), X_voice.shape[1]))
             X_voice = np.vstack((X_voice, padding))
         try:
             if len(X_posture) == 0:
                 raise ValueError("No valid posture data found")
             mse_embeddings, mse_posture, mse_voice = anomaly_detection(X_embeddings, X_posture, X_voice)
             progress(0.9, "Generating graphs")
             mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
-                                                                      color=GRAPH_COLORS['facial_embeddings'],
                                                                       anomaly_threshold=anomaly_threshold)
             mse_histogram_embeddings = plot_mse_histogram(mse_embeddings, "MSE Distribution: Facial Features",
-                                                          anomaly_threshold, color=GRAPH_COLORS['facial_embeddings'])
             mse_plot_posture, anomaly_frames_posture = plot_mse(df, mse_posture, "Body Posture",
-                                                                color=GRAPH_COLORS['body_posture'],
                                                                 anomaly_threshold=anomaly_threshold)
             mse_histogram_posture = plot_mse_histogram(mse_posture, "MSE Distribution: Body Posture",
-                                                       anomaly_threshold, color=GRAPH_COLORS['body_posture'])
             mse_plot_voice, anomaly_frames_voice = plot_mse(df, mse_voice, "Voice",
-                                                            color=GRAPH_COLORS['voice'],
                                                             anomaly_threshold=anomaly_threshold)
             mse_histogram_voice = plot_mse_histogram(mse_voice, "MSE Distribution: Voice",
-                                                     anomaly_threshold, color=GRAPH_COLORS['voice'])
             mse_heatmap_embeddings = plot_mse_heatmap(mse_embeddings, "Facial Features MSE Heatmap", df)
             mse_heatmap_posture = plot_mse_heatmap(mse_posture, "Body Posture MSE Heatmap", df)
             mse_heatmap_voice = plot_mse_heatmap(mse_voice, "Voice MSE Heatmap", df)
-            # Generate the correlation heatmap
             correlation_heatmap = plot_correlation_heatmap(mse_embeddings, mse_posture, mse_voice)
-            # Generate the 3D scatter plot
             scatter_plot_3d = plot_3d_scatter(mse_embeddings, mse_posture, mse_voice)
-            try:
-                if progress is not None:
-                    progress(0.95, desc="Generating video with heatmap")
-                output_folder = "output"
-                heatmap_video_path = create_video_with_heatmap(
-                    video_path, df, mse_embeddings, mse_posture, mse_voice,
-                    output_folder, original_fps, largest_cluster
-                )
-                if heatmap_video_path is None:
-                    print("Failed to create heatmap video")
-                else:
-                    print(f"Heatmap video path from create_video_with_heatmap: {heatmap_video_path}")
-                if progress is not None:
-                    progress(1.0, desc="Video processing complete")
-            except Exception as e:
-                print(f"Error in create_video_with_heatmap: {str(e)}")
-                import traceback
-                traceback.print_exc()
-                heatmap_video_path = None
         except Exception as e:
             print(f"Error details: {str(e)}")
             import traceback
             traceback.print_exc()
-            return (f"Error in video processing: {str(e)}",) + (None,) * 23
         progress(1.0, "Preparing results")
         results = f"Number of persons detected: {num_clusters}\n\n"
@@ -314,6 +282,7 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
         )
 def is_frontal_face(landmarks, threshold=60):
     nose_tip = landmarks[4]
     left_chin = landmarks[234]

     output_folder = "output"
     os.makedirs(output_folder, exist_ok=True)
     with tempfile.TemporaryDirectory() as temp_dir:
         aligned_faces_folder = os.path.join(temp_dir, 'aligned_faces')
         organized_faces_folder = os.path.join(temp_dir, 'organized_faces')
         face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
         progress(0.8, "Extracting audio and performing voice analysis")
         audio_path = extract_audio_from_video(video_path)
         diarization, most_frequent_speaker = diarize_speakers(audio_path)
         voice_embeddings, audio_duration = get_speaker_embeddings(audio_path, diarization, most_frequent_speaker)
         aligned_voice_embeddings = align_voice_embeddings(voice_embeddings, frame_count, original_fps, audio_duration)
         progress(0.85, "Performing anomaly detection")
         embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
         X_embeddings = df[embedding_columns].values
         X_posture = np.array([posture_scores_by_frame.get(frame, None) for frame in df['Frame']])
         X_posture = X_posture[X_posture != None].reshape(-1, 1)
         X_voice = np.array(aligned_voice_embeddings)
         if len(X_voice) > len(X_embeddings):
             X_voice = X_voice[:len(X_embeddings)]
         elif len(X_voice) < len(X_embeddings):
             padding = np.zeros((len(X_embeddings) - len(X_voice), X_voice.shape[1]))
             X_voice = np.vstack((X_voice, padding))
         try:
             if len(X_posture) == 0:
                 raise ValueError("No valid posture data found")
             mse_embeddings, mse_posture, mse_voice = anomaly_detection(X_embeddings, X_posture, X_voice)
             progress(0.9, "Generating graphs")
             mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
+                                                                      color='navy',
                                                                       anomaly_threshold=anomaly_threshold)
             mse_histogram_embeddings = plot_mse_histogram(mse_embeddings, "MSE Distribution: Facial Features",
+                                                          anomaly_threshold, color='navy')
             mse_plot_posture, anomaly_frames_posture = plot_mse(df, mse_posture, "Body Posture",
+                                                                color='purple',
                                                                 anomaly_threshold=anomaly_threshold)
             mse_histogram_posture = plot_mse_histogram(mse_posture, "MSE Distribution: Body Posture",
+                                                       anomaly_threshold, color='purple')
             mse_plot_voice, anomaly_frames_voice = plot_mse(df, mse_voice, "Voice",
+                                                            color='green',
                                                             anomaly_threshold=anomaly_threshold)
             mse_histogram_voice = plot_mse_histogram(mse_voice, "MSE Distribution: Voice",
+                                                     anomaly_threshold, color='green')
             mse_heatmap_embeddings = plot_mse_heatmap(mse_embeddings, "Facial Features MSE Heatmap", df)
             mse_heatmap_posture = plot_mse_heatmap(mse_posture, "Body Posture MSE Heatmap", df)
             mse_heatmap_voice = plot_mse_heatmap(mse_voice, "Voice MSE Heatmap", df)
             correlation_heatmap = plot_correlation_heatmap(mse_embeddings, mse_posture, mse_voice)
             scatter_plot_3d = plot_3d_scatter(mse_embeddings, mse_posture, mse_voice)
+            progress(0.95, "Generating video with heatmap")
+            heatmap_video_path = create_video_with_heatmap(video_path, df, mse_embeddings, mse_posture, mse_voice,
+                                                           output_folder, original_fps, largest_cluster)
+            progress(1.0, "Video processing complete")
         except Exception as e:
             print(f"Error details: {str(e)}")
             import traceback
             traceback.print_exc()
+            return (f"Error in video processing: {str(e)}",) + (None,) * 25
         progress(1.0, "Preparing results")
         results = f"Number of persons detected: {num_clusters}\n\n"
         )
 def is_frontal_face(landmarks, threshold=60):
     nose_tip = landmarks[4]
     left_chin = landmarks[234]