Update app.py
app.py
CHANGED
@@ -101,12 +101,7 @@ def extract_frames(video_path, output_folder, fps):
         print(f"FFmpeg stderr: {e.stderr}")
         raise
 
-def extract_and_align_faces_from_video(video_path, aligned_faces_folder, desired_fps):
-    print(f"Processing video: {video_path}")
-
-    frames_folder = os.path.join(os.path.dirname(aligned_faces_folder), 'extracted_frames')
-    extract_frames(video_path, frames_folder, desired_fps)
-
+def get_video_info(video_path):
     ffprobe_command = [
         'ffprobe',
         '-v', 'error',
@@ -116,63 +111,42 @@ def extract_and_align_faces_from_video(video_path, aligned_faces_folder, desired
         '-of', 'csv=p=0',
         video_path
     ]
-
-
-
-
-
-
-
-
-
-        print(f"Frame count (raw): {frame_count}")
-        print(f"Frame rate (raw): {frame_rate}")
-
-        try:
-            frac = fractions.Fraction(frame_rate)
-            original_fps = float(frac.numerator) / float(frac.denominator)
-        except (ValueError, ZeroDivisionError):
-            print(f"Warning: Could not convert frame rate '{frame_rate}' to float. Using fallback method.")
-            duration_command = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', video_path]
-            duration = float(subprocess.check_output(duration_command, universal_newlines=True).strip())
-            original_fps = int(frame_count) / duration
-
-        frame_count = int(frame_count)
-
-    except subprocess.CalledProcessError as e:
-        print(f"Error running FFprobe: {e}")
-        raise
-    except Exception as e:
-        print(f"Unexpected error processing video info: {e}")
-        raise
-
-    print(f"Total frames: {frame_count}, Original FPS: {original_fps}, Desired FPS: {desired_fps}")
+    ffprobe_output = subprocess.check_output(ffprobe_command, universal_newlines=True).strip().split(',')
+    frame_rate, frame_count = ffprobe_output
+
+    frac = fractions.Fraction(frame_rate)
+    original_fps = float(frac.numerator) / float(frac.denominator)
+    frame_count = int(frame_count)
+
+    return frame_count, original_fps
 
+def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, batch_size):
     embeddings_by_frame = {}
     emotions_by_frame = {}
-
-    for i, frame_file in enumerate(sorted(os.listdir(frames_folder))):
-        if frame_file.endswith('.jpg'):
+    frame_files = sorted([f for f in os.listdir(frames_folder) if f.endswith('.jpg')])
+
+    for i in range(0, len(frame_files), batch_size):
+        batch_files = frame_files[i:i+batch_size]
+        batch_frames = []
+        batch_nums = []
+
+        for frame_file in batch_files:
            frame_num = int(frame_file.split('_')[1].split('.')[0])
            frame_path = os.path.join(frames_folder, frame_file)
            frame = cv2.imread(frame_path)
+            if frame is not None:
+                batch_frames.append(frame)
+                batch_nums.append(frame_num)
+
+        if batch_frames:
+            # Detect faces in batch
+            batch_boxes, batch_probs = mtcnn.detect(batch_frames)
 
-            progress((i + 1) / frame_count, f"Processing frame {i + 1} of {frame_count}")
-
-            if frame is None:
-                print(f"Skipping frame {frame_num}: Could not read frame")
-                continue
-
-            try:
-                boxes, probs = mtcnn.detect(frame)
-                if boxes is not None and len(boxes) > 0:
-                    box = boxes[0]
-                    if probs[0] >= 0.99:
-                        x1, y1, x2, y2 = [int(b) for b in box]
-                        face = frame[y1:y2, x1:x2]
-                        if face.size == 0:
-                            print(f"Skipping frame {frame_num}: Detected face region is empty")
-                            continue
+            for j, (frame, frame_num, boxes, probs) in enumerate(zip(batch_frames, batch_nums, batch_boxes, batch_probs)):
+                if boxes is not None and len(boxes) > 0 and probs[0] >= 0.99:
+                    x1, y1, x2, y2 = [int(b) for b in boxes[0]]
+                    face = frame[y1:y2, x1:x2]
+                    if face.size > 0:
                         aligned_face = alignFace(face)
                         if aligned_face is not None:
                             aligned_face_resized = cv2.resize(aligned_face, (160, 160))
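A note on the hunk above: ffprobe reports `r_frame_rate` as a rational string such as `30000/1001`, which `float()` cannot parse directly, and that is why `get_video_info` goes through `fractions.Fraction`. A minimal standalone sketch of the same conversion, with hypothetical sample values:

import fractions

# Hypothetical ffprobe CSV output for an NTSC-rate stream: "30000/1001,3600"
frame_rate, frame_count = "30000/1001,3600".split(',')

frac = fractions.Fraction(frame_rate)            # Fraction(30000, 1001)
original_fps = float(frac.numerator) / float(frac.denominator)
print(round(original_fps, 2), int(frame_count))  # 29.97 3600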
@@ -181,11 +155,10 @@ def extract_and_align_faces_from_video(video_path, aligned_faces_folder, desired
                             embedding, emotion = get_face_embedding_and_emotion(aligned_face_resized)
                             embeddings_by_frame[frame_num] = embedding
                             emotions_by_frame[frame_num] = emotion
-            except Exception as e:
-                print(f"Error processing frame {frame_num}: {str(e)}")
-                continue
+
+        progress((i + len(batch_files)) / frame_count, f"Processing frames {i + 1} to {min(i + len(batch_files), frame_count)} of {frame_count}")
 
-    return embeddings_by_frame, emotions_by_frame
+    return embeddings_by_frame, emotions_by_frame
 
 def cluster_embeddings(embeddings):
     if len(embeddings) < 2:
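The rewritten `process_frames` leans on batched detection: facenet-pytorch's `MTCNN.detect` accepts a list of same-sized images and returns per-image boxes and probabilities. A minimal sketch under that assumption (the blank frames are placeholders):

import numpy as np
from facenet_pytorch import MTCNN

mtcnn = MTCNN()

# Frames decoded from a single video share one resolution, so they can be
# pushed through the detector together in one pass.
frames = [np.zeros((480, 640, 3), dtype=np.uint8) for _ in range(8)]

batch_boxes, batch_probs = mtcnn.detect(frames)
for boxes, probs in zip(batch_boxes, batch_probs):
    if boxes is not None and len(boxes) > 0 and probs[0] >= 0.99:
        x1, y1, x2, y2 = [int(b) for b in boxes[0]]  # highest-confidence face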
@@ -273,7 +246,9 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, bat
 
     # Ensure X is 2D
     if X.dim() == 1:
-        X = X.unsqueeze(
+        X = X.unsqueeze(1)  # Add a feature dimension
+    elif X.dim() > 2:
+        raise ValueError(f"Input X should be 1D or 2D, but got {X.dim()} dimensions")
 
     train_size = int(0.85 * len(X))
     X_train, X_val = X[:train_size], X[train_size:]
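The added dimension guard is easiest to see in isolation. A small sketch of what `unsqueeze(1)` does to a 1-D input:

import torch

X = torch.tensor([0.1, 0.2, 0.3])  # 1-D: (sequence,) with a single implicit feature
if X.dim() == 1:
    X = X.unsqueeze(1)             # -> (sequence, 1): one explicit feature column
elif X.dim() > 2:
    raise ValueError(f"Input X should be 1D or 2D, but got {X.dim()} dimensions")
print(X.shape)                     # torch.Size([3, 1])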
@@ -310,7 +285,10 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, bat
     component_indices = [feature_columns.index(col) for col in component_columns]
 
     if len(component_indices) > 0:
-        mse_comp = np.mean(np.power(X.cpu().numpy()[:, component_indices] - reconstructed[:, component_indices], 2), axis=1)
+        if X.dim() == 1:
+            mse_comp = mse_all  # If X is 1D, we can't select specific components
+        else:
+            mse_comp = np.mean(np.power(X.cpu().numpy()[:, component_indices] - reconstructed[:, component_indices], 2), axis=1)
     else:
         mse_comp = mse_all  # If no components, use all features
 
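The new branch only indexes feature columns when `X` is genuinely 2-D. A self-contained sketch of the per-frame reconstruction error over a subset of columns, with made-up arrays:

import numpy as np

# Made-up reconstruction of 4 feature columns over 3 frames.
X = np.array([[0.0, 1.0, 2.0, 3.0],
              [1.0, 2.0, 3.0, 4.0],
              [2.0, 3.0, 4.0, 5.0]])
reconstructed = X + 0.1

component_indices = [1, 2]  # columns belonging to the selected components
mse_all = np.mean(np.power(X - reconstructed, 2), axis=1)
mse_comp = np.mean(np.power(X[:, component_indices] - reconstructed[:, component_indices], 2), axis=1)
print(mse_all, mse_comp)    # per-frame error over all vs. selected features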
@@ -319,8 +297,8 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, bat
     anomalies_comp[top_indices_comp] = True
 
     return (anomalies_all, mse_all, top_indices_all,
-
-
+            anomalies_comp, mse_comp, top_indices_comp,
+            model)
 
 def plot_anomaly_scores(df, anomaly_scores, top_indices, title):
     fig, ax = plt.subplots(figsize=(16, 8))
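From the surrounding context lines, frames are flagged as anomalous by marking the largest reconstruction errors. A sketch of that top-k scheme with made-up scores:

import numpy as np

mse = np.array([0.10, 0.90, 0.20, 0.80, 0.15])  # made-up per-frame errors
num_anomalies = 2

top_indices = np.argsort(mse)[-num_anomalies:]  # indices of the largest errors
anomalies = np.zeros(len(mse), dtype=bool)
anomalies[top_indices] = True
print(top_indices, anomalies)  # [3 1] [False  True False  True False]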
@@ -379,7 +357,7 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
     frame_count, original_fps = get_video_info(video_path)
 
     progress(0.3, "Processing frames")
-    embeddings_by_frame, emotions_by_frame = process_frames(frames_folder, aligned_faces_folder, frame_count, progress)
+    embeddings_by_frame, emotions_by_frame = process_frames(frames_folder, aligned_faces_folder, frame_count, progress, batch_size)
 
     if not embeddings_by_frame:
         return "No faces were extracted from the video.", None, None, None, None, None, None
@@ -396,8 +374,10 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
 
     progress(0.9, "Performing anomaly detection")
     feature_columns = [col for col in df.columns if col not in ['Frame', 'Timecode', 'Time (Minutes)', 'Embedding_Index']]
+    X = df[feature_columns].values
+    print(f"Shape of input data: {X.shape}")  # Debug print
     try:
-        anomalies_all, anomaly_scores_all, top_indices_all, anomalies_comp, anomaly_scores_comp, top_indices_comp, _ = lstm_anomaly_detection(
+        anomalies_all, anomaly_scores_all, top_indices_all, anomalies_comp, anomaly_scores_comp, top_indices_comp, _ = lstm_anomaly_detection(X, feature_columns, num_anomalies=num_anomalies, batch_size=batch_size)
     except Exception as e:
         return f"Error in anomaly detection: {str(e)}", None, None, None, None, None, None
 
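The added `X = df[feature_columns].values` line is the hand-off from the per-frame DataFrame to the detector. A minimal sketch with hypothetical column names:

import pandas as pd

# Hypothetical per-frame DataFrame; only the component columns are features.
df = pd.DataFrame({'Frame': [0, 1],
                   'Timecode': ['00:00:00', '00:00:01'],
                   'Component 1': [0.1, 0.2],
                   'Component 2': [0.3, 0.4]})

feature_columns = [col for col in df.columns
                   if col not in ['Frame', 'Timecode', 'Time (Minutes)', 'Embedding_Index']]
X = df[feature_columns].values
print(f"Shape of input data: {X.shape}")  # (2, 2): frames x features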
@@ -425,65 +405,6 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
 
     return results, anomaly_plot_all, anomaly_plot_comp, components_plot, *emotion_plots
 
-def get_video_info(video_path):
-    ffprobe_command = [
-        'ffprobe',
-        '-v', 'error',
-        '-select_streams', 'v:0',
-        '-count_packets',
-        '-show_entries', 'stream=nb_read_packets,r_frame_rate',
-        '-of', 'csv=p=0',
-        video_path
-    ]
-    ffprobe_output = subprocess.check_output(ffprobe_command, universal_newlines=True).strip().split(',')
-    frame_rate, frame_count = ffprobe_output
-
-    frac = fractions.Fraction(frame_rate)
-    original_fps = float(frac.numerator) / float(frac.denominator)
-    frame_count = int(frame_count)
-
-    return frame_count, original_fps
-
-def process_frames(frames_folder, aligned_faces_folder, frame_count, progress):
-    embeddings_by_frame = {}
-    emotions_by_frame = {}
-
-    for i, frame_file in enumerate(sorted(os.listdir(frames_folder))):
-        if frame_file.endswith('.jpg'):
-            frame_num = int(frame_file.split('_')[1].split('.')[0])
-            frame_path = os.path.join(frames_folder, frame_file)
-            frame = cv2.imread(frame_path)
-
-            progress((i + 1) / frame_count, f"Processing frame {i + 1} of {frame_count}")
-
-            if frame is None:
-                print(f"Skipping frame {frame_num}: Could not read frame")
-                continue
-
-            try:
-                boxes, probs = mtcnn.detect(frame)
-                if boxes is not None and len(boxes) > 0:
-                    box = boxes[0]
-                    if probs[0] >= 0.99:
-                        x1, y1, x2, y2 = [int(b) for b in box]
-                        face = frame[y1:y2, x1:x2]
-                        if face.size == 0:
-                            print(f"Skipping frame {frame_num}: Detected face region is empty")
-                            continue
-                        aligned_face = alignFace(face)
-                        if aligned_face is not None:
-                            aligned_face_resized = cv2.resize(aligned_face, (160, 160))
-                            output_path = os.path.join(aligned_faces_folder, f"frame_{frame_num}_face.jpg")
-                            cv2.imwrite(output_path, aligned_face_resized)
-                            embedding, emotion = get_face_embedding_and_emotion(aligned_face_resized)
-                            embeddings_by_frame[frame_num] = embedding
-                            emotions_by_frame[frame_num] = emotion
-            except Exception as e:
-                print(f"Error processing frame {frame_num}: {str(e)}")
-                continue
-
-    return embeddings_by_frame, emotions_by_frame
-
 # Gradio interface
 iface = gr.Interface(
     fn=process_video,
@@ -513,7 +434,6 @@ iface = gr.Interface(
     - Number of Components: Complexity of the facial expression model
     - Desired FPS: Frames per second to analyze (lower for faster processing)
     - Batch Size: Affects processing speed and memory usage
-
     """
 )
 