Spaces:

reab5555
/

Multimodal-Behavioral-Anomalies-Detection

Running

App Files Files Community

reab5555 committed on Jul 19, 2024

Commit

21dc0af

verified ·

1 Parent(s): 86bd3cd

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -59

app.py CHANGED Viewed

@@ -35,8 +35,7 @@ matplotlib.rcParams['savefig.dpi'] = 400
 # Initialize models and other global variables
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
-mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.98, 0.98, 0.98], min_face_size=50,
-              selection_method='largest')
 model = InceptionResnetV1(pretrained='vggface2').eval().to(device)
 mp_face_mesh = mp.solutions.face_mesh
 face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.5)
@@ -156,7 +155,6 @@ def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, b
     return embeddings_by_frame, emotions_by_frame, aligned_face_paths
 def cluster_faces(embeddings):
     if len(embeddings) < 2:
         print("Not enough faces for clustering. Assigning all to one cluster.")
@@ -228,7 +226,7 @@ def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, de
     return df, largest_cluster
-def determine_optimal_anomalies(anomaly_scores, z_threshold=3):
     mean = np.mean(anomaly_scores)
     std = np.std(anomaly_scores)
     threshold = mean + z_threshold * std
@@ -239,7 +237,7 @@ def timecode_to_seconds(timecode):
     h, m, s = map(float, timecode.split(':'))
     return h * 3600 + m * 60 + s
-def group_similar_timecodes(timecodes, scores, threshold_seconds=5):
     grouped = []
     current_group = []
@@ -282,7 +280,7 @@ def lstm_anomaly_detection(X, feature_columns, epochs=100, batch_size=64):
     print(f"X shape after reshaping: {X.shape}")
-    train_size = int(0.85 * X.shape[1])
     X_train, X_val = X[:, :train_size, :], X[:, train_size:, :]
     model = LSTMAutoencoder(input_size=X.shape[2]).to(device)
@@ -366,6 +364,34 @@ def plot_to_image(fig):
     buf.seek(0)
     return buf
 def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
     plt.figure(figsize=(16, 8), dpi=300)
     fig, ax = plt.subplots(figsize=(16, 8))
@@ -373,25 +399,25 @@ def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
     df['Seconds'] = df['Timecode'].apply(
         lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
-    normalized_scores = normalize_scores(anomaly_scores)
-    seconds = df['Seconds'].values[1:]
-    scores = normalized_scores[1:]
     ax.scatter(seconds, scores, color='blue', alpha=0.7, s=10)
-    top_indices = [idx for idx in top_indices if idx > 0]
-    ax.scatter(df['Seconds'].iloc[top_indices], normalized_scores[top_indices], color='red', s=50, zorder=5)
     # Calculate and plot baseline
-    non_anomalous_scores = np.delete(normalized_scores, top_indices)
     baseline = np.mean(non_anomalous_scores)
     ax.axhline(y=baseline, color='black', linestyle='--', linewidth=2.5)
     ax.text(df['Seconds'].max(), baseline, f'Baseline ({baseline:.2f})',
             verticalalignment='bottom', horizontalalignment='right', color='black')
     grouped_timecodes = group_similar_timecodes([df['Timecode'].iloc[idx] for idx in top_indices],
-                                                normalized_scores[top_indices])
     for group in grouped_timecodes:
         max_score_idx = max(range(len(group)), key=lambda i: group[i][1])
@@ -424,12 +450,14 @@ def plot_emotion(df, emotion, anomaly_scores, top_indices, color, timecodes):
     df['Seconds'] = df['Timecode'].apply(
         lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
-    seconds = df['Seconds'].values[1:]
-    scores = anomaly_scores[1:]
     ax.scatter(seconds, scores, color=color, alpha=0.7, s=10)
-    top_indices = [idx for idx in top_indices if idx > 0]
     ax.scatter(df['Seconds'].iloc[top_indices], anomaly_scores[top_indices], color='red', s=50, zorder=5)
     # Calculate and plot baseline
@@ -467,28 +495,33 @@ def plot_emotion(df, emotion, anomaly_scores, top_indices, color, timecodes):
     plt.close()
     return fig
-def get_random_face_samples(organized_faces_folder, output_folder, largest_cluster, num_samples=100):
-    face_samples = []
-    for cluster_folder in os.listdir(organized_faces_folder):
         if cluster_folder.startswith("person_"):
             person_folder = os.path.join(organized_faces_folder, cluster_folder)
-            face_files = [f for f in os.listdir(person_folder) if f.endswith('.jpg')]
             if face_files:
-                if int(cluster_folder.split('_')[1]) == largest_cluster:
-                    samples = np.random.choice(face_files, min(num_samples, len(face_files)), replace=False)
                 else:
-                    samples = [np.random.choice(face_files)]
-                for i, sample in enumerate(samples):
-                    face_path = os.path.join(person_folder, sample)
-                    output_path = os.path.join(output_folder, f"face_sample_{cluster_folder}_{i}.jpg")
-                    face_img = cv2.imread(face_path)
-                    if face_img is not None:
-                        small_face = cv2.resize(face_img, (160, 160))
-                        cv2.imwrite(output_path, small_face)
-                        face_samples.append(output_path)
     return face_samples
 def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
     output_folder = "output"
     os.makedirs(output_folder, exist_ok=True)
@@ -519,7 +552,7 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
         if not aligned_face_paths:
             return ("No faces were extracted from the video.",
-                    None, None, None, None, None, None, None, None, None)
         progress(0.6, "Clustering faces")
         embeddings = [embedding for _, embedding in embeddings_by_frame.items()]
@@ -534,7 +567,7 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
                                                       original_fps, temp_dir, num_components, video_duration)
         progress(0.85, "Getting face samples")
-        face_samples = get_random_face_samples(organized_faces_folder, output_folder, largest_cluster)
         progress(0.9, "Performing anomaly detection")
         feature_columns = [col for col in df.columns if
@@ -559,7 +592,7 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
         except Exception as e:
             print(f"Error details: {str(e)}")
-            return f"Error in anomaly detection: {str(e)}", None, None, None, None, None, None, None, None, None
         progress(0.95, "Generating plots")
         try:
@@ -578,41 +611,36 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
                                           ['purple', 'green', 'orange', 'darkblue', 'gold', 'grey'])
             ]
         except Exception as e:
-            return f"Error generating plots: {str(e)}", None, None, None, None, None, None, None, None, None
         progress(1.0, "Preparing results")
         results = f"Number of persons/clusters detected: {num_clusters}\n\n"
         results += f"Breakdown of persons/clusters:\n"
         for cluster_id in range(num_clusters):
             results += f"Person/Cluster {cluster_id + 1}: {len([c for c in clusters if c == cluster_id])} frames\n"
-        results += f"\nAnomalies (Facial Features + Emotions):\n"
-        results += "\n".join([f"{score:.2f} at {timecode}" for score, timecode in
-                              zip(anomaly_scores_all[top_indices_all[1:]],
-                                  df['Timecode'].iloc[top_indices_all[1:]].values)])
-        results += f"\n\nAnomalies (Facial Features):\n"
-        results += "\n".join([f"{score:.2f} at {timecode}" for score, timecode in
-                              zip(anomaly_scores_comp[top_indices_comp[1:]],
-                                  df['Timecode'].iloc[top_indices_comp[1:]].values)])
-        for emotion in ['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral']:
-            results += f"\n\n{emotion.capitalize()} Anomalies:\n"
-            results += "\n".join([f"{emotion_anomalies[emotion]['scores'][i]:.2f} at {df['Timecode'].iloc[i]}"
-                                  for i in emotion_anomalies[emotion]['indices'] if i > 0])
         return (
             results,
             anomaly_plot_all,
             anomaly_plot_comp,
             *emotion_plots,
-            face_samples
         )
 iface = gr.Interface(
     fn=process_video,
     inputs=[
         gr.Video(),
-        gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Number of Components"),
-        gr.Slider(minimum=1, maximum=20, step=1, value=15, label="Desired FPS"),
         gr.Slider(minimum=1, maximum=32, step=1, value=8, label="Batch Size")
     ],
     outputs=[
@@ -625,12 +653,11 @@ iface = gr.Interface(
         gr.Plot(label="Happy Anomalies"),
         gr.Plot(label="Surprise Anomalies"),
         gr.Plot(label="Neutral Anomalies"),
-        gr.Gallery(label="Random Samples of Detected Persons", columns=[5], rows=[2], height="auto")
-    ],
     title="Facial Expressions Anomaly Detection",
     description="""
         This application detects anomalies in facial expressions and emotions from a video input.
-        It identifies distinct persons in the video and provides sample faces for each, with 10 samples for the most frequent person.
         Adjust the parameters as needed:
         - Number of Components: Complexity of the facial expression model
@@ -642,5 +669,5 @@ iface = gr.Interface(
     allow_flagging="never"
 )
-if __name__ == "__main__":
-    iface.launch()

 # Initialize models and other global variables
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
+mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.975, 0.975, 0.975], min_face_size=100)
 model = InceptionResnetV1(pretrained='vggface2').eval().to(device)
 mp_face_mesh = mp.solutions.face_mesh
 face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.5)
     return embeddings_by_frame, emotions_by_frame, aligned_face_paths
 def cluster_faces(embeddings):
     if len(embeddings) < 2:
         print("Not enough faces for clustering. Assigning all to one cluster.")
     return df, largest_cluster
+def determine_optimal_anomalies(anomaly_scores, z_threshold=3.5):
     mean = np.mean(anomaly_scores)
     std = np.std(anomaly_scores)
     threshold = mean + z_threshold * std
     h, m, s = map(float, timecode.split(':'))
     return h * 3600 + m * 60 + s
+def group_similar_timecodes(timecodes, scores, threshold_seconds=10):
     grouped = []
     current_group = []
     print(f"X shape after reshaping: {X.shape}")
+    train_size = int(0.9 * X.shape[1])
     X_train, X_val = X[:, :train_size, :], X[:, train_size:, :]
     model = LSTMAutoencoder(input_size=X.shape[2]).to(device)
     buf.seek(0)
     return buf
+def embedding_anomaly_detection(embeddings, epochs=100, batch_size=64):
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    X = torch.FloatTensor(embeddings).to(device)
+    if X.dim() == 2:
+        X = X.unsqueeze(0)
+    elif X.dim() == 1:
+        X = X.unsqueeze(0).unsqueeze(2)
+    model = LSTMAutoencoder(input_size=X.shape[2]).to(device)
+    criterion = nn.MSELoss()
+    optimizer = optim.Adam(model.parameters())
+    for epoch in range(epochs):
+        model.train()
+        optimizer.zero_grad()
+        output = model(X)
+        loss = criterion(output, X)
+        loss.backward()
+        optimizer.step()
+    model.eval()
+    with torch.no_grad():
+        reconstructed = model(X).squeeze(0).cpu().numpy()
+    mse = np.mean(np.power(X.squeeze(0).cpu().numpy() - reconstructed, 2), axis=1)
+    anomalies, top_indices = determine_optimal_anomalies(mse)
+    return anomalies, mse, top_indices
 def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
     plt.figure(figsize=(16, 8), dpi=300)
     fig, ax = plt.subplots(figsize=(16, 8))
     df['Seconds'] = df['Timecode'].apply(
         lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
+    # Filter out data points without faces
+    valid_indices = [i for i in range(len(anomaly_scores)) if i in df.index]
+    seconds = df['Seconds'].iloc[valid_indices].values
+    scores = anomaly_scores[valid_indices]
     ax.scatter(seconds, scores, color='blue', alpha=0.7, s=10)
+    top_indices = [idx for idx in top_indices if idx in valid_indices]
+    ax.scatter(df['Seconds'].iloc[top_indices], anomaly_scores[top_indices], color='red', s=50, zorder=5)
     # Calculate and plot baseline
+    non_anomalous_scores = np.delete(scores, top_indices)
     baseline = np.mean(non_anomalous_scores)
     ax.axhline(y=baseline, color='black', linestyle='--', linewidth=2.5)
     ax.text(df['Seconds'].max(), baseline, f'Baseline ({baseline:.2f})',
             verticalalignment='bottom', horizontalalignment='right', color='black')
     grouped_timecodes = group_similar_timecodes([df['Timecode'].iloc[idx] for idx in top_indices],
+                                                scores[top_indices])
     for group in grouped_timecodes:
         max_score_idx = max(range(len(group)), key=lambda i: group[i][1])
     df['Seconds'] = df['Timecode'].apply(
         lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
+    # Filter out data points without faces
+    valid_indices = [i for i in range(len(anomaly_scores)) if i in df.index]
+    seconds = df['Seconds'].iloc[valid_indices].values
+    scores = anomaly_scores[valid_indices]
     ax.scatter(seconds, scores, color=color, alpha=0.7, s=10)
+    top_indices = [idx for idx in top_indices if idx in valid_indices]
     ax.scatter(df['Seconds'].iloc[top_indices], anomaly_scores[top_indices], color='red', s=50, zorder=5)
     # Calculate and plot baseline
     plt.close()
     return fig
+def get_all_face_samples(organized_faces_folder, output_folder, largest_cluster):
+    face_samples = {"most_frequent": [], "others": []}
+    for cluster_folder in sorted(os.listdir(organized_faces_folder)):
         if cluster_folder.startswith("person_"):
             person_folder = os.path.join(organized_faces_folder, cluster_folder)
+            face_files = sorted([f for f in os.listdir(person_folder) if f.endswith('.jpg')])
             if face_files:
+                cluster_id = int(cluster_folder.split('_')[1])
+                if cluster_id == largest_cluster:
+                    for i, sample in enumerate(face_files):
+                        face_path = os.path.join(person_folder, sample)
+                        output_path = os.path.join(output_folder, f"face_sample_most_frequent_{i:04d}.jpg")
+                        face_img = cv2.imread(face_path)
+                        if face_img is not None:
+                            small_face = cv2.resize(face_img, (160, 160))
+                            cv2.imwrite(output_path, small_face)
+                            face_samples["most_frequent"].append(output_path)
                 else:
+                    for i, sample in enumerate(face_files):
+                        face_path = os.path.join(person_folder, sample)
+                        output_path = os.path.join(output_folder, f"face_sample_other_{cluster_id:02d}_{i:04d}.jpg")
+                        face_img = cv2.imread(face_path)
+                        if face_img is not None:
+                            small_face = cv2.resize(face_img, (160, 160))
+                            cv2.imwrite(output_path, small_face)
+                            face_samples["others"].append(output_path)
     return face_samples
 def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
     output_folder = "output"
     os.makedirs(output_folder, exist_ok=True)
         if not aligned_face_paths:
             return ("No faces were extracted from the video.",
+                    None, None, None, None, None, None, None, None)
         progress(0.6, "Clustering faces")
         embeddings = [embedding for _, embedding in embeddings_by_frame.items()]
                                                       original_fps, temp_dir, num_components, video_duration)
         progress(0.85, "Getting face samples")
+        face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
         progress(0.9, "Performing anomaly detection")
         feature_columns = [col for col in df.columns if
         except Exception as e:
             print(f"Error details: {str(e)}")
+            return f"Error in anomaly detection: {str(e)}", None, None, None, None, None, None, None, None
         progress(0.95, "Generating plots")
         try:
                                           ['purple', 'green', 'orange', 'darkblue', 'gold', 'grey'])
             ]
         except Exception as e:
+            return f"Error generating plots: {str(e)}", None, None, None, None, None, None, None, None
         progress(1.0, "Preparing results")
         results = f"Number of persons/clusters detected: {num_clusters}\n\n"
         results += f"Breakdown of persons/clusters:\n"
         for cluster_id in range(num_clusters):
             results += f"Person/Cluster {cluster_id + 1}: {len([c for c in clusters if c == cluster_id])} frames\n"
         return (
             results,
             anomaly_plot_all,
             anomaly_plot_comp,
             *emotion_plots,
+            face_samples["most_frequent"],
+            face_samples["others"]
         )
+gallery_outputs = [  # appended to the Interface outputs; matches the last two values returned by process_video
+    gr.Gallery(label="Most Frequent Person Random Samples", columns=5, rows=2, height="auto"),  # receives face_samples["most_frequent"]
+    gr.Gallery(label="Other Persons Random Samples", columns=5, rows=1, height="auto")  # receives face_samples["others"]
+]
 iface = gr.Interface(
     fn=process_video,
     inputs=[
         gr.Video(),
+        gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Number of Components"),
+        gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Desired FPS"),
         gr.Slider(minimum=1, maximum=32, step=1, value=8, label="Batch Size")
     ],
     outputs=[
         gr.Plot(label="Happy Anomalies"),
         gr.Plot(label="Surprise Anomalies"),
         gr.Plot(label="Neutral Anomalies"),
+    ] + gallery_outputs,
     title="Facial Expressions Anomaly Detection",
     description="""
         This application detects anomalies in facial expressions and emotions from a video input.
+        It identifies distinct persons in the video and provides sample faces for each, with multiple samples for the most frequent person.
         Adjust the parameters as needed:
         - Number of Components: Complexity of the facial expression model
     allow_flagging="never"
 )
+iface.launch()