Update app.py
app.py
CHANGED
@@ -173,7 +173,7 @@ def cluster_faces(face_images):
     X = X / 255.0
 
     # Perform DBSCAN clustering
-    dbscan = DBSCAN(eps=0.3, min_samples=
+    dbscan = DBSCAN(eps=0.3, min_samples=10, metric='euclidean')
     clusters = dbscan.fit_predict(X)
 
     # If DBSCAN assigns all to noise (-1), consider it as one cluster
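For context on the new clustering parameters: `min_samples` sets how many neighbours a point needs within `eps` before it can seed a cluster, so raising it makes DBSCAN stricter and pushes borderline faces into noise (`-1`). A minimal sketch on synthetic data (not the app's face vectors) illustrating the effect:

```python
# Hypothetical data: two tight blobs scaled to [0, 1], like the flattened faces above.
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=300, centers=2, cluster_std=0.05, random_state=0)
X = (X - X.min()) / (X.max() - X.min())

for min_samples in (5, 10):
    labels = DBSCAN(eps=0.3, min_samples=min_samples, metric='euclidean').fit_predict(X)
    n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
    print(f"min_samples={min_samples}: {n_clusters} clusters, {(labels == -1).sum()} noise points")
```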
@@ -238,6 +238,13 @@ def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, de
 
     return df, largest_cluster
 
+def determine_optimal_anomalies(anomaly_scores, z_threshold=3):
+    mean = np.mean(anomaly_scores)
+    std = np.std(anomaly_scores)
+    threshold = mean + z_threshold * std
+    anomalies = anomaly_scores > threshold
+    return anomalies, np.where(anomalies)[0]
+
 class LSTMAutoencoder(nn.Module):
     def __init__(self, input_size, hidden_size=64, num_layers=2):
         super(LSTMAutoencoder, self).__init__()
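This helper is the core of the change: instead of always reporting a fixed `num_anomalies`, it flags every score above mean + `z_threshold`·std, so the number of reported anomalies adapts to the score distribution. A quick sanity check with synthetic scores (values are illustrative only):

```python
import numpy as np

def determine_optimal_anomalies(anomaly_scores, z_threshold=3):
    threshold = np.mean(anomaly_scores) + z_threshold * np.std(anomaly_scores)
    anomalies = anomaly_scores > threshold
    return anomalies, np.where(anomalies)[0]

rng = np.random.default_rng(0)
scores = np.concatenate([rng.normal(0.1, 0.02, 500), [0.9, 0.95]])  # two injected outliers
mask, idx = determine_optimal_anomalies(scores)
print(idx)  # [500 501] -- only the injected outliers clear mean + 3*std
```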
@@ -252,7 +259,7 @@ class LSTMAutoencoder(nn.Module):
         out = self.fc(outputs)
         return out
 
-def lstm_anomaly_detection(X, feature_columns,
+def lstm_anomaly_detection(X, feature_columns, epochs=100, batch_size=64):
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
     X = torch.FloatTensor(X).to(device)
     if X.dim() == 2:
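The `if X.dim() == 2:` guard that follows (unchanged here) presumably promotes a `(frames, features)` matrix to a single-sequence batch, which is what a `batch_first` LSTM expects. A minimal shape check of that convention (hidden sizes mirror the `LSTMAutoencoder` defaults above):

```python
import torch
import torch.nn as nn

X = torch.randn(120, 16)   # 120 frames, 16 features for one video
if X.dim() == 2:
    X = X.unsqueeze(0)     # -> (1, 120, 16): batch of one sequence

lstm = nn.LSTM(input_size=16, hidden_size=64, num_layers=2, batch_first=True)
out, _ = lstm(X)
print(out.shape)           # torch.Size([1, 120, 64])
```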
@@ -289,9 +296,7 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, bat
     reconstructed = model(X).squeeze(0).cpu().numpy()
 
     mse_all = np.mean(np.power(X.squeeze(0).cpu().numpy() - reconstructed, 2), axis=1)
-    top_indices_all = mse_all
-    anomalies_all = np.zeros(len(mse_all), dtype=bool)
-    anomalies_all[top_indices_all] = True
+    anomalies_all, top_indices_all = determine_optimal_anomalies(mse_all)
 
     component_columns = [col for col in feature_columns if col.startswith('Comp')]
     component_indices = [feature_columns.index(col) for col in component_columns]
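The `mse_all` line reduces over the feature axis, leaving one reconstruction error per frame; `determine_optimal_anomalies` then thresholds that vector. A toy illustration with a deliberately corrupted frame (synthetic arrays, not model output):

```python
import numpy as np

rng = np.random.default_rng(1)
original = rng.random((100, 16))                       # (frames, features)
reconstructed = original + rng.normal(0, 0.01, original.shape)
reconstructed[42] += 0.5                               # one badly reconstructed frame

mse = np.mean(np.power(original - reconstructed, 2), axis=1)
print(mse.shape, mse.argmax())                         # (100,) 42
```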
@@ -302,15 +307,13 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, bat
     else:
         mse_comp = mse_all
 
-    top_indices_comp = mse_comp
-    anomalies_comp = np.zeros(len(mse_comp), dtype=bool)
-    anomalies_comp[top_indices_comp] = True
+    anomalies_comp, top_indices_comp = determine_optimal_anomalies(mse_comp)
 
     return (anomalies_all, mse_all, top_indices_all,
             anomalies_comp, mse_comp, top_indices_comp,
             model)
 
-def emotion_anomaly_detection(emotion_data,
+def emotion_anomaly_detection(emotion_data, epochs=100, batch_size=64):
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
     X = torch.FloatTensor(emotion_data.values).to(device)
     if X.dim() == 1:
@@ -335,9 +338,7 @@ def emotion_anomaly_detection(emotion_data, num_anomalies=10, epochs=100, batch_
     reconstructed = model(X).squeeze(0).cpu().numpy()
 
     mse = np.mean(np.power(X.squeeze(0).cpu().numpy() - reconstructed, 2), axis=1)
-    top_indices = mse
-    anomalies = np.zeros(len(mse), dtype=bool)
-    anomalies[top_indices] = True
+    anomalies, top_indices = determine_optimal_anomalies(mse)
 
     return anomalies, mse, top_indices
 
@@ -350,7 +351,7 @@ def normalize_scores(scores):
 
 
 def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
-    plt.figure(figsize=(16, 8), dpi=
+    plt.figure(figsize=(16, 8), dpi=500)
     fig, ax = plt.subplots(figsize=(16, 8))
 
     df['Seconds'] = df['Timecode'].apply(
@@ -379,7 +380,7 @@ def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
 
     max_seconds = df['Seconds'].max()
     ax.set_xlim(0, max_seconds)
-    num_ticks =
+    num_ticks = 100
     ax.set_xticks(np.linspace(0, max_seconds, num_ticks))
     ax.set_xticklabels([f"{int(x // 60):02d}:{int(x % 60):02d}" for x in ax.get_xticks()],
                        rotation=90, ha='center', va='top')
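Hardcoding `num_ticks = 100` decouples tick density from clip length; for a ten-minute clip that puts a label roughly every six seconds. A quick check of the MM:SS formatting used above:

```python
import numpy as np

max_seconds, num_ticks = 600, 100  # assumed: a 10-minute clip
ticks = np.linspace(0, max_seconds, num_ticks)
labels = [f"{int(x // 60):02d}:{int(x % 60):02d}" for x in ticks]
print(labels[0], labels[-1], len(labels))  # 00:00 10:00 100
```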
@@ -392,8 +393,8 @@ def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
     plt.tight_layout()
     return fig
 
-def plot_emotion(df, emotion, anomaly_scores, top_indices,
-    plt.figure(figsize=(16, 8), dpi=
+def plot_emotion(df, emotion, anomaly_scores, top_indices, color, timecodes):
+    plt.figure(figsize=(16, 8), dpi=500)
     fig, ax = plt.subplots(figsize=(16, 8))
 
     df['Seconds'] = df['Timecode'].apply(
@@ -419,38 +420,45 @@ def plot_emotion(df, emotion, anomaly_scores, top_indices, num_anomalies, color,
 
     max_seconds = df['Seconds'].max()
     ax.set_xlim(0, max_seconds)
-    num_ticks =
+    num_ticks = 100
     ax.set_xticks(np.linspace(0, max_seconds, num_ticks))
     ax.set_xticklabels([f"{int(x // 60):02d}:{int(x % 60):02d}" for x in ax.get_xticks()],
                        rotation=90, ha='center', va='top')
 
     ax.set_xlabel('Time')
     ax.set_ylabel(f'{emotion.capitalize()} Anomaly Score')
-    ax.set_title(f'{emotion.capitalize()} Anomaly Scores
+    ax.set_title(f'{emotion.capitalize()} Anomaly Scores')
 
     ax.grid(True, linestyle='--', alpha=0.7)
     plt.tight_layout()
     return fig
 
-def get_random_face_samples(organized_faces_folder, output_folder):
+def get_random_face_samples(organized_faces_folder, output_folder, largest_cluster, num_samples=100):
     face_samples = []
     for cluster_folder in os.listdir(organized_faces_folder):
         if cluster_folder.startswith("person_"):
             person_folder = os.path.join(organized_faces_folder, cluster_folder)
             face_files = [f for f in os.listdir(person_folder) if f.endswith('.jpg')]
             if face_files:
-
-
-
-
-
-
-
-
+                if int(cluster_folder.split('_')[1]) == largest_cluster:
+                    # Get 10 samples for the largest cluster
+                    samples = np.random.choice(face_files, min(num_samples, len(face_files)), replace=False)
+                else:
+                    # Get 1 sample for other clusters
+                    samples = [np.random.choice(face_files)]
+
+                for i, sample in enumerate(samples):
+                    face_path = os.path.join(person_folder, sample)
+                    output_path = os.path.join(output_folder, f"face_sample_{cluster_folder}_{i}.jpg")
+                    face_img = cv2.imread(face_path)
+                    if face_img is not None:
+                        small_face = cv2.resize(face_img, (160, 160))
+                        cv2.imwrite(output_path, small_face)
+                        face_samples.append(output_path)
     return face_samples
 
 
-def process_video(video_path,
+def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
     output_folder = "output"
     os.makedirs(output_folder, exist_ok=True)
 
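The branching in `get_random_face_samples` boils down to: many samples (capped at what exists, drawn without replacement) for the largest cluster, a single sample for everyone else. A condensed, self-contained sketch of just that rule (`pick_samples` is a hypothetical name, not in the app):

```python
import numpy as np

rng = np.random.default_rng(0)

def pick_samples(face_files, is_largest, num_samples=100):
    if is_largest:
        return list(rng.choice(face_files, min(num_samples, len(face_files)), replace=False))
    return [rng.choice(face_files)]

files = [f"face_{i}.jpg" for i in range(12)]
print(len(pick_samples(files, is_largest=True)))   # 12 -- capped by available files
print(len(pick_samples(files, is_largest=False)))  # 1
```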
@@ -490,13 +498,13 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
     progress(0.7, "Organizing faces")
     organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder)
 
-    progress(0.75, "Getting face samples")
-    face_samples = get_random_face_samples(organized_faces_folder, output_folder)
-
     progress(0.8, "Saving person data")
     df, largest_cluster = save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps,
                                                   original_fps, temp_dir, num_components, video_duration)
 
+    progress(0.85, "Getting face samples")
+    face_samples = get_random_face_samples(organized_faces_folder, output_folder, largest_cluster)
+
     progress(0.9, "Performing anomaly detection")
     feature_columns = [col for col in df.columns if
                        col not in ['Frame', 'Timecode', 'Time (Minutes)', 'Embedding_Index']]
@@ -504,7 +512,7 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
 
     try:
         anomalies_all, anomaly_scores_all, top_indices_all, anomalies_comp, anomaly_scores_comp, top_indices_comp, _ = lstm_anomaly_detection(
-            X, feature_columns,
+            X, feature_columns, batch_size=batch_size)
 
         # Normalize anomaly scores
         anomaly_scores_all = normalize_scores(anomaly_scores_all)
@@ -513,7 +521,7 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
         # Perform anomaly detection for each emotion using LSTM autoencoder
         emotion_anomalies = {}
         for emotion in ['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral']:
-            anomalies, scores, indices = emotion_anomaly_detection(df[emotion]
+            anomalies, scores, indices = emotion_anomaly_detection(df[emotion])
             emotion_anomalies[emotion] = {
                 'anomalies': anomalies,
                 'scores': normalize_scores(scores),
@@ -534,7 +542,6 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
             plot_emotion(df, emotion,
                          emotion_anomalies[emotion]['scores'],
                          emotion_anomalies[emotion]['indices'],
-                         num_anomalies,
                          color,
                          df['Timecode'].iloc[emotion_anomalies[emotion]['indices']].values)
             for emotion, color in zip(['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral'],
@@ -548,17 +555,17 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
         results += f"Breakdown of persons/clusters:\n"
         for cluster_id in range(num_clusters):
             results += f"Person/Cluster {cluster_id + 1}: {len([c for c in clusters if c == cluster_id])} frames\n"
-        results += f"\
+        results += f"\nAnomalies (Facial Features + Emotions):\n"
         results += "\n".join([f"{score:.2f} at {timecode}" for score, timecode in
                               zip(anomaly_scores_all[top_indices_all[1:]],
                                   df['Timecode'].iloc[top_indices_all[1:]].values)])
-        results += f"\n\
+        results += f"\n\nAnomalies (Facial Features):\n"
         results += "\n".join([f"{score:.2f} at {timecode}" for score, timecode in
                               zip(anomaly_scores_comp[top_indices_comp[1:]],
                                   df['Timecode'].iloc[top_indices_comp[1:]].values)])
 
         for emotion in ['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral']:
-            results += f"\n\
+            results += f"\n\n{emotion.capitalize()} Anomalies:\n"
             results += "\n".join([f"{emotion_anomalies[emotion]['scores'][i]:.2f} at {df['Timecode'].iloc[i]}"
                                   for i in emotion_anomalies[emotion]['indices'] if i > 0])
 
@@ -575,7 +582,6 @@ iface = gr.Interface(
     fn=process_video,
     inputs=[
         gr.Video(),
-        gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Number of Anomalies"),
         gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Number of Components"),
         gr.Slider(minimum=1, maximum=20, step=1, value=15, label="Desired FPS"),
         gr.Slider(minimum=1, maximum=32, step=1, value=8, label="Batch Size")
@@ -590,15 +596,14 @@ iface = gr.Interface(
         gr.Plot(label="Happy Anomalies"),
         gr.Plot(label="Surprise Anomalies"),
         gr.Plot(label="Neutral Anomalies"),
-        gr.Gallery(label="Detected Persons", columns=[
+        gr.Gallery(label="Detected Persons", columns=[5], rows=[2], height="auto")
     ],
     title="Facial Expressions Anomaly Detection",
     description="""
     This application detects anomalies in facial expressions and emotions from a video input.
-    It identifies distinct persons in the video and provides
+    It identifies distinct persons in the video and provides sample faces for each, with 10 samples for the most frequent person.
 
     Adjust the parameters as needed:
-    - Number of Anomalies: How many top anomalies or high intensities to highlight
     - Number of Components: Complexity of the facial expression model
     - Desired FPS: Frames per second to analyze (lower for faster processing)
     - Batch Size: Affects processing speed and memory usage
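With the "Number of Anomalies" slider gone, `process_video` takes one fewer positional argument, and the remaining sliders map to it in order. A minimal stub showing the resulting wiring (assuming this Gradio API version; `process_stub` stands in for the real function):

```python
import gradio as gr

def process_stub(video_path, num_components, desired_fps, batch_size):
    return f"video={video_path}, components={num_components}, fps={desired_fps}, batch={batch_size}"

demo = gr.Interface(
    fn=process_stub,
    inputs=[
        gr.Video(),
        gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Number of Components"),
        gr.Slider(minimum=1, maximum=20, step=1, value=15, label="Desired FPS"),
        gr.Slider(minimum=1, maximum=32, step=1, value=8, label="Batch Size"),
    ],
    outputs=gr.Textbox(label="Results"),
)

if __name__ == "__main__":
    demo.launch()
```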