Spaces:

reab5555
/

Multimodal-Behavioral-Anomalies-Detection

Sleeping

App Files Files Community

reab5555 committed on Jul 19, 2024

Commit

86bd3cd

verified ·

1 Parent(s): d431c9d

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -44

app.py CHANGED Viewed

@@ -21,6 +21,7 @@ from PIL import Image
 import gradio as gr
 import tempfile
 import shutil
 # Suppress TensorFlow warnings
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
@@ -34,7 +35,7 @@ matplotlib.rcParams['savefig.dpi'] = 400
 # Initialize models and other global variables
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
-mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.999, 0.999, 0.999], min_face_size=100,
               selection_method='largest')
 model = InceptionResnetV1(pretrained='vggface2').eval().to(device)
 mp_face_mesh = mp.solutions.face_mesh
@@ -155,31 +156,20 @@ def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, b
     return embeddings_by_frame, emotions_by_frame, aligned_face_paths
-def cluster_faces(face_images):
-    if len(face_images) < 2:
-        print("Not enough faces for clustering. Assigning all to one cluster.")
-        return np.zeros(len(face_images), dtype=int)
-    # Resize all images to a consistent size
-    resized_faces = [cv2.resize(face, (224, 224)) for face in face_images]
-    # Convert images to grayscale and flatten
-    gray_faces = [cv2.cvtColor(face, cv2.COLOR_BGR2GRAY).flatten() for face in resized_faces]
-    # Stack the flattened images
-    X = np.stack(gray_faces)
-    # Normalize the pixel values
-    X = X / 255.0
-    # Perform DBSCAN clustering
-    dbscan = DBSCAN(eps=0.3, min_samples=10, metric='euclidean')
     clusters = dbscan.fit_predict(X)
-    # If DBSCAN assigns all to noise (-1), consider it as one cluster
     if np.all(clusters == -1):
         print("DBSCAN assigned all to noise. Considering as one cluster.")
-        return np.zeros(len(face_images), dtype=int)
     return clusters
@@ -245,12 +235,10 @@ def determine_optimal_anomalies(anomaly_scores, z_threshold=3):
     anomalies = anomaly_scores > threshold
     return anomalies, np.where(anomalies)[0]
 def timecode_to_seconds(timecode):
     """Convert a colon-separated timecode string to seconds.

     Accepts ``H:M:S``, ``M:S`` or plain ``S``. Each field is parsed with
     ``float``, so fractional seconds such as ``"00:00:01.5"`` are allowed.

     Args:
         timecode: Timecode string, e.g. ``"01:02:03"``.

     Returns:
         Total number of seconds as a float.

     Raises:
         ValueError: If a field is not numeric or there are more than
             three fields.
     """
     fields = [float(part) for part in timecode.split(':')]
     if len(fields) > 3:
         raise ValueError(f"Invalid timecode: {timecode!r}")
     # Left-pad missing hour/minute fields with zero so shorter timecodes
     # go through the same arithmetic as the full H:M:S form.
     h, m, s = [0.0] * (3 - len(fields)) + fields
     return h * 3600 + m * 60 + s
 def group_similar_timecodes(timecodes, scores, threshold_seconds=5):
     grouped = []
     current_group = []
@@ -372,32 +360,39 @@ def normalize_scores(scores):
         return np.full_like(scores, 100)
     return ((scores - min_score) / (max_score - min_score)) * 100
 def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
-    plt.figure(figsize=(16, 8), dpi=500)
     fig, ax = plt.subplots(figsize=(16, 8))
     df['Seconds'] = df['Timecode'].apply(
         lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
-    # Normalize scores
     normalized_scores = normalize_scores(anomaly_scores)
-    # Omit the first data point
     seconds = df['Seconds'].values[1:]
     scores = normalized_scores[1:]
-    # Create scatter plot
     ax.scatter(seconds, scores, color='blue', alpha=0.7, s=10)
-    # Highlight top anomalies (excluding the first data point)
     top_indices = [idx for idx in top_indices if idx > 0]
     ax.scatter(df['Seconds'].iloc[top_indices], normalized_scores[top_indices], color='red', s=50, zorder=5)
-    # Group similar timecodes
     grouped_timecodes = group_similar_timecodes([df['Timecode'].iloc[idx] for idx in top_indices],
                                                 normalized_scores[top_indices])
-    # Add timecode annotations for grouped timecodes
     for group in grouped_timecodes:
         max_score_idx = max(range(len(group)), key=lambda i: group[i][1])
         timecode, score, idx = group[max_score_idx]
@@ -415,35 +410,39 @@ def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
     ax.set_xlabel('Time')
     ax.set_ylabel('Anomaly Score')
-    ax.set_title(f'Anomaly Scores ({title})')
     ax.grid(True, linestyle='--', alpha=0.7)
     plt.tight_layout()
     return fig
 def plot_emotion(df, emotion, anomaly_scores, top_indices, color, timecodes):
-    plt.figure(figsize=(16, 8), dpi=500)
     fig, ax = plt.subplots(figsize=(16, 8))
     df['Seconds'] = df['Timecode'].apply(
         lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
-    # Omit the first data point
     seconds = df['Seconds'].values[1:]
     scores = anomaly_scores[1:]
-    # Create scatter plot
     ax.scatter(seconds, scores, color=color, alpha=0.7, s=10)
-    # Highlight top anomalies (excluding the first data point)
     top_indices = [idx for idx in top_indices if idx > 0]
     ax.scatter(df['Seconds'].iloc[top_indices], anomaly_scores[top_indices], color='red', s=50, zorder=5)
-    # Group similar timecodes
     grouped_timecodes = group_similar_timecodes([df['Timecode'].iloc[idx] for idx in top_indices],
                                                 anomaly_scores[top_indices])
-    # Add timecode annotations for grouped timecodes
     for group in grouped_timecodes:
         max_score_idx = max(range(len(group)), key=lambda i: group[i][1])
         timecode, score, idx = group[max_score_idx]
@@ -465,6 +464,7 @@ def plot_emotion(df, emotion, anomaly_scores, top_indices, color, timecodes):
     ax.grid(True, linestyle='--', alpha=0.7)
     plt.tight_layout()
     return fig
 def get_random_face_samples(organized_faces_folder, output_folder, largest_cluster, num_samples=100):
@@ -475,10 +475,8 @@ def get_random_face_samples(organized_faces_folder, output_folder, largest_clust
             face_files = [f for f in os.listdir(person_folder) if f.endswith('.jpg')]
             if face_files:
                 if int(cluster_folder.split('_')[1]) == largest_cluster:
-                    # Get 10 samples for the largest cluster
                     samples = np.random.choice(face_files, min(num_samples, len(face_files)), replace=False)
                 else:
-                    # Get 1 sample for other clusters
                     samples = [np.random.choice(face_files)]
                 for i, sample in enumerate(samples):
@@ -491,7 +489,6 @@ def get_random_face_samples(organized_faces_folder, output_folder, largest_clust
                         face_samples.append(output_path)
     return face_samples
 def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
     output_folder = "output"
     os.makedirs(output_folder, exist_ok=True)
@@ -525,8 +522,8 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
                     None, None, None, None, None, None, None, None, None)
         progress(0.6, "Clustering faces")
-        face_images = [cv2.imread(path) for path in aligned_face_paths]
-        clusters = cluster_faces(face_images)
         num_clusters = len(set(clusters))  # Get the number of unique clusters
         progress(0.7, "Organizing faces")
@@ -548,11 +545,9 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
             anomalies_all, anomaly_scores_all, top_indices_all, anomalies_comp, anomaly_scores_comp, top_indices_comp, _ = lstm_anomaly_detection(
                 X, feature_columns, batch_size=batch_size)
-            # Normalize anomaly scores
             anomaly_scores_all = normalize_scores(anomaly_scores_all)
             anomaly_scores_comp = normalize_scores(anomaly_scores_comp)
-            # Perform anomaly detection for each emotion using LSTM autoencoder
             emotion_anomalies = {}
             for emotion in ['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral']:
                 anomalies, scores, indices = emotion_anomaly_detection(df[emotion])
@@ -568,7 +563,8 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
         progress(0.95, "Generating plots")
         try:
-            anomaly_plot_all = plot_anomaly_scores(df, anomaly_scores_all, top_indices_all, "Facial Features + Emotions",
                                                    df['Timecode'].iloc[top_indices_all].values)
             anomaly_plot_comp = plot_anomaly_scores(df, anomaly_scores_comp, top_indices_comp, "Facial Features",
                                                     df['Timecode'].iloc[top_indices_comp].values)
@@ -611,7 +607,6 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
             face_samples
         )
 iface = gr.Interface(
     fn=process_video,
     inputs=[
@@ -641,6 +636,8 @@ iface = gr.Interface(
         - Number of Components: Complexity of the facial expression model
         - Desired FPS: Frames per second to analyze (lower for faster processing)
         - Batch Size: Affects processing speed and memory usage
         """,
     allow_flagging="never"
 )

 import gradio as gr
 import tempfile
 import shutil
+import io
 # Suppress TensorFlow warnings
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 # Initialize models and other global variables
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
+mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.98, 0.98, 0.98], min_face_size=50,
               selection_method='largest')
 model = InceptionResnetV1(pretrained='vggface2').eval().to(device)
 mp_face_mesh = mp.solutions.face_mesh
     return embeddings_by_frame, emotions_by_frame, aligned_face_paths
+def cluster_faces(embeddings):
+    if len(embeddings) < 2:
+        print("Not enough faces for clustering. Assigning all to one cluster.")
+        return np.zeros(len(embeddings), dtype=int)
+    X = np.stack(embeddings)
+    dbscan = DBSCAN(eps=0.5, min_samples=5, metric='cosine')
     clusters = dbscan.fit_predict(X)
     if np.all(clusters == -1):
         print("DBSCAN assigned all to noise. Considering as one cluster.")
+        return np.zeros(len(embeddings), dtype=int)
     return clusters
     anomalies = anomaly_scores > threshold
     return anomalies, np.where(anomalies)[0]
 def timecode_to_seconds(timecode):
     """Convert a colon-separated timecode string to seconds.

     Accepts ``H:M:S``, ``M:S`` or plain ``S``. Each field is parsed with
     ``float``, so fractional seconds such as ``"00:00:01.5"`` are allowed.

     Args:
         timecode: Timecode string, e.g. ``"01:02:03"``.

     Returns:
         Total number of seconds as a float.

     Raises:
         ValueError: If a field is not numeric or there are more than
             three fields.
     """
     fields = [float(part) for part in timecode.split(':')]
     if len(fields) > 3:
         raise ValueError(f"Invalid timecode: {timecode!r}")
     # Left-pad missing hour/minute fields with zero so shorter timecodes
     # go through the same arithmetic as the full H:M:S form.
     h, m, s = [0.0] * (3 - len(fields)) + fields
     return h * 3600 + m * 60 + s
 def group_similar_timecodes(timecodes, scores, threshold_seconds=5):
     grouped = []
     current_group = []
         return np.full_like(scores, 100)
     return ((scores - min_score) / (max_score - min_score)) * 100
+def plot_to_image(fig):
+    buf = io.BytesIO()
+    fig.savefig(buf, format='png', dpi=300, bbox_inches='tight')
+    buf.seek(0)
+    return buf
 def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
+    plt.figure(figsize=(16, 8), dpi=300)
     fig, ax = plt.subplots(figsize=(16, 8))
     df['Seconds'] = df['Timecode'].apply(
         lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
     normalized_scores = normalize_scores(anomaly_scores)
     seconds = df['Seconds'].values[1:]
     scores = normalized_scores[1:]
     ax.scatter(seconds, scores, color='blue', alpha=0.7, s=10)
     top_indices = [idx for idx in top_indices if idx > 0]
     ax.scatter(df['Seconds'].iloc[top_indices], normalized_scores[top_indices], color='red', s=50, zorder=5)
+    # Calculate and plot baseline
+    non_anomalous_scores = np.delete(normalized_scores, top_indices)
+    baseline = np.mean(non_anomalous_scores)
+    ax.axhline(y=baseline, color='black', linestyle='--', linewidth=2.5)
+    ax.text(df['Seconds'].max(), baseline, f'Baseline ({baseline:.2f})',
+            verticalalignment='bottom', horizontalalignment='right', color='black')
     grouped_timecodes = group_similar_timecodes([df['Timecode'].iloc[idx] for idx in top_indices],
                                                 normalized_scores[top_indices])
     for group in grouped_timecodes:
         max_score_idx = max(range(len(group)), key=lambda i: group[i][1])
         timecode, score, idx = group[max_score_idx]
     ax.set_xlabel('Time')
     ax.set_ylabel('Anomaly Score')
+    ax.set_title(title)
     ax.grid(True, linestyle='--', alpha=0.7)
     plt.tight_layout()
+    plt.close()
     return fig
 def plot_emotion(df, emotion, anomaly_scores, top_indices, color, timecodes):
+    plt.figure(figsize=(16, 8), dpi=300)
     fig, ax = plt.subplots(figsize=(16, 8))
     df['Seconds'] = df['Timecode'].apply(
         lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
     seconds = df['Seconds'].values[1:]
     scores = anomaly_scores[1:]
     ax.scatter(seconds, scores, color=color, alpha=0.7, s=10)
     top_indices = [idx for idx in top_indices if idx > 0]
     ax.scatter(df['Seconds'].iloc[top_indices], anomaly_scores[top_indices], color='red', s=50, zorder=5)
+    # Calculate and plot baseline
+    non_anomalous_scores = np.delete(anomaly_scores, top_indices)
+    baseline = np.mean(non_anomalous_scores)
+    ax.axhline(y=baseline, color='black', linestyle='--', linewidth=2.5)
+    ax.text(df['Seconds'].max(), baseline, f'Baseline ({baseline:.2f})',
+            verticalalignment='bottom', horizontalalignment='right', color='black')
     grouped_timecodes = group_similar_timecodes([df['Timecode'].iloc[idx] for idx in top_indices],
                                                 anomaly_scores[top_indices])
     for group in grouped_timecodes:
         max_score_idx = max(range(len(group)), key=lambda i: group[i][1])
         timecode, score, idx = group[max_score_idx]
     ax.grid(True, linestyle='--', alpha=0.7)
     plt.tight_layout()
+    plt.close()
     return fig
 def get_random_face_samples(organized_faces_folder, output_folder, largest_cluster, num_samples=100):
             face_files = [f for f in os.listdir(person_folder) if f.endswith('.jpg')]
             if face_files:
                 if int(cluster_folder.split('_')[1]) == largest_cluster:
                     samples = np.random.choice(face_files, min(num_samples, len(face_files)), replace=False)
                 else:
                     samples = [np.random.choice(face_files)]
                 for i, sample in enumerate(samples):
                         face_samples.append(output_path)
     return face_samples
 def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
     output_folder = "output"
     os.makedirs(output_folder, exist_ok=True)
                     None, None, None, None, None, None, None, None, None)
         progress(0.6, "Clustering faces")
+        embeddings = [embedding for _, embedding in embeddings_by_frame.items()]
+        clusters = cluster_faces(embeddings)
         num_clusters = len(set(clusters))  # Get the number of unique clusters
         progress(0.7, "Organizing faces")
             anomalies_all, anomaly_scores_all, top_indices_all, anomalies_comp, anomaly_scores_comp, top_indices_comp, _ = lstm_anomaly_detection(
                 X, feature_columns, batch_size=batch_size)
             anomaly_scores_all = normalize_scores(anomaly_scores_all)
             anomaly_scores_comp = normalize_scores(anomaly_scores_comp)
             emotion_anomalies = {}
             for emotion in ['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral']:
                 anomalies, scores, indices = emotion_anomaly_detection(df[emotion])
         progress(0.95, "Generating plots")
         try:
+            anomaly_plot_all = plot_anomaly_scores(df, anomaly_scores_all, top_indices_all,
+                                                   "Facial Features + Emotions",
                                                    df['Timecode'].iloc[top_indices_all].values)
             anomaly_plot_comp = plot_anomaly_scores(df, anomaly_scores_comp, top_indices_comp, "Facial Features",
                                                     df['Timecode'].iloc[top_indices_comp].values)
             face_samples
         )
 iface = gr.Interface(
     fn=process_video,
     inputs=[
         - Number of Components: Complexity of the facial expression model
         - Desired FPS: Frames per second to analyze (lower for faster processing)
         - Batch Size: Affects processing speed and memory usage
+        Click on any graph to enlarge it.
         """,
     allow_flagging="never"
 )