Spaces:

reab5555
/

Multimodal-Behavioral-Anomalies-Detection

Running

App Files Files Community

reab5555 committed on Jul 18, 2024

Commit

981f52f

verified ·

1 Parent(s): 01f0185

Update app.py

Browse files

Files changed (1) hide show

app.py +138 -112

app.py CHANGED Viewed

@@ -8,8 +8,10 @@ import seaborn as sns
 from facenet_pytorch import InceptionResnetV1, MTCNN
 import mediapipe as mp
 from fer import FER
-from sklearn.cluster import KMeans
 from sklearn.preprocessing import StandardScaler, MinMaxScaler
 import umap
 import pandas as pd
 import matplotlib
@@ -20,7 +22,6 @@ import gradio as gr
 import tempfile
 import shutil
 # Suppress TensorFlow warnings
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 import tensorflow as tf
@@ -31,7 +32,7 @@ matplotlib.rcParams['figure.dpi'] = 400
 matplotlib.rcParams['savefig.dpi'] = 400
 # Initialize models and other global variables
-device = 'cuda'
 mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.999, 0.999, 0.999], min_face_size=100,
               selection_method='largest')
@@ -40,7 +41,6 @@ mp_face_mesh = mp.solutions.face_mesh
 face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.5)
 emotion_detector = FER(mtcnn=False)
 def frame_to_timecode(frame_num, total_frames, duration):
     total_seconds = (frame_num / total_frames) * duration
     hours = int(total_seconds // 3600)
@@ -49,7 +49,6 @@ def frame_to_timecode(frame_num, total_frames, duration):
     milliseconds = int((total_seconds - int(total_seconds)) * 1000)
     return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
 def get_face_embedding_and_emotion(face_img):
     face_tensor = torch.tensor(face_img).permute(2, 0, 1).unsqueeze(0).float() / 255
     face_tensor = (face_tensor - 0.5) / 0.5
@@ -65,7 +64,6 @@ def get_face_embedding_and_emotion(face_img):
     return embedding.cpu().numpy().flatten(), emotion_dict
 def alignFace(img):
     img_raw = img.copy()
     results = face_mesh.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
@@ -91,13 +89,9 @@ def alignFace(img):
     new_img = cv2.warpAffine(img_raw, rotation_matrix, (width, height))
     return new_img
 def extract_frames(video_path, output_folder, desired_fps, progress_callback=None):
     os.makedirs(output_folder, exist_ok=True)
-    # Load the video clip
     clip = VideoFileClip(video_path)
     original_fps = clip.fps
     duration = clip.duration
     total_frames = int(duration * original_fps)
@@ -106,23 +100,15 @@ def extract_frames(video_path, output_folder, desired_fps, progress_callback=Non
     frame_count = 0
     for t in np.arange(0, duration, step / original_fps):
-        # Get the frame at time t
         frame = clip.get_frame(t)
-        # Convert the frame to PIL Image and save it
         img = Image.fromarray(frame)
         img.save(os.path.join(output_folder, f"frame_{frame_count:04d}.jpg"))
         frame_count += 1
-        # Report progress
         if progress_callback:
             progress = min(100, (frame_count / total_frames_to_extract) * 100)
             progress_callback(progress, f"Extracting frame")
         if frame_count >= total_frames_to_extract:
             break
     clip.close()
     return frame_count, original_fps
@@ -145,7 +131,6 @@ def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, b
                 batch_nums.append(frame_num)
         if batch_frames:
-            # Detect faces in batch
             batch_boxes, batch_probs = mtcnn.detect(batch_frames)
             for j, (frame, frame_num, boxes, probs) in enumerate(
@@ -173,13 +158,30 @@ def cluster_embeddings(embeddings):
     if len(embeddings) < 2:
         print("Not enough embeddings for clustering. Assigning all to one cluster.")
         return np.zeros(len(embeddings), dtype=int)
-    n_clusters = min(3, len(embeddings))  # Use at most 3 clusters
     scaler = StandardScaler()
     embeddings_scaled = scaler.fit_transform(embeddings)
-    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
-    clusters = kmeans.fit_predict(embeddings_scaled)
-    return clusters
 def organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder):
     for (frame_num, embedding), cluster in zip(embeddings_by_frame.items(), clusters):
@@ -189,7 +191,6 @@ def organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder
         dst = os.path.join(person_folder, f"frame_{frame_num}_face.jpg")
         shutil.copy(src, dst)
 def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps, original_fps, output_folder,
                             num_components, video_duration):
     emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
@@ -237,14 +238,12 @@ def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, de
     return df, largest_cluster
 class LSTMAutoencoder(nn.Module):
     def __init__(self, input_size, hidden_size=64, num_layers=2):
         super(LSTMAutoencoder, self).__init__()
         self.input_size = input_size
         self.hidden_size = hidden_size
         self.num_layers = num_layers
         self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
         self.fc = nn.Linear(hidden_size, input_size)
@@ -253,13 +252,9 @@ class LSTMAutoencoder(nn.Module):
         out = self.fc(outputs)
         return out
 def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, batch_size=64):
-    device = 'cuda'
     X = torch.FloatTensor(X).to(device)
-    # Ensure X is 3D (batch, sequence, features)
     if X.dim() == 2:
         X = X.unsqueeze(0)
     elif X.dim() == 1:
@@ -293,13 +288,11 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, bat
     with torch.no_grad():
         reconstructed = model(X).squeeze(0).cpu().numpy()
-    # Compute anomalies for all features
     mse_all = np.mean(np.power(X.squeeze(0).cpu().numpy() - reconstructed, 2), axis=1)
     top_indices_all = mse_all.argsort()[-num_anomalies:][::-1]
     anomalies_all = np.zeros(len(mse_all), dtype=bool)
     anomalies_all[top_indices_all] = True
-    # Compute anomalies for components only
     component_columns = [col for col in feature_columns if col.startswith('Comp')]
     component_indices = [feature_columns.index(col) for col in component_columns]
@@ -307,7 +300,7 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, bat
         mse_comp = np.mean(
             np.power(X.squeeze(0).cpu().numpy()[:, component_indices] - reconstructed[:, component_indices], 2), axis=1)
     else:
-        mse_comp = mse_all  # If no components, use all features
     top_indices_comp = mse_comp.argsort()[-num_anomalies:][::-1]
     anomalies_comp = np.zeros(len(mse_comp), dtype=bool)
@@ -317,98 +310,130 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, bat
             anomalies_comp, mse_comp, top_indices_comp,
             model)
-def plot_emotion(df, emotion, num_anomalies, color):
-    plt.figure(figsize=(16, 8), dpi=400)  # Increase DPI for higher quality
     fig, ax = plt.subplots(figsize=(16, 8))
-    # Convert timecodes to seconds for proper plotting
     df['Seconds'] = df['Timecode'].apply(
         lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
-    # Create a DataFrame for seaborn
-    plot_df = pd.DataFrame({
-        'Seconds': df['Seconds'],
-        'Emotion Score': df[emotion]
-    })
-    # Plot using seaborn
-    sns.lineplot(x='Seconds', y='Emotion Score', data=plot_df, ax=ax, color=color)
-    # Highlight top anomalies
-    top_indices = np.argsort(df[emotion].values)[-num_anomalies:][::-1]
-    ax.scatter(df['Seconds'].iloc[top_indices], df[emotion].iloc[top_indices], color='red', s=50, zorder=5)
-    # Set x-axis
-    max_seconds = df['Seconds'].max()
     ax.set_xlim(0, max_seconds)
-    num_ticks = 80  # Reduce number of ticks for emotion graphs
     ax.set_xticks(np.linspace(0, max_seconds, num_ticks))
-    ax.set_xticklabels([f"{int(x // 60):02d}:{int(x % 60):02d}" for x in ax.get_xticks()], rotation=90, ha='right')
     ax.set_xlabel('Time')
-    ax.set_ylabel(f'{emotion.capitalize()} Score')
-    ax.set_title(f'{emotion.capitalize()} Scores Over Time (Top {num_anomalies} in Red)')
-    # Add grid
     ax.grid(True, linestyle='--', alpha=0.7)
     plt.tight_layout()
     return fig
-def plot_anomaly_scores(df, anomaly_scores, top_indices, title):
-    plt.figure(figsize=(16, 8), dpi=400)  # Increase DPI for higher quality
     fig, ax = plt.subplots(figsize=(16, 8))
-    # Convert timecodes to seconds for proper plotting
     df['Seconds'] = df['Timecode'].apply(
         lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
-    # Create a DataFrame for seaborn
-    plot_df = pd.DataFrame({
-        'Seconds': df['Seconds'],
-        'Anomaly Score': anomaly_scores
-    })
-    # Plot using seaborn
-    sns.lineplot(x='Seconds', y='Anomaly Score', data=plot_df, ax=ax)
-    # Highlight top anomalies
-    ax.scatter(df['Seconds'].iloc[top_indices], anomaly_scores[top_indices], color='red', s=50, zorder=5)
-    # Set x-axis
-    max_seconds = df['Seconds'].max()
     ax.set_xlim(0, max_seconds)
-    num_ticks = 80  # Increase number of ticks for anomaly score graphs
     ax.set_xticks(np.linspace(0, max_seconds, num_ticks))
-    ax.set_xticklabels([f"{int(x // 60):02d}:{int(x % 60):02d}" for x in ax.get_xticks()], rotation=90, ha='right')
     ax.set_xlabel('Time')
-    ax.set_ylabel('Anomaly Score')
-    ax.set_title(f'Anomaly Scores Over Time ({title})')
-    # Add grid
     ax.grid(True, linestyle='--', alpha=0.7)
     plt.tight_layout()
     return fig
-def get_random_face_sample(organized_faces_folder, largest_cluster, output_folder):
-    person_folder = os.path.join(organized_faces_folder, f"person_{largest_cluster}")
-    face_files = [f for f in os.listdir(person_folder) if f.endswith('.jpg')]
-    if face_files:
-        random_face = np.random.choice(face_files)
-        face_path = os.path.join(person_folder, random_face)
-        output_path = os.path.join(output_folder, "random_face_sample.jpg")
-        # Read the image and resize it to be smaller
-        face_img = cv2.imread(face_path)
-        small_face = cv2.resize(face_img, (160, 160))  # Resize to NxN pixels
-        cv2.imwrite(output_path, small_face)
-        return output_path
-    return None
 def process_video(video_path, num_anomalies, num_components, desired_fps, batch_size, progress=gr.Progress()):
     output_folder = "output"
@@ -420,7 +445,6 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
         os.makedirs(aligned_faces_folder, exist_ok=True)
         os.makedirs(organized_faces_folder, exist_ok=True)
         clip = VideoFileClip(video_path)
         video_duration = clip.duration
         clip.close()
@@ -434,21 +458,25 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
         frame_count, original_fps = extract_frames(video_path, frames_folder, desired_fps, extraction_progress)
         progress(1, "Frame extraction complete")
         progress(0.3, "Processing frames")
         embeddings_by_frame, emotions_by_frame = process_frames(frames_folder, aligned_faces_folder, frame_count,
                                                                 progress, batch_size)
         if not embeddings_by_frame:
-            return "No faces were extracted from the video.", None, None, None, None, None, None
         progress(0.6, "Clustering embeddings")
         embeddings = list(embeddings_by_frame.values())
         clusters = cluster_embeddings(embeddings)
         progress(0.7, "Organizing faces")
         organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder)
         progress(0.8, "Saving person data")
         df, largest_cluster = save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps,
                                                       original_fps, temp_dir, num_components, video_duration)
@@ -464,9 +492,7 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
                 X, feature_columns, num_anomalies=num_anomalies, batch_size=batch_size)
         except Exception as e:
             print(f"Error details: {str(e)}")
-            print(f"X shape: {X.shape}")
-            print(f"X dtype: {X.dtype}")
-            return f"Error in anomaly detection: {str(e)}", None, None, None, None, None, None
         progress(0.95, "Generating plots")
         try:
@@ -484,7 +510,10 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
             return f"Error generating plots: {str(e)}", None, None, None, None, None, None, None, None, None
         progress(1.0, "Preparing results")
-        results = f"Top {num_anomalies} anomalies (All Features):\n"
         results += "\n".join([f"{score:.4f} at {timecode}" for score, timecode in
                               zip(anomaly_scores_all[top_indices_all], df['Timecode'].iloc[top_indices_all].values)])
         results += f"\n\nTop {num_anomalies} anomalies (Components Only):\n"
@@ -496,52 +525,49 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
             results += f"\n\nTop {num_anomalies} {emotion.capitalize()} Scores:\n"
             results += "\n".join([f"{df[emotion].iloc[i]:.4f} at {df['Timecode'].iloc[i]}" for i in top_indices])
-        # Get a random face sample
-        face_sample = get_random_face_sample(organized_faces_folder, largest_cluster, output_folder)
         return (
             results,
             anomaly_plot_all,
             anomaly_plot_comp,
             *emotion_plots,
-            face_sample
         )
-# Gradio interface
 iface = gr.Interface(
     fn=process_video,
     inputs=[
         gr.Video(),
-        gr.Slider(minimum=1, maximum=20, step=1, value=5, label="Number of Anomalies"),
         gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Number of Components"),
-        gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Desired FPS"),
-        gr.Slider(minimum=1, maximum=64, step=1, value=8, label="Batch Size")
     ],
     outputs=[
         gr.Textbox(label="Anomaly Detection Results"),
-        gr.Plot(label="Anomaly Scores (All Features)"),
-        gr.Plot(label="Anomaly Scores (Components Only)"),
         gr.Plot(label="Fear Anomalies"),
         gr.Plot(label="Sad Anomalies"),
         gr.Plot(label="Angry Anomalies"),
         gr.Plot(label="Happy Anomalies"),
         gr.Plot(label="Surprise Anomalies"),
         gr.Plot(label="Neutral Anomalies"),
-        gr.Image(type="filepath", label="Random Face Sample of Most Frequent Person"),
     ],
     title="Facial Expressions Anomaly Detection",
     description="""
     This application detects anomalies in facial expressions and emotions from a video input.
-    It focuses on the most frequently appearing person in the video for analysis.
     Adjust the parameters as needed:
     - Number of Anomalies: How many top anomalies or high intensities to highlight
     - Number of Components: Complexity of the facial expression model
     - Desired FPS: Frames per second to analyze (lower for faster processing)
     - Batch Size: Affects processing speed and memory usage
-    """
 )
 if __name__ == "__main__":
-    iface.launch()

 from facenet_pytorch import InceptionResnetV1, MTCNN
 import mediapipe as mp
 from fer import FER
+from scipy import interpolate
+from sklearn.cluster import DBSCAN, KMeans
 from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from sklearn.metrics import silhouette_score
 import umap
 import pandas as pd
 import matplotlib
 import tempfile
 import shutil
 # Suppress TensorFlow warnings
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 import tensorflow as tf
 matplotlib.rcParams['savefig.dpi'] = 400
 # Initialize models and other global variables
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
 mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.999, 0.999, 0.999], min_face_size=100,
               selection_method='largest')
 face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.5)
 emotion_detector = FER(mtcnn=False)
 def frame_to_timecode(frame_num, total_frames, duration):
     total_seconds = (frame_num / total_frames) * duration
     hours = int(total_seconds // 3600)
     milliseconds = int((total_seconds - int(total_seconds)) * 1000)
     return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
 def get_face_embedding_and_emotion(face_img):
     face_tensor = torch.tensor(face_img).permute(2, 0, 1).unsqueeze(0).float() / 255
     face_tensor = (face_tensor - 0.5) / 0.5
     return embedding.cpu().numpy().flatten(), emotion_dict
 def alignFace(img):
     img_raw = img.copy()
     results = face_mesh.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
     new_img = cv2.warpAffine(img_raw, rotation_matrix, (width, height))
     return new_img
 def extract_frames(video_path, output_folder, desired_fps, progress_callback=None):
     os.makedirs(output_folder, exist_ok=True)
     clip = VideoFileClip(video_path)
     original_fps = clip.fps
     duration = clip.duration
     total_frames = int(duration * original_fps)
     frame_count = 0
     for t in np.arange(0, duration, step / original_fps):
         frame = clip.get_frame(t)
         img = Image.fromarray(frame)
         img.save(os.path.join(output_folder, f"frame_{frame_count:04d}.jpg"))
         frame_count += 1
         if progress_callback:
             progress = min(100, (frame_count / total_frames_to_extract) * 100)
             progress_callback(progress, f"Extracting frame")
         if frame_count >= total_frames_to_extract:
             break
     clip.close()
     return frame_count, original_fps
                 batch_nums.append(frame_num)
         if batch_frames:
             batch_boxes, batch_probs = mtcnn.detect(batch_frames)
             for j, (frame, frame_num, boxes, probs) in enumerate(
     if len(embeddings) < 2:
         print("Not enough embeddings for clustering. Assigning all to one cluster.")
         return np.zeros(len(embeddings), dtype=int)
     scaler = StandardScaler()
     embeddings_scaled = scaler.fit_transform(embeddings)
+    # Use DBSCAN for adaptive clustering
+    dbscan = DBSCAN(eps=0.5, min_samples=5)  # Adjust these parameters as needed
+    clusters = dbscan.fit_predict(embeddings_scaled)
+    # If DBSCAN couldn't find meaningful clusters, fall back to KMeans
+    if len(set(clusters)) == 1:
+        best_n_clusters = 1
+        best_score = -1
+        for n_clusters in range(2, min(5, len(embeddings))):
+            kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
+            labels = kmeans.fit_predict(embeddings_scaled)
+            score = silhouette_score(embeddings_scaled, labels)
+            if score > best_score:
+                best_score = score
+                best_n_clusters = n_clusters
+        kmeans = KMeans(n_clusters=best_n_clusters, random_state=42, n_init=10)
+        clusters = kmeans.fit_predict(embeddings_scaled)
+    return clusters
 def organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder):
     for (frame_num, embedding), cluster in zip(embeddings_by_frame.items(), clusters):
         dst = os.path.join(person_folder, f"frame_{frame_num}_face.jpg")
         shutil.copy(src, dst)
 def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps, original_fps, output_folder,
                             num_components, video_duration):
     emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
     return df, largest_cluster
 class LSTMAutoencoder(nn.Module):
     def __init__(self, input_size, hidden_size=64, num_layers=2):
         super(LSTMAutoencoder, self).__init__()
         self.input_size = input_size
         self.hidden_size = hidden_size
         self.num_layers = num_layers
         self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
         self.fc = nn.Linear(hidden_size, input_size)
         out = self.fc(outputs)
         return out
 def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, batch_size=64):
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
     X = torch.FloatTensor(X).to(device)
     if X.dim() == 2:
         X = X.unsqueeze(0)
     elif X.dim() == 1:
     with torch.no_grad():
         reconstructed = model(X).squeeze(0).cpu().numpy()
     mse_all = np.mean(np.power(X.squeeze(0).cpu().numpy() - reconstructed, 2), axis=1)
     top_indices_all = mse_all.argsort()[-num_anomalies:][::-1]
     anomalies_all = np.zeros(len(mse_all), dtype=bool)
     anomalies_all[top_indices_all] = True
     component_columns = [col for col in feature_columns if col.startswith('Comp')]
     component_indices = [feature_columns.index(col) for col in component_columns]
         mse_comp = np.mean(
             np.power(X.squeeze(0).cpu().numpy()[:, component_indices] - reconstructed[:, component_indices], 2), axis=1)
     else:
+        mse_comp = mse_all
     top_indices_comp = mse_comp.argsort()[-num_anomalies:][::-1]
     anomalies_comp = np.zeros(len(mse_comp), dtype=bool)
             anomalies_comp, mse_comp, top_indices_comp,
             model)
+from scipy import interpolate
def plot_with_segments(ax, df_filtered, y_column, color, max_gap=1.0, num_points=200):
    """Draw ``y_column`` against ``df_filtered['Seconds']`` as separate smoothed runs.

    Consecutive rows whose ``Seconds`` values are more than ``max_gap`` apart are
    treated as belonging to different runs, so no line is drawn across the gap.
    Runs with more than three samples are resampled on ``num_points`` evenly
    spaced times using cubic interpolation, falling back to linear interpolation
    and finally to the raw samples if interpolation raises ``ValueError``.
    Shorter runs are drawn from their raw samples.

    Args:
        ax: Object exposing ``plot(x, y, color=..., linewidth=...)``.
        df_filtered: DataFrame with a ``'Seconds'`` column and ``y_column``.
        y_column: Name of the column holding the values to plot.
        color: Line colour forwarded to ``ax.plot``.
        max_gap: Largest time difference between neighbouring rows that still
            keeps them in the same run (the original hard-coded value was 1).
        num_points: Number of resampled points per smoothed run (originally 200).

    Returns:
        None. The function only draws on ``ax``.
    """
    times = df_filtered['Seconds'].to_numpy(dtype=float)
    scores = df_filtered[y_column].to_numpy(dtype=float)
    if times.size == 0:
        return

    # A new run starts right after every neighbouring pair that is too far apart.
    run_starts = np.flatnonzero(np.diff(times) > max_gap) + 1

    for run_times, run_scores in zip(np.split(times, run_starts), np.split(scores, run_starts)):
        xs, ys = run_times, run_scores
        if run_times.size > 3:
            grid = np.linspace(run_times.min(), run_times.max(), num=num_points)
            for kind in ('cubic', 'linear'):
                try:
                    curve = interpolate.interp1d(run_times, run_scores, kind=kind)
                    xs, ys = grid, curve(grid)
                    break
                except ValueError:
                    # Try the next, simpler interpolation; if none works the raw
                    # samples selected above are plotted instead of crashing.
                    continue
        ax.plot(xs, ys, color=color, linewidth=1.5)
def plot_anomaly_scores(df, anomaly_scores, top_indices, title):
    """Plot anomaly scores over time and mark the top anomalies in red.

    Args:
        df: DataFrame with a ``'Timecode'`` column of colon-separated time
            strings. A ``'Seconds'`` column is written onto it, as the original
            implementation did.
        anomaly_scores: One score per row of ``df``; NaN entries are not drawn.
        top_indices: Positions into ``anomaly_scores`` to highlight.
        title: Text inserted into the axes title.

    Returns:
        The matplotlib figure holding the plot.
    """
    # Create exactly one figure; a separate plt.figure() call beforehand would
    # leave an unused figure open on every invocation.
    fig, ax = plt.subplots(figsize=(16, 8), dpi=400)

    # Colon-separated fields are weighted 1, 60, 3600, ... from the right.
    df['Seconds'] = df['Timecode'].apply(
        lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))

    # Filter out rows with no data
    scores = np.asarray(anomaly_scores, dtype=float)
    mask = ~np.isnan(scores)
    df_filtered = df[mask].copy()
    df_filtered['anomaly_scores'] = scores[mask]

    if df_filtered.empty:
        # Axes-relative coordinates keep the message centred whatever the limits.
        ax.text(0.5, 0.5, "No data available", ha='center', va='center',
                transform=ax.transAxes)
    else:
        plot_with_segments(ax, df_filtered, 'anomaly_scores', 'blue')

        # Highlight top anomalies. top_indices are positions in the unfiltered
        # data, so index the unfiltered arrays by position and only drop the
        # positions whose score was NaN.
        seconds = df['Seconds'].to_numpy()
        top_positions = np.asarray([i for i in top_indices if mask[i]], dtype=int)
        ax.scatter(seconds[top_positions], scores[top_positions],
                   color='red', s=100, zorder=5)

    max_seconds = df['Seconds'].max()  # Use the full range for x-axis
    if np.isfinite(max_seconds) and max_seconds > 0:
        ax.set_xlim(0, max_seconds)
        num_ticks = 80
        ticks = np.linspace(0, max_seconds, num_ticks)
        ax.set_xticks(ticks)
        ax.set_xticklabels([f"{int(x // 60):02d}:{int(x % 60):02d}" for x in ticks],
                           rotation=90, ha='center', va='top')

    ax.set_xlabel('Time')
    ax.set_ylabel('Anomaly Score')
    ax.set_title(f'Anomaly Scores Over Time ({title})')
    ax.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    return fig
def plot_emotion(df, emotion, num_anomalies, color):
    """Plot one emotion score over time and mark its highest values in red.

    Args:
        df: DataFrame with a ``'Timecode'`` column of colon-separated time
            strings and a column named ``emotion``. A ``'Seconds'`` column is
            written onto it, as the original implementation did.
        emotion: Name of the score column to plot.
        num_anomalies: How many of the highest scores to highlight.
        color: Line colour for the score curve.

    Returns:
        The matplotlib figure holding the plot.
    """
    # Create exactly one figure; a separate plt.figure() call beforehand would
    # leave an unused figure open on every invocation.
    fig, ax = plt.subplots(figsize=(16, 8), dpi=400)

    # Colon-separated fields are weighted 1, 60, 3600, ... from the right.
    df['Seconds'] = df['Timecode'].apply(
        lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))

    # Filter out rows with no data
    df_filtered = df[df[emotion].notna()]

    if df_filtered.empty:
        # Axes-relative coordinates keep the message centred whatever the limits.
        ax.text(0.5, 0.5, "No data available", ha='center', va='center',
                transform=ax.transAxes)
    else:
        plot_with_segments(ax, df_filtered, emotion, color)

        # Highlight top anomalies. A count of zero must highlight nothing:
        # slicing with [-0:] would select every point instead.
        highlight_count = min(max(int(num_anomalies), 0), len(df_filtered))
        if highlight_count > 0:
            top_indices = np.argsort(df_filtered[emotion].values)[-highlight_count:][::-1]
            ax.scatter(df_filtered['Seconds'].iloc[top_indices],
                       df_filtered[emotion].iloc[top_indices],
                       color='red', s=100, zorder=5)

    max_seconds = df['Seconds'].max()  # Use the full range for x-axis
    if np.isfinite(max_seconds) and max_seconds > 0:
        ax.set_xlim(0, max_seconds)
        num_ticks = 80
        ticks = np.linspace(0, max_seconds, num_ticks)
        ax.set_xticks(ticks)
        ax.set_xticklabels([f"{int(x // 60):02d}:{int(x % 60):02d}" for x in ticks],
                           rotation=90, ha='center', va='top')

    ax.set_xlabel('Time')
    ax.set_ylabel(f'{emotion.capitalize()} Score')
    ax.set_title(f'{emotion.capitalize()} Scores Over Time (Top {num_anomalies} in Red)')
    ax.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    return fig
def get_random_face_samples(organized_faces_folder, output_folder):
    """Save one randomly chosen 160x160 face thumbnail per ``person_<id>`` folder.

    Args:
        organized_faces_folder: Directory whose ``person_<id>`` sub-directories
            contain ``.jpg`` face images.
        output_folder: Directory that receives ``face_sample_person_<id>.jpg``
            files; it is created if it does not exist.

    Returns:
        Dict mapping the integer cluster id parsed from the folder name to the
        path of the thumbnail written for it. Folders that are not directories,
        have a non-numeric id, contain no ``.jpg`` file, or yield no readable
        image are left out.
    """
    os.makedirs(output_folder, exist_ok=True)
    face_samples = {}

    for cluster_folder in os.listdir(organized_faces_folder):
        person_folder = os.path.join(organized_faces_folder, cluster_folder)
        if not cluster_folder.startswith("person_") or not os.path.isdir(person_folder):
            continue
        try:
            cluster_id = int(cluster_folder.split("_")[1])
        except ValueError:
            # Not a "person_<number>" folder; ignore it rather than abort.
            continue

        face_files = [f for f in os.listdir(person_folder) if f.endswith('.jpg')]
        if not face_files:
            continue

        output_path = os.path.join(output_folder, f"face_sample_person_{cluster_id}.jpg")
        # Visit the files in random order so the first readable one is a random
        # pick; cv2.imread signals an unreadable file by returning None.
        for random_face in np.random.permutation(face_files):
            face_img = cv2.imread(os.path.join(person_folder, str(random_face)))
            if face_img is None:
                continue
            small_face = cv2.resize(face_img, (160, 160))
            # Only report the sample when the thumbnail was actually written.
            if cv2.imwrite(output_path, small_face):
                face_samples[cluster_id] = output_path
            break

    return face_samples
 def process_video(video_path, num_anomalies, num_components, desired_fps, batch_size, progress=gr.Progress()):
     output_folder = "output"
         os.makedirs(aligned_faces_folder, exist_ok=True)
         os.makedirs(organized_faces_folder, exist_ok=True)
         clip = VideoFileClip(video_path)
         video_duration = clip.duration
         clip.close()
         frame_count, original_fps = extract_frames(video_path, frames_folder, desired_fps, extraction_progress)
         progress(1, "Frame extraction complete")
         progress(0.3, "Processing frames")
         embeddings_by_frame, emotions_by_frame = process_frames(frames_folder, aligned_faces_folder, frame_count,
                                                                 progress, batch_size)
         if not embeddings_by_frame:
+            return ("No faces were extracted from the video.",
+                    None, None, None, None, None, None, None, None, None)
         progress(0.6, "Clustering embeddings")
         embeddings = list(embeddings_by_frame.values())
         clusters = cluster_embeddings(embeddings)
+        num_clusters = len(set(clusters))  # Get the number of unique clusters
         progress(0.7, "Organizing faces")
         organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder)
+        progress(0.75, "Getting face samples")
+        face_samples = get_random_face_samples(organized_faces_folder, output_folder)
         progress(0.8, "Saving person data")
         df, largest_cluster = save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps,
                                                       original_fps, temp_dir, num_components, video_duration)
                 X, feature_columns, num_anomalies=num_anomalies, batch_size=batch_size)
         except Exception as e:
             print(f"Error details: {str(e)}")
+            return f"Error in anomaly detection: {str(e)}", None, None, None, None, None, None, None, None, None
         progress(0.95, "Generating plots")
         try:
             return f"Error generating plots: {str(e)}", None, None, None, None, None, None, None, None, None
         progress(1.0, "Preparing results")
+        results = f"Number of persons detected: {num_clusters}\n\n"
+        for cluster_id in range(num_clusters):
+            results += f"Person {cluster_id + 1}: {len([c for c in clusters if c == cluster_id])} frames\n"
+        results += f"\nTop {num_anomalies} anomalies (All Features):\n"
         results += "\n".join([f"{score:.4f} at {timecode}" for score, timecode in
                               zip(anomaly_scores_all[top_indices_all], df['Timecode'].iloc[top_indices_all].values)])
         results += f"\n\nTop {num_anomalies} anomalies (Components Only):\n"
             results += f"\n\nTop {num_anomalies} {emotion.capitalize()} Scores:\n"
             results += "\n".join([f"{df[emotion].iloc[i]:.4f} at {df['Timecode'].iloc[i]}" for i in top_indices])
         return (
             results,
             anomaly_plot_all,
             anomaly_plot_comp,
             *emotion_plots,
+            *[face_samples.get(i, None) for i in range(num_clusters)]
         )
 # Gradio UI definition: wires process_video to one video input, four sliders,
 # and the text/plot/gallery outputs declared below.
 iface = gr.Interface(
     fn=process_video,
     # Inputs are passed positionally, in this order, to
     # process_video(video_path, num_anomalies, num_components, desired_fps, batch_size).
     inputs=[
         gr.Video(),
+        gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Number of Anomalies"),
         gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Number of Components"),
+        gr.Slider(minimum=1, maximum=20, step=1, value=15, label="Desired FPS"),
+        gr.Slider(minimum=1, maximum=32, step=4, value=8, label="Batch Size")
     ],
     # Outputs follow the order of process_video's return tuple: results text,
     # two anomaly-score plots, the emotion plots, then the face samples.
     # NOTE(review): process_video unpacks one face-sample path per cluster into
     # its return tuple, while a single Gallery component is declared here —
     # confirm the number and shape of returned values match these outputs.
     outputs=[
         gr.Textbox(label="Anomaly Detection Results"),
+        gr.Plot(label="Anomaly Scores (Facial Features + Emotions)"),
+        gr.Plot(label="Anomaly Scores (Facial Features)"),
         gr.Plot(label="Fear Anomalies"),
         gr.Plot(label="Sad Anomalies"),
         gr.Plot(label="Angry Anomalies"),
         gr.Plot(label="Happy Anomalies"),
         gr.Plot(label="Surprise Anomalies"),
         gr.Plot(label="Neutral Anomalies"),
+        gr.Gallery(label="Detected Persons", columns=[2], rows=[1], height="auto")
     ],
     title="Facial Expressions Anomaly Detection",
     description="""
     This application detects anomalies in facial expressions and emotions from a video input.
+    It identifies distinct persons in the video and provides a sample face for each.
     Adjust the parameters as needed:
     - Number of Anomalies: How many top anomalies or high intensities to highlight
     - Number of Components: Complexity of the facial expression model
     - Desired FPS: Frames per second to analyze (lower for faster processing)
     - Batch Size: Affects processing speed and memory usage
+    """,
+    allow_flagging="never"
 )
 # Start the web UI only when this file is executed as a script.
 if __name__ == "__main__":
+    iface.launch()