Update app.py
app.py CHANGED
@@ -12,6 +12,7 @@ from scipy import interpolate
 from sklearn.cluster import DBSCAN, KMeans
 from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from sklearn.metrics import silhouette_score
+from sklearn.decomposition import PCA
 import umap
 import pandas as pd
 import matplotlib
@@ -41,6 +42,7 @@ mp_face_mesh = mp.solutions.face_mesh
 face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.5)
 emotion_detector = FER(mtcnn=False)
 
+
 def frame_to_timecode(frame_num, total_frames, duration):
     total_seconds = (frame_num / total_frames) * duration
     hours = int(total_seconds // 3600)
@@ -49,6 +51,7 @@ def frame_to_timecode(frame_num, total_frames, duration):
     milliseconds = int((total_seconds - int(total_seconds)) * 1000)
     return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
 
+
 def get_face_embedding_and_emotion(face_img):
     face_tensor = torch.tensor(face_img).permute(2, 0, 1).unsqueeze(0).float() / 255
     face_tensor = (face_tensor - 0.5) / 0.5
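Note (reviewer addition, not part of the commit): a quick sanity check of frame_to_timecode with arbitrary values. The minutes/seconds arithmetic sits in lines elided between these two hunks, but the visible return format pins down the expected output.

# Assumes app.py's frame_to_timecode is in scope; the values are made up.
print(frame_to_timecode(frame_num=100, total_frames=200, duration=131.0))
# frame 100 of 200 falls 65.5 s into a 131 s clip -> "00:01:05.500"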
@@ -64,6 +67,7 @@ def get_face_embedding_and_emotion(face_img):
 
     return embedding.cpu().numpy().flatten(), emotion_dict
 
+
 def alignFace(img):
     img_raw = img.copy()
     results = face_mesh.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
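Note (reviewer addition, not part of the commit): the two-step scaling above maps uint8 pixel values from [0, 255] to [-1, 1], the input range FaceNet-style encoders typically expect (the encoder itself is defined outside these hunks, so that expectation is an assumption). A minimal check:

import torch

px = torch.tensor([0.0, 127.5, 255.0])  # darkest, middle, brightest pixel
x = px / 255          # tensor([0.0000, 0.5000, 1.0000])
x = (x - 0.5) / 0.5   # tensor([-1., 0., 1.])
print(x)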
@@ -89,6 +93,7 @@ def alignFace(img):
     new_img = cv2.warpAffine(img_raw, rotation_matrix, (width, height))
     return new_img
 
+
 def extract_frames(video_path, output_folder, desired_fps, progress_callback=None):
     os.makedirs(output_folder, exist_ok=True)
     clip = VideoFileClip(video_path)
@@ -112,6 +117,7 @@ def extract_frames(video_path, output_folder, desired_fps, progress_callback=None):
     clip.close()
     return frame_count, original_fps
 
+
 def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, batch_size):
     embeddings_by_frame = {}
     emotions_by_frame = {}
@@ -155,6 +161,7 @@ def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, batch_size):
 
     return embeddings_by_frame, emotions_by_frame, aligned_face_paths
 
+
 def cluster_faces(embeddings):
     if len(embeddings) < 2:
         print("Not enough faces for clustering. Assigning all to one cluster.")
@@ -171,6 +178,7 @@ def cluster_faces(embeddings):
 
     return clusters
 
+
 def organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder):
     for (frame_num, embedding), cluster in zip(embeddings_by_frame.items(), clusters):
         person_folder = os.path.join(organized_faces_folder, f"person_{cluster}")
@@ -179,8 +187,34 @@ def organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder):
         dst = os.path.join(person_folder, f"frame_{frame_num}_face.jpg")
         shutil.copy(src, dst)
 
+
+def find_optimal_components(embeddings, max_components=10):
+    pca = PCA(n_components=max_components)
+    pca.fit(embeddings)
+
+    explained_variance_ratio = pca.explained_variance_ratio_
+    cumulative_variance_ratio = np.cumsum(explained_variance_ratio)
+
+    # Plot explained variance ratio
+    plt.figure(figsize=(10, 6))
+    plt.plot(range(1, max_components + 1), cumulative_variance_ratio, 'bo-')
+    plt.xlabel('Number of Components')
+    plt.ylabel('Cumulative Explained Variance Ratio')
+    plt.title('Explained Variance Ratio vs. Number of Components')
+    plt.grid(True)
+
+    # Find elbow point
+    differences = np.diff(cumulative_variance_ratio)
+    elbow_point = np.argmin(differences) + 1
+
+    plt.axvline(x=elbow_point, color='r', linestyle='--', label=f'Elbow point: {elbow_point}')
+    plt.legend()
+
+    return elbow_point, plt
+
+
 def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps, original_fps, output_folder,
-
+                            video_duration):
     emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
     person_data = {}
 
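Note (reviewer addition, not part of the commit): find_optimal_components fits one PCA, plots the cumulative explained-variance curve, and returns an elbow index that the next hunk feeds into umap.UMAP(n_components=...). A hypothetical smoke test on random data (the 512-dim embedding size is an assumption; only the function's own signature is taken from the diff):

import numpy as np

rng = np.random.default_rng(0)
fake_embeddings = rng.normal(size=(200, 512))  # 200 faces x 512-dim embeddings (assumed shape)
n, _ = find_optimal_components(fake_embeddings, max_components=10)  # returns (elbow_point, plt)
print(f"elbow at {n} components")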
@@ -199,7 +233,10 @@ def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps, original_fps, output_folder,
     embeddings_array = np.array(embeddings)
     np.save(os.path.join(output_folder, 'face_embeddings.npy'), embeddings_array)
 
-
+    # Find optimal number of components
+    optimal_components, _ = find_optimal_components(embeddings_array)
+
+    reducer = umap.UMAP(n_components=optimal_components, random_state=1)
     embeddings_reduced = reducer.fit_transform(embeddings)
 
     scaler = MinMaxScaler(feature_range=(0, 1))
@@ -216,7 +253,11 @@ def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps, original_fps, output_folder,
         'Embedding_Index': range(len(embeddings))
     }
 
-
+    # Add raw embeddings
+    for i in range(len(embeddings[0])):
+        df_data[f'Raw_Embedding_{i}'] = [embedding[i] for embedding in embeddings]
+
+    for i in range(optimal_components):
         df_data[f'Comp {i + 1}'] = embeddings_reduced_normalized[:, i]
 
     for emotion in emotions:
@@ -226,33 +267,6 @@ def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps, original_fps, output_folder,
 
     return df, largest_cluster
 
-def determine_optimal_anomalies(anomaly_scores, z_threshold=3.5):
-    mean = np.mean(anomaly_scores)
-    std = np.std(anomaly_scores)
-    threshold = mean + z_threshold * std
-    anomalies = anomaly_scores > threshold
-    return anomalies, np.where(anomalies)[0]
-
-def timecode_to_seconds(timecode):
-    h, m, s = map(float, timecode.split(':'))
-    return h * 3600 + m * 60 + s
-
-def group_similar_timecodes(timecodes, scores, threshold_seconds=10):
-    grouped = []
-    current_group = []
-
-    for i, (timecode, score) in enumerate(zip(timecodes, scores)):
-        if not current_group or abs(
-                timecode_to_seconds(timecode) - timecode_to_seconds(current_group[0][0])) <= threshold_seconds:
-            current_group.append((timecode, score, i))
-        else:
-            grouped.append(current_group)
-            current_group = [(timecode, score, i)]
-
-    if current_group:
-        grouped.append(current_group)
-
-    return grouped
 
 class LSTMAutoencoder(nn.Module):
     def __init__(self, input_size, hidden_size=64, num_layers=2):
@@ -268,21 +282,17 @@ class LSTMAutoencoder(nn.Module):
         out = self.fc(outputs)
         return out
 
-def lstm_anomaly_detection(X, feature_columns, epochs=100, batch_size=64):
+
+def lstm_anomaly_detection(X, feature_columns, raw_embedding_columns, epochs=100, batch_size=64):
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
     X = torch.FloatTensor(X).to(device)
     if X.dim() == 2:
         X = X.unsqueeze(0)
     elif X.dim() == 1:
         X = X.unsqueeze(0).unsqueeze(2)
-    elif X.dim() > 3:
-        raise ValueError(f"Input X should be 1D, 2D or 3D, but got {X.dim()} dimensions")
 
     print(f"X shape after reshaping: {X.shape}")
 
-    train_size = int(0.9 * X.shape[1])
-    X_train, X_val = X[:, :train_size, :], X[:, train_size:, :]
-
     model = LSTMAutoencoder(input_size=X.shape[2]).to(device)
     criterion = nn.MSELoss()
     optimizer = optim.Adam(model.parameters())
@@ -290,22 +300,19 @@ def lstm_anomaly_detection(X, feature_columns, epochs=100, batch_size=64):
     for epoch in range(epochs):
         model.train()
         optimizer.zero_grad()
-
-
-
+        output = model(X)
+        loss = criterion(output, X)
+        loss.backward()
         optimizer.step()
 
-
-
-        output_val = model(X_val)
-        loss_val = criterion(output_val, X_val.squeeze(0))
+        if epoch % 10 == 0:
+            print(f"Epoch [{epoch}/{epochs}], Loss: {loss.item():.4f}")
 
     model.eval()
     with torch.no_grad():
         reconstructed = model(X).squeeze(0).cpu().numpy()
 
     mse_all = np.mean(np.power(X.squeeze(0).cpu().numpy() - reconstructed, 2), axis=1)
-    anomalies_all, top_indices_all = determine_optimal_anomalies(mse_all)
 
     component_columns = [col for col in feature_columns if col.startswith('Comp')]
     component_indices = [feature_columns.index(col) for col in component_columns]
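Note (reviewer addition, not part of the commit): the rewritten loop trains on the full sequence and scores each frame by its reconstruction error, dropping the old train/validation split. A self-contained sketch of that scoring idea, with a plain nn.LSTM standing in for the LSTMAutoencoder (shapes are illustrative assumptions):

import torch
import torch.nn as nn

X = torch.randn(1, 120, 8)                        # (batch, frames, features)
model = nn.LSTM(input_size=8, hidden_size=8, batch_first=True)
reconstructed, _ = model(X)                       # stand-in "reconstruction"
mse_per_frame = ((X - reconstructed) ** 2).mean(dim=2).squeeze(0)
print(mse_per_frame.shape)                        # torch.Size([120]), one score per frame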
@@ -316,53 +323,10 @@ def lstm_anomaly_detection(X, feature_columns, epochs=100, batch_size=64):
     else:
         mse_comp = mse_all
 
-
-
-    return (anomalies_all, mse_all, top_indices_all,
-            anomalies_comp, mse_comp, top_indices_comp,
-            model)
-
-def emotion_anomaly_detection(emotion_data, epochs=100, batch_size=64):
-    device = 'cuda' if torch.cuda.is_available() else 'cpu'
-    X = torch.FloatTensor(emotion_data.values).to(device)
-    if X.dim() == 1:
-        X = X.unsqueeze(0).unsqueeze(2)  # Add batch and feature dimensions
-    elif X.dim() == 2:
-        X = X.unsqueeze(0)  # Add batch dimension
-
-    model = LSTMAutoencoder(input_size=1).to(device)
-    criterion = nn.MSELoss()
-    optimizer = optim.Adam(model.parameters())
+    raw_embedding_indices = [feature_columns.index(col) for col in raw_embedding_columns]
+    mse_raw = np.mean(np.power(X.squeeze(0).cpu().numpy()[:, raw_embedding_indices] - reconstructed[:, raw_embedding_indices], 2), axis=1)
 
-
-    model.train()
-    optimizer.zero_grad()
-    output = model(X)
-    loss = criterion(output, X)
-    loss.backward()
-    optimizer.step()
-
-    model.eval()
-    with torch.no_grad():
-        reconstructed = model(X).squeeze(0).cpu().numpy()
-
-    mse = np.mean(np.power(X.squeeze(0).cpu().numpy() - reconstructed, 2), axis=1)
-    anomalies, top_indices = determine_optimal_anomalies(mse)
-
-    return anomalies, mse, top_indices
-
-def normalize_scores(scores):
-    min_score = np.min(scores)
-    max_score = np.max(scores)
-    if max_score == min_score:
-        return np.full_like(scores, 100)
-    return ((scores - min_score) / (max_score - min_score)) * 100
-
-def plot_to_image(fig):
-    buf = io.BytesIO()
-    fig.savefig(buf, format='png', dpi=300, bbox_inches='tight')
-    buf.seek(0)
-    return buf
+    return mse_all, mse_comp, mse_raw
 
 def embedding_anomaly_detection(embeddings, epochs=100, batch_size=64):
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
@@ -389,106 +353,74 @@ def embedding_anomaly_detection(embeddings, epochs=100, batch_size=64):
         reconstructed = model(X).squeeze(0).cpu().numpy()
 
     mse = np.mean(np.power(X.squeeze(0).cpu().numpy() - reconstructed, 2), axis=1)
-
-
-    return anomalies, mse, top_indices
-def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
-    plt.figure(figsize=(16, 8), dpi=300)
-    fig, ax = plt.subplots(figsize=(16, 8))
-
-    df['Seconds'] = df['Timecode'].apply(
-        lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
-
-    # Filter out data points without faces
-    valid_indices = [i for i in range(len(anomaly_scores)) if i in df.index]
-    seconds = df['Seconds'].iloc[valid_indices].values
-    scores = anomaly_scores[valid_indices]
-
-    top_indices = [idx for idx in top_indices if idx in valid_indices]
-    ax.scatter(df['Seconds'].iloc[top_indices], anomaly_scores[top_indices], color='red', s=50, zorder=5)
-
-    # Calculate and plot baseline
-    non_anomalous_scores = np.delete(scores, top_indices)
-    baseline = np.mean(non_anomalous_scores)
-    ax.axhline(y=baseline, color='black', linestyle='--', linewidth=2.5)
-    ax.text(df['Seconds'].max(), baseline, f'Baseline ({baseline:.2f})',
-            verticalalignment='bottom', horizontalalignment='right', color='black')
-
-    grouped_timecodes = group_similar_timecodes([df['Timecode'].iloc[idx] for idx in top_indices],
-                                                scores[top_indices])
-
-    for group in grouped_timecodes:
-        max_score_idx = max(range(len(group)), key=lambda i: group[i][1])
-        timecode, score, idx = group[max_score_idx]
-        ax.annotate(timecode,
-                    (df['Seconds'].iloc[top_indices[idx]], score),
-                    xytext=(5, 5), textcoords='offset points',
-                    fontsize=6, color='red')
-
-    max_seconds = df['Seconds'].max()
-    ax.set_xlim(0, max_seconds)
-    num_ticks = 100
-    ax.set_xticks(np.linspace(0, max_seconds, num_ticks))
-    ax.set_xticklabels([f"{int(x // 60):02d}:{int(x % 60):02d}" for x in ax.get_xticks()],
-                       rotation=90, ha='center', va='top')
-
-    ax.set_xlabel('Time')
-    ax.set_ylabel('Anomaly Score')
-    ax.set_title(title)
-
-    ax.grid(True, linestyle='--', alpha=0.7)
-    plt.tight_layout()
-    plt.close()
-    return fig
+    return mse
 
-def 
+def determine_anomalies(mse_values, threshold=3.5):
+    mean = np.mean(mse_values)
+    std = np.std(mse_values)
+    anomalies = mse_values > (mean + threshold * std)
+    return anomalies
+
+def plot_mse(df, mse_values, title, color='blue', time_threshold=1, hide_first_n=3):
     plt.figure(figsize=(16, 8), dpi=300)
     fig, ax = plt.subplots(figsize=(16, 8))
 
     df['Seconds'] = df['Timecode'].apply(
         lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
 
-    #
-
-    seconds = df['Seconds'].iloc[valid_indices].values
-    scores = anomaly_scores[valid_indices]
-
-    #
-
-
-            verticalalignment='bottom', horizontalalignment='right', color='black')
-
-
-            xytext=(5, 5), textcoords='offset points',
-            fontsize=6, color='red')
+    # Plot all points
+    ax.scatter(df['Seconds'], mse_values, color=color, alpha=0.7, s=10)
+
+    # Determine anomalies
+    anomalies = determine_anomalies(mse_values)
+
+    # Hide the first n anomalies
+    visible_anomalies = np.where(anomalies)[0][hide_first_n:]
+    ax.scatter(df['Seconds'].iloc[visible_anomalies], mse_values[visible_anomalies], color='red', s=50, zorder=5)
+
+    # Group closely occurring anomalies and annotate only the highest MSE
+    anomaly_data = list(zip(df['Timecode'].iloc[visible_anomalies],
+                            df['Seconds'].iloc[visible_anomalies],
+                            mse_values[visible_anomalies]))
+    anomaly_data.sort(key=lambda x: x[1])  # Sort by seconds
+
+    grouped_anomalies = []
+    current_group = []
+    for timecode, sec, mse in anomaly_data:
+        if not current_group or sec - current_group[-1][1] <= time_threshold:
+            current_group.append((timecode, sec, mse))
+        else:
+            grouped_anomalies.append(current_group)
+            current_group = [(timecode, sec, mse)]
+    if current_group:
+        grouped_anomalies.append(current_group)
+
+    for group in grouped_anomalies:
+        highest_mse_anomaly = max(group, key=lambda x: x[2])
+        timecode, sec, mse = highest_mse_anomaly
+        ax.annotate(timecode, (sec, mse), textcoords="offset points", xytext=(0, 10),
+                    ha='center', fontsize=8, color='red')
+
+    # Add baseline (mean MSE) line
+    mean_mse = np.mean(mse_values)
+    ax.axhline(y=mean_mse, color='black', linestyle='--', linewidth=1)
+    ax.text(df['Seconds'].max(), mean_mse, f'Baseline ({mean_mse:.6f})',
+            verticalalignment='bottom', horizontalalignment='right', color='black', fontsize=8)
 
+    # Set x-axis labels to timecodes
     max_seconds = df['Seconds'].max()
-    ax.set_xlim(0, max_seconds)
     num_ticks = 100
+    tick_locations = np.linspace(0, max_seconds, num_ticks)
+    tick_labels = [frame_to_timecode(int(s * df['Frame'].max() / max_seconds), df['Frame'].max(), max_seconds)
+                   for s in tick_locations]
+
+    ax.set_xticks(tick_locations)
+    ax.set_xticklabels(tick_labels, rotation=90, ha='center', fontsize=6)
 
     ax.set_xlabel('Time')
-    ax.set_ylabel(
-    ax.set_title(
+    ax.set_ylabel('Mean Squared Error')
+    ax.set_title(title)
 
     ax.grid(True, linestyle='--', alpha=0.7)
     plt.tight_layout()
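Note (reviewer addition, not part of the commit): determine_anomalies reinstates the z-score rule of the removed determine_optimal_anomalies, flagging frames whose MSE exceeds mean + 3.5 standard deviations. A worked example with synthetic values:

import numpy as np

mse_values = np.append(np.full(50, 0.01), 0.09)  # 50 typical frames plus one outlier
mean, std = mse_values.mean(), mse_values.std()
print(round(mean + 3.5 * std, 3))                # threshold, about 0.050
print(np.where(mse_values > mean + 3.5 * std))   # (array([50]),): only the outlier is flagged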
@@ -522,10 +454,18 @@ def get_all_face_samples(organized_faces_folder, output_folder, largest_cluster):
             cv2.imwrite(output_path, small_face)
             face_samples["others"].append(output_path)
     return face_samples
-def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
+
+def process_video(video_path, desired_fps, batch_size, progress=gr.Progress()):
     output_folder = "output"
     os.makedirs(output_folder, exist_ok=True)
 
+    # Initialize plot variables
+    mse_plot_all = None
+    mse_plot_comp = None
+    mse_plot_raw = None
+    emotion_plots = [None] * 6  # For the 6 emotions
+    face_samples = {"most_frequent": [], "others": []}
+
     with tempfile.TemporaryDirectory() as temp_dir:
         aligned_faces_folder = os.path.join(temp_dir, 'aligned_faces')
         organized_faces_folder = os.path.join(temp_dir, 'organized_faces')
@@ -552,7 +492,7 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
 
         if not aligned_face_paths:
             return ("No faces were extracted from the video.",
-                    None, None, None, None, None, None, None, None)
+                    None, None, None, None, None, None, None, None, None, [], [])
 
         progress(0.6, "Clustering faces")
         embeddings = [embedding for _, embedding in embeddings_by_frame.items()]
@@ -564,7 +504,7 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
 
         progress(0.8, "Saving person data")
         df, largest_cluster = save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps,
-                                                      original_fps, temp_dir,
+                                                      original_fps, temp_dir, video_duration)
 
         progress(0.85, "Getting face samples")
         face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
@@ -572,46 +512,29 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
         progress(0.9, "Performing anomaly detection")
         feature_columns = [col for col in df.columns if
                            col not in ['Frame', 'Timecode', 'Time (Minutes)', 'Embedding_Index']]
+        raw_embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
         X = df[feature_columns].values
 
         try:
-
-            X, feature_columns, batch_size=batch_size)
-
-            anomaly_scores_all = normalize_scores(anomaly_scores_all)
-            anomaly_scores_comp = normalize_scores(anomaly_scores_comp)
+            mse_all, mse_comp, mse_raw = lstm_anomaly_detection(
+                X, feature_columns, raw_embedding_columns, batch_size=batch_size)
 
-
-
-
-
-                'anomalies': anomalies,
-                'scores': normalize_scores(scores),
-                'indices': indices
-            }
+            progress(0.95, "Generating plots")
+            mse_plot_all = plot_mse(df, mse_all, "Facial Features + Emotions", color='blue', hide_first_n=3)
+            mse_plot_comp = plot_mse(df, mse_comp, "Facial Features", color='deepskyblue', hide_first_n=3)
+            mse_plot_raw = plot_mse(df, mse_raw, "Facial Embeddings", color='steelblue', hide_first_n=3)
 
-        except Exception as e:
-            print(f"Error details: {str(e)}")
-            return f"Error in anomaly detection: {str(e)}", None, None, None, None, None, None, None, None
-
-        progress(0.95, "Generating plots")
-        try:
-            anomaly_plot_all = plot_anomaly_scores(df, anomaly_scores_all, top_indices_all,
-                                                   "Facial Features + Emotions",
-                                                   df['Timecode'].iloc[top_indices_all].values)
-            anomaly_plot_comp = plot_anomaly_scores(df, anomaly_scores_comp, top_indices_comp, "Facial Features",
-                                                    df['Timecode'].iloc[top_indices_comp].values)
             emotion_plots = [
-
-
-                    emotion_anomalies[emotion]['indices'],
-                    color,
-                    df['Timecode'].iloc[emotion_anomalies[emotion]['indices']].values)
+                plot_mse(df, embedding_anomaly_detection(df[emotion].values.reshape(-1, 1)),
+                         f"MSE: {emotion.capitalize()}", color=color, hide_first_n=3)
                 for emotion, color in zip(['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral'],
                                           ['purple', 'green', 'orange', 'darkblue', 'gold', 'grey'])
             ]
+
         except Exception as e:
-
+            print(f"Error details: {str(e)}")
+            return (f"Error in anomaly detection: {str(e)}",
+                    None, None, None, None, None, None, None, None, None, [], [])
 
         progress(1.0, "Preparing results")
         results = f"Number of persons/clusters detected: {num_clusters}\n\n"
@@ -619,55 +542,58 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
         for cluster_id in range(num_clusters):
             results += f"Person/Cluster {cluster_id + 1}: {len([c for c in clusters if c == cluster_id])} frames\n"
 
-
         return (
             results,
-
-
+            mse_plot_all,
+            mse_plot_comp,
+            mse_plot_raw,
             *emotion_plots,
             face_samples["most_frequent"],
             face_samples["others"]
         )
 
-
+# Define gallery outputs
 gallery_outputs = [
     gr.Gallery(label="Most Frequent Person Random Samples", columns=5, rows=2, height="auto"),
     gr.Gallery(label="Other Persons Random Samples", columns=5, rows=1, height="auto")
 ]
 
+# Update the Gradio interface
 iface = gr.Interface(
     fn=process_video,
     inputs=[
         gr.Video(),
-        gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Number of Components"),
         gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Desired FPS"),
         gr.Slider(minimum=1, maximum=32, step=1, value=8, label="Batch Size")
     ],
     outputs=[
        gr.Textbox(label="Anomaly Detection Results"),
-        gr.Plot(label="
-        gr.Plot(label="
-        gr.Plot(label="
-        gr.Plot(label="
-        gr.Plot(label="
-        gr.Plot(label="
-        gr.Plot(label="
-        gr.Plot(label="
+        gr.Plot(label="MSE: Facial Features + Emotions"),
+        gr.Plot(label="MSE: Facial Features (UMAP)"),
+        gr.Plot(label="MSE: Raw Facial Embeddings"),
+        gr.Plot(label="MSE: Fear"),
+        gr.Plot(label="MSE: Sad"),
+        gr.Plot(label="MSE: Angry"),
+        gr.Plot(label="MSE: Happy"),
+        gr.Plot(label="MSE: Surprise"),
+        gr.Plot(label="MSE: Neutral"),
     ] + gallery_outputs,
     title="Facial Expressions Anomaly Detection",
     description="""
     This application detects anomalies in facial expressions and emotions from a video input.
     It identifies distinct persons in the video and provides sample faces for each, with multiple samples for the most frequent person.
 
+    The graphs show Mean Squared Error (MSE) values for different aspects of facial expressions and emotions over time.
+    Each point represents a frame, with red points indicating detected anomalies.
+    Anomalies are annotated with their corresponding timecodes.
+    Higher MSE values indicate more unusual or anomalous expressions or emotions at that point in the video.
+
    Adjust the parameters as needed:
-    - Number of Components: Complexity of the facial expression model
    - Desired FPS: Frames per second to analyze (lower for faster processing)
    - Batch Size: Affects processing speed and memory usage
-
-    Click on any graph to enlarge it.
    """,
    allow_flagging="never"
)
 
-
-iface.launch()
+# Launch the interface
+iface.launch()