# NOTE: file-viewer metadata (byte size, git blame hashes, line-number gutter)
# removed — it was extraction residue, not part of the source.
import time
import traceback

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image

from video_processing import process_video
# Render all matplotlib figures — both on-screen and saved — at high DPI so
# the MSE plots and heatmaps stay crisp inside the Gradio UI.
plt.rcParams.update({"figure.dpi": 300, "savefig.dpi": 300})
def process_and_show_completion(video_input_path, anomaly_threshold_input, fps, progress=gr.Progress()):
    """Run the full video-analysis pipeline and unpack its results for the UI.

    Args:
        video_input_path: Path of the uploaded video file.
        anomaly_threshold_input: Anomaly threshold in standard deviations.
        fps: Frame sampling rate forwarded to the pipeline.
        progress: Gradio progress tracker forwarded to ``process_video``.

    Returns:
        A 17-element list consumed positionally by ``on_button_click``:
        [exec_time_or_error, results_summary, 3 MSE plots, 3 MSE histograms,
        3 MSE heatmaps, anomaly-face gallery, anomaly-posture gallery,
        face samples, heatmap video path, combined MSE plot, correlation
        heatmap]. On failure the first element is an error string and the
        remaining 16 slots are None.
    """
    try:
        print("Starting video processing...")
        results = process_video(video_input_path, anomaly_threshold_input, fps, progress=progress)
        print("Video processing completed.")

        # process_video signals a handled failure by putting an error string
        # in its first slot rather than raising.
        if isinstance(results[0], str) and results[0].startswith("Error"):
            print(f"Error occurred: {results[0]}")
            # Pad to the 17 slots the click handler indexes. (Was `* 27`,
            # inconsistent with the except path below and with the UI mapping.)
            return [results[0]] + [None] * 16

        # df / mse_* arrays and the folder paths are produced by the pipeline
        # but not surfaced in the UI; they are unpacked for clarity only.
        exec_time, results_summary, df, mse_embeddings, mse_posture, mse_voice, \
            mse_plot_embeddings, mse_plot_posture, mse_plot_voice, \
            mse_histogram_embeddings, mse_histogram_posture, mse_histogram_voice, \
            mse_heatmap_embeddings, mse_heatmap_posture, mse_heatmap_voice, \
            face_samples_frequent, \
            anomaly_faces_embeddings, anomaly_frames_posture_images, \
            aligned_faces_folder, frames_folder, \
            heatmap_video_path, combined_mse_plot, correlation_heatmap = results

        # Convert raw arrays / file paths into PIL images for the galleries;
        # fall back to empty galleries when a modality produced nothing.
        anomaly_faces_embeddings_pil = [Image.fromarray(face) for face in anomaly_faces_embeddings] if anomaly_faces_embeddings is not None else []
        anomaly_frames_posture_pil = [Image.fromarray(frame) for frame in anomaly_frames_posture_images] if anomaly_frames_posture_images is not None else []
        face_samples_frequent = [Image.open(path) for path in face_samples_frequent] if face_samples_frequent is not None else []

        output = [
            exec_time, results_summary,
            mse_plot_embeddings, mse_plot_posture, mse_plot_voice,
            mse_histogram_embeddings, mse_histogram_posture, mse_histogram_voice,
            mse_heatmap_embeddings, mse_heatmap_posture, mse_heatmap_voice,
            anomaly_faces_embeddings_pil, anomaly_frames_posture_pil,
            face_samples_frequent,
            heatmap_video_path, combined_mse_plot, correlation_heatmap
        ]
        return output

    except Exception as e:
        error_message = f"An error occurred: {str(e)}"
        print(error_message)
        traceback.print_exc()
        # Same 17-slot shape as the success path so the UI mapping never
        # raises IndexError.
        return [error_message] + [None] * 16
def on_button_click(video, threshold, fps, progress=gr.Progress()):
    """Event handler for the "Detect Anomalies" button.

    Runs the analysis pipeline and maps its 17 positional outputs onto the
    UI components, revealing the Results tab and hiding the Description tab.

    Args:
        video: Path of the uploaded video.
        threshold: Anomaly threshold (standard deviations).
        fps: Frame sampling rate.
        progress: Gradio progress tracker. NOTE(review): declared on the
            registered event handler so Gradio injects a tracked instance;
            previously only the nested call's default carried a
            ``gr.Progress()``, which Gradio does not track, so the progress
            bar never updated — confirm against the Gradio version in use.

    Returns:
        Dict mapping each output component to its new value or update.
    """
    results = process_and_show_completion(video, threshold, fps, progress)
    return {
        # Reveal the timing readout and swap the visible tab.
        execution_time: gr.update(visible=True, value=results[0]),
        results_tab: gr.update(visible=True),
        description_tab: gr.update(visible=False),
        # Positional mapping — must stay in sync with the list built by
        # process_and_show_completion.
        results_text: results[1],
        mse_features_plot: results[2],
        mse_posture_plot: results[3],
        mse_voice_plot: results[4],
        mse_features_hist: results[5],
        mse_posture_hist: results[6],
        mse_voice_hist: results[7],
        mse_features_heatmap: results[8],
        mse_posture_heatmap: results[9],
        mse_voice_heatmap: results[10],
        anomaly_frames_features: results[11],
        anomaly_frames_posture: results[12],
        face_samples_most_frequent: results[13],
        heatmap_video: results[14],
        combined_mse_plot: results[15],
        correlation_heatmap_plot: results[16],
    }
# ---------------------------------------------------------------------------
# Gradio UI definition.
# NOTE(review): the source paste had its leading whitespace stripped; the
# indentation below is a reconstruction — confirm the Row layout against the
# deployed app. Markdown string contents are kept byte-for-byte (flush-left),
# since string whitespace is runtime data.
# ---------------------------------------------------------------------------
with gr.Blocks() as iface:
    # Banner shown above the input controls.
    gr.Markdown("""
# Multimodal Behavioral Anomalies Detection
This tool detects anomalies in facial expressions, body language, and voice over the timeline of a video.
It extracts faces, postures, and voice from video frames, and analyzes them to identify anomalies using time series analysis and a variational autoencoder (VAE) approach.
""")

    # Input controls: the video to analyze plus the two tuning knobs.
    with gr.Row():
        video_input = gr.Video()
        anomaly_threshold = gr.Slider(minimum=1, maximum=5, step=0.1, value=3, label="Anomaly Detection Threshold (Standard deviation)")
        fps_slider = gr.Slider(minimum=5, maximum=20, step=1, value=10, label="Frames Per Second (FPS)")

    process_btn = gr.Button("Detect Anomalies")
    # NOTE(review): assigned but never wired to any event; progress reporting
    # flows through the handler's gr.Progress default instead — verify intent.
    progress_bar = gr.Progress()
    # Hidden until a run completes; on_button_click makes it visible.
    execution_time = gr.Number(label="Execution Time (seconds)", visible=False)

    with gr.Tabs() as tabs:
        # Static description tab; hidden by on_button_click once results exist.
        with gr.TabItem("Description", id="description_tab") as description_tab:
            with gr.Column():
                gr.Markdown("""
# Multimodal Behavioral Anomalies Detection
The purpose of this tool is to detect anomalies in facial expressions, body language, and voice over the timeline of a video.
It extracts faces, postures, and voice features from video frames, detects unique facial features, body postures, and speaker embeddings, and analyzes them to identify anomalies using time series analysis, specifically utilizing a variational autoencoder (VAE) approach.
## Applications
- Identify suspicious behavior in surveillance footage.
- Analyze micro-expressions.
- Monitor and assess emotional states in communications.
- Evaluate changes in vocal tone and speech patterns.
## Features
- **Face Extraction**: Extracts faces from video frames using the MTCNN model.
- **Feature Embeddings**: Extracts facial feature embeddings using the InceptionResnetV1 model.
- **Body Posture Analysis**: Evaluates body postures using MediaPipe Pose.
- **Voice Analysis**: Extracts and segment speaker embeddings from audio using PyAnnote.
- **Anomaly Detection**: Uses Variational Autoencoder (VAE) to detect anomalies in facial expressions, body postures, and voice features over time.
- **Visualization**: Represents changes in facial expressions, body postures, and vocal tone over time, marking anomaly key points.
## Limitations
- **Evaluation Challenges**: Since this is an unsupervised method, there is no labeled data to compare against.
- **Subjectivity**: The concept of what constitutes an "anomaly" can be subjective and context-dependent.
- **Lighting and Resolution**: Variability in lighting conditions and camera resolution can affect the quality of detected features.
- **Audio Quality**: Background noise and poor audio quality can affect the accuracy of voice analysis.
- **Generalization**: The model may not generalize well to all types of videos and contexts.
- **Computationally Intensive**: Processing high-resolution video frames can be computationally demanding.
## Conclusion
This tool offers solutions for detecting behavioral anomalies in video content. However, users should be aware of its limitations and interpret results with caution.
""")

        # Results tab, hidden until the first run finishes; one sub-tab per
        # modality plus a combined view.
        with gr.TabItem("Results", id="results_tab", visible=False) as results_tab:
            with gr.Tabs():
                with gr.TabItem("Facial Features"):
                    results_text = gr.TextArea(label="Faces Breakdown", lines=5)
                    mse_features_plot = gr.Plot(label="MSE: Facial Features")
                    mse_features_hist = gr.Plot(label="MSE Distribution: Facial Features")
                    mse_features_heatmap = gr.Plot(label="MSE Heatmap: Facial Features")
                    anomaly_frames_features = gr.Gallery(label="Anomaly Frames (Facial Features)", columns=6, rows=2, height="auto")
                    face_samples_most_frequent = gr.Gallery(label="Most Frequent Person Samples", columns=10, rows=2, height="auto")
                with gr.TabItem("Body Posture"):
                    mse_posture_plot = gr.Plot(label="MSE: Body Posture")
                    mse_posture_hist = gr.Plot(label="MSE Distribution: Body Posture")
                    mse_posture_heatmap = gr.Plot(label="MSE Heatmap: Body Posture")
                    anomaly_frames_posture = gr.Gallery(label="Anomaly Frames (Body Posture)", columns=6, rows=2, height="auto")
                with gr.TabItem("Voice"):
                    mse_voice_plot = gr.Plot(label="MSE: Voice")
                    mse_voice_hist = gr.Plot(label="MSE Distribution: Voice")
                    mse_voice_heatmap = gr.Plot(label="MSE Heatmap: Voice")
                with gr.TabItem("Combined"):
                    heatmap_video = gr.Video(label="Video with Anomaly Heatmap")
                    combined_mse_plot = gr.Plot(label="Combined MSE Plot")
                    correlation_heatmap_plot = gr.Plot(label="Correlation Heatmap")

    # Wire the button to the handler. Output order here must match the keys
    # of the dict returned by on_button_click.
    process_btn.click(
        fn=on_button_click,
        inputs=[video_input, anomaly_threshold, fps_slider],
        outputs=[
            execution_time, results_tab, description_tab,
            results_text, mse_features_plot, mse_posture_plot, mse_voice_plot,
            mse_features_hist, mse_posture_hist, mse_voice_hist,
            mse_features_heatmap, mse_posture_heatmap, mse_voice_heatmap,
            anomaly_frames_features, anomaly_frames_posture,
            face_samples_most_frequent, heatmap_video, combined_mse_plot,
            correlation_heatmap_plot
        ]
    )
# Launch the Gradio app only when run as a script (not when imported).
# Fix: removed the stray trailing `|` gutter artifact after iface.launch(),
# which would be a SyntaxError if taken literally.
if __name__ == "__main__":
    iface.launch()