Update app.py
app.py (changed)
```diff
@@ -2,13 +2,10 @@ import gradio as gr
 import time
 from video_processing import process_video
 from PIL import Image
-import matplotlib
-import numpy as np
-import pandas as pd
+import matplotlib
 
-
-
-plt.rcParams['savefig.dpi'] = 300
+matplotlib.rcParams['figure.dpi'] = 300
+matplotlib.rcParams['savefig.dpi'] = 300
 
 def process_and_show_completion(video_input_path, anomaly_threshold_input, fps, progress=gr.Progress()):
     try:
```
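The import cleanup above is the actual bug fix: the old module configured `plt.rcParams` without ever binding `plt` (only `matplotlib` was imported, and `numpy`/`pandas` were unused), which raises a `NameError` the moment the module is imported. Setting `matplotlib.rcParams` directly avoids pulling in pyplot at all. A minimal sketch of the two equivalent spellings:

```python
import matplotlib

# Raise default figure resolution app-wide; matplotlib.rcParams is the
# same RcParams object that pyplot re-exports as plt.rcParams.
matplotlib.rcParams['figure.dpi'] = 300
matplotlib.rcParams['savefig.dpi'] = 300

# Equivalent, but needs the pyplot import the old code was missing:
# import matplotlib.pyplot as plt
# plt.rcParams['savefig.dpi'] = 300
```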
```diff
@@ -36,11 +33,14 @@ def process_and_show_completion(video_input_path, anomaly_threshold_input, fps,
 
         output = [
             exec_time, results_summary,
+            df, mse_embeddings, mse_posture, mse_voice,
             mse_plot_embeddings, mse_plot_posture, mse_plot_voice,
             mse_histogram_embeddings, mse_histogram_posture, mse_histogram_voice,
             mse_heatmap_embeddings, mse_heatmap_posture, mse_heatmap_voice,
             anomaly_faces_embeddings_pil, anomaly_frames_posture_pil,
             face_samples_frequent,
+            aligned_faces_folder, frames_folder,
+            mse_embeddings, mse_posture, mse_voice,
             heatmap_video_path, combined_mse_plot, correlation_heatmap
         ]
 
```
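`output` is consumed positionally: Gradio assigns the i-th returned value to the i-th component in the `outputs=` list of the `process_btn.click(...)` wiring further down, so the order here must stay in lockstep with that list. The new entries pass the raw DataFrame, the per-modality MSE arrays, and the working folders through to `gr.State` holders so later callbacks can reuse them without reprocessing the video. A toy illustration of the positional contract, with hypothetical component names:

```python
import gradio as gr

def handler():
    # Value 0 fills outputs[0], value 1 fills outputs[1], and so on.
    return 1.23, "two anomalous segments found"

with gr.Blocks() as demo:
    btn = gr.Button("Run")
    exec_time = gr.Number(label="Execution Time (seconds)")
    summary = gr.TextArea(label="Summary")
    btn.click(handler, inputs=None, outputs=[exec_time, summary])
```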
```diff
@@ -51,44 +51,10 @@ def process_and_show_completion(video_input_path, anomaly_threshold_input, fps,
         print(error_message)
         import traceback
         traceback.print_exc()
-        return [error_message] + [None] *
+        return [error_message] + [None] * 27
 
-def
-
-
-    # Show execution time immediately and hide description
-    yield {
-        execution_time: gr.update(visible=True, value=0),
-        description: gr.update(visible=False),
-        results: gr.update(visible=True)
-    }
-
-    process_results = process_and_show_completion(video, threshold, fps)
-    end_time = time.time()
-    exec_time = end_time - start_time
-
-    return {
-        execution_time: gr.update(visible=True, value=exec_time),
-        results_text: process_results[1],
-        mse_features_plot: process_results[2],
-        mse_posture_plot: process_results[3],
-        mse_voice_plot: process_results[4],
-        mse_features_hist: process_results[5],
-        mse_posture_hist: process_results[6],
-        mse_voice_hist: process_results[7],
-        mse_features_heatmap: process_results[8],
-        mse_posture_heatmap: process_results[9],
-        mse_voice_heatmap: process_results[10],
-        anomaly_frames_features: process_results[11],
-        anomaly_frames_posture: process_results[12],
-        face_samples_most_frequent: process_results[13],
-        heatmap_video: process_results[14],
-        combined_mse_plot: process_results[15],
-        correlation_heatmap_plot: process_results[16],
-        video_display_facial: video,
-        video_display_body: video,
-        video_display_voice: video
-    }
+def show_results(outputs):
+    return gr.Group(visible=True)
 
 with gr.Blocks() as iface:
     gr.Markdown("""
```
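The error path has to respect the same contract as the success path: one value per output component, hence the `None` padding after the error message. Hard-coding the padded length is brittle; deriving it from the length of the wired `outputs` list keeps the two in sync automatically. `show_results` then only has to flip the hidden results group visible. A sketch of both patterns under those assumptions (`N_OUTPUTS` and the handler body are hypothetical; in Gradio 4 returning a component constructor such as `gr.Group(visible=True)` updates the existing component's props, where Gradio 3 used `gr.update(visible=True)`):

```python
import gradio as gr

N_OUTPUTS = 26  # hypothetical stand-in for len(outputs) in the click() wiring

def safe_handler(video_path):
    try:
        raise RuntimeError("demo failure")  # stand-in for the real pipeline
    except Exception as e:
        # One value per output component: error text first, placeholders after.
        return [f"An error occurred: {e}"] + [None] * (N_OUTPUTS - 1)

def show_results():
    # Reveal the hidden results group by updating its `visible` prop.
    return gr.Group(visible=True)
```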
```diff
@@ -98,92 +64,71 @@ with gr.Blocks() as iface:
     It extracts faces, postures, and voice from video frames, and analyzes them to identify anomalies using time series analysis and a variational autoencoder (VAE) approach.
     """)
 
-
+    with gr.Row():
+        video_input = gr.Video()
 
     anomaly_threshold = gr.Slider(minimum=1, maximum=5, step=0.1, value=3, label="Anomaly Detection Threshold (Standard deviation)")
     fps_slider = gr.Slider(minimum=5, maximum=20, step=1, value=10, label="Frames Per Second (FPS)")
     process_btn = gr.Button("Detect Anomalies")
+    progress_bar = gr.Progress()
+    execution_time = gr.Number(label="Execution Time (seconds)")
+
+    with gr.Group(visible=False) as results_group:
+        with gr.Tabs():
+            with gr.TabItem("Facial Features"):
+                results_text = gr.TextArea(label="Faces Breakdown", lines=5)
+                mse_features_plot = gr.Plot(label="MSE: Facial Features")
+                mse_features_hist = gr.Plot(label="MSE Distribution: Facial Features")
+                mse_features_heatmap = gr.Plot(label="MSE Heatmap: Facial Features")
+                anomaly_frames_features = gr.Gallery(label="Anomaly Frames (Facial Features)", columns=6, rows=2, height="auto")
+                face_samples_most_frequent = gr.Gallery(label="Most Frequent Person Samples", columns=10, rows=2, height="auto")
+
+            with gr.TabItem("Body Posture"):
+                mse_posture_plot = gr.Plot(label="MSE: Body Posture")
+                mse_posture_hist = gr.Plot(label="MSE Distribution: Body Posture")
+                mse_posture_heatmap = gr.Plot(label="MSE Heatmap: Body Posture")
+                anomaly_frames_posture = gr.Gallery(label="Anomaly Frames (Body Posture)", columns=6, rows=2, height="auto")
+
+            with gr.TabItem("Voice"):
+                mse_voice_plot = gr.Plot(label="MSE: Voice")
+                mse_voice_hist = gr.Plot(label="MSE Distribution: Voice")
+                mse_voice_heatmap = gr.Plot(label="MSE Heatmap: Voice")
+
+            with gr.TabItem("Combined"):
+                heatmap_video = gr.Video(label="Video with Anomaly Heatmap")
+                combined_mse_plot = gr.Plot(label="Combined MSE Plot")
+                correlation_heatmap_plot = gr.Plot(label="Correlation Heatmap")
+
+    df_store = gr.State()
+    mse_features_store = gr.State()
+    mse_posture_store = gr.State()
+    mse_voice_store = gr.State()
+    aligned_faces_folder_store = gr.State()
+    frames_folder_store = gr.State()
+    mse_heatmap_embeddings_store = gr.State()
+    mse_heatmap_posture_store = gr.State()
+    mse_heatmap_voice_store = gr.State()
 
-    execution_time = gr.Number(label="Execution Time (seconds)", visible=False)
-
-    description = gr.Markdown(visible=True, value="""
-    # Multimodal Behavioral Anomalies Detection
-
-    The purpose of this tool is to detect anomalies in facial expressions, body language, and voice over the timeline of a video.
-
-    It extracts faces, postures, and voice features from video frames, detects unique facial features, body postures, and speaker embeddings, and analyzes them to identify anomalies using time series analysis, specifically utilizing a variational autoencoder (VAE) approach.
-
-    ## Applications
-
-    - Identify suspicious behavior in surveillance footage.
-    - Analyze micro-expressions.
-    - Monitor and assess emotional states in communications.
-    - Evaluate changes in vocal tone and speech patterns.
-
-    ## Features
-
-    - **Face Extraction**: Extracts faces from video frames using the MTCNN model.
-    - **Feature Embeddings**: Extracts facial feature embeddings using the InceptionResnetV1 model.
-    - **Body Posture Analysis**: Evaluates body postures using MediaPipe Pose.
-    - **Voice Analysis**: Extracts and segment speaker embeddings from audio using PyAnnote.
-    - **Anomaly Detection**: Uses Variational Autoencoder (VAE) to detect anomalies in facial expressions, body postures, and voice features over time.
-    - **Visualization**: Represents changes in facial expressions, body postures, and vocal tone over time, marking anomaly key points.
-
-    ## Limitations
-
-    - **Evaluation Challenges**: Since this is an unsupervised method, there is no labeled data to compare against.
-    - **Subjectivity**: The concept of what constitutes an "anomaly" can be subjective and context-dependent.
-    - **Lighting and Resolution**: Variability in lighting conditions and camera resolution can affect the quality of detected features.
-    - **Audio Quality**: Background noise and poor audio quality can affect the accuracy of voice analysis.
-    - **Generalization**: The model may not generalize well to all types of videos and contexts.
-    - **Computationally Intensive**: Processing high-resolution video frames can be computationally demanding.
-
-    ## Conclusion
-    This tool offers solutions for detecting behavioral anomalies in video content. However, users should be aware of its limitations and interpret results with caution.
-    """)
-
-    results = gr.Tabs(visible=False)
-    with results:
-        with gr.TabItem("Facial Features"):
-            video_display_facial = gr.Video(label="Input Video")
-            results_text = gr.TextArea(label="Faces Breakdown", lines=5)
-            mse_features_plot = gr.Plot(label="MSE: Facial Features")
-            mse_features_hist = gr.Plot(label="MSE Distribution: Facial Features")
-            mse_features_heatmap = gr.Plot(label="MSE Heatmap: Facial Features")
-            anomaly_frames_features = gr.Gallery(label="Anomaly Frames (Facial Features)", columns=6, rows=2, height="auto")
-            face_samples_most_frequent = gr.Gallery(label="Most Frequent Person Samples", columns=10, rows=2, height="auto")
-
-        with gr.TabItem("Body Posture"):
-            video_display_body = gr.Video(label="Input Video")
-            mse_posture_plot = gr.Plot(label="MSE: Body Posture")
-            mse_posture_hist = gr.Plot(label="MSE Distribution: Body Posture")
-            mse_posture_heatmap = gr.Plot(label="MSE Heatmap: Body Posture")
-            anomaly_frames_posture = gr.Gallery(label="Anomaly Frames (Body Posture)", columns=6, rows=2, height="auto")
-
-        with gr.TabItem("Voice"):
-            video_display_voice = gr.Video(label="Input Video")
-            mse_voice_plot = gr.Plot(label="MSE: Voice")
-            mse_voice_hist = gr.Plot(label="MSE Distribution: Voice")
-            mse_voice_heatmap = gr.Plot(label="MSE Heatmap: Voice")
-
-        with gr.TabItem("Combined"):
-            heatmap_video = gr.Video(label="Video with Anomaly Heatmap")
-            combined_mse_plot = gr.Plot(label="Combined MSE Plot")
-            correlation_heatmap_plot = gr.Plot(label="Correlation Heatmap")
-
     process_btn.click(
-
+        process_and_show_completion,
         inputs=[video_input, anomaly_threshold, fps_slider],
         outputs=[
-            execution_time,
-
+            execution_time, results_text, df_store,
+            mse_features_store, mse_posture_store, mse_voice_store,
+            mse_features_plot, mse_posture_plot, mse_voice_plot,
             mse_features_hist, mse_posture_hist, mse_voice_hist,
             mse_features_heatmap, mse_posture_heatmap, mse_voice_heatmap,
             anomaly_frames_features, anomaly_frames_posture,
-            face_samples_most_frequent,
-
+            face_samples_most_frequent,
+            aligned_faces_folder_store, frames_folder_store,
+            mse_heatmap_embeddings_store, mse_heatmap_posture_store, mse_heatmap_voice_store,
+            heatmap_video, combined_mse_plot, correlation_heatmap_plot
         ]
+    ).then(
+        show_results,
+        inputs=None,
+        outputs=results_group
     )
 
 if __name__ == "__main__":
-    iface.launch(
+    iface.launch()
```
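The UI rewrite replaces the always-visible description block and the manually toggled `gr.Tabs` with a hidden `gr.Group` that a chained event reveals: `process_btn.click(...)` runs the pipeline and fills the visible components plus the `gr.State` holders, and `.then(...)` flips `results_group` visible once processing finishes. Two caveats seem worth flagging: `gr.Progress` is normally injected via a handler's default argument (as `process_and_show_completion` already does) rather than instantiated as a layout component, and `show_results` declares an `outputs` parameter although it is wired with `inputs=None`, so Gradio will call it with no arguments. A self-contained sketch of the click-then-reveal pattern, with hypothetical names throughout:

```python
import gradio as gr
import time

def heavy_job(x):
    time.sleep(1)                        # stand-in for the video pipeline
    return f"processed {x}", {"raw": x}  # second value lands in gr.State

def reveal():
    return gr.Group(visible=True)        # show the hidden results group

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    btn = gr.Button("Run")
    store = gr.State()                   # keeps raw data for later callbacks
    with gr.Group(visible=False) as results_group:
        out = gr.Textbox(label="Result")
    btn.click(heavy_job, inputs=inp, outputs=[out, store]).then(
        reveal, inputs=None, outputs=results_group
    )

if __name__ == "__main__":
    demo.launch()
```

Keeping the expensive handler and the visibility toggle as separate chained steps means the reveal only happens after a successful round-trip, and the `gr.State` holders let follow-up events reuse the raw DataFrame and MSE arrays without re-running the video analysis.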