Update app.py
app.py CHANGED
@@ -2,10 +2,13 @@ import gradio as gr
 import time
 from video_processing import process_video
 from PIL import Image
-import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
 
-
-
+# Ensure high DPI plots
+plt.rcParams['figure.dpi'] = 300
+plt.rcParams['savefig.dpi'] = 300
 
 def process_and_show_completion(video_input_path, anomaly_threshold_input, fps, progress=gr.Progress()):
     try:
@@ -33,14 +36,11 @@ def process_and_show_completion(video_input_path, anomaly_threshold_input, fps,
 
         output = [
             exec_time, results_summary,
-            df, mse_embeddings, mse_posture, mse_voice,
             mse_plot_embeddings, mse_plot_posture, mse_plot_voice,
             mse_histogram_embeddings, mse_histogram_posture, mse_histogram_voice,
             mse_heatmap_embeddings, mse_heatmap_posture, mse_heatmap_voice,
             anomaly_faces_embeddings_pil, anomaly_frames_posture_pil,
             face_samples_frequent,
-            aligned_faces_folder, frames_folder,
-            mse_embeddings, mse_posture, mse_voice,
             heatmap_video_path, combined_mse_plot, correlation_heatmap
         ]
 
@@ -51,10 +51,32 @@ def process_and_show_completion(video_input_path, anomaly_threshold_input, fps,
         print(error_message)
         import traceback
         traceback.print_exc()
-        return [error_message] + [None] *
+        return [error_message] + [None] * 16
 
-def show_results():
-
+def on_button_click(video, threshold, fps):
+    results = process_and_show_completion(video, threshold, fps)
+
+    return {
+        execution_time: gr.update(visible=True, value=results[0]),
+        results_tab: gr.update(visible=True),
+        description_tab: gr.update(visible=False),
+        results_text: results[1],
+        mse_features_plot: results[2],
+        mse_posture_plot: results[3],
+        mse_voice_plot: results[4],
+        mse_features_hist: results[5],
+        mse_posture_hist: results[6],
+        mse_voice_hist: results[7],
+        mse_features_heatmap: results[8],
+        mse_posture_heatmap: results[9],
+        mse_voice_heatmap: results[10],
+        anomaly_frames_features: results[11],
+        anomaly_frames_posture: results[12],
+        face_samples_most_frequent: results[13],
+        heatmap_video: results[14],
+        combined_mse_plot: results[15],
+        correlation_heatmap_plot: results[16]
+    }
 
 with gr.Blocks() as iface:
     gr.Markdown("""
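Aside: the new `on_button_click` uses Gradio's dict-return convention, where an event handler returns a dict keyed by output components instead of a positional tuple, and `gr.update(...)` changes component properties (such as `visible`) without replacing the component. Below is a minimal, self-contained sketch of that pattern, assuming a recent Gradio 3.x; the component names here are illustrative, not taken from app.py.

```python
import gradio as gr

with gr.Blocks() as demo:
    # Hidden until the handler reveals it, like execution_time in app.py.
    elapsed = gr.Number(label="Elapsed (s)", visible=False)
    with gr.Tabs():
        with gr.TabItem("Input") as input_tab:
            run_btn = gr.Button("Run")
        with gr.TabItem("Output", visible=False) as output_tab:
            answer = gr.Textbox(label="Answer")

    def on_run():
        # Dict keys are output components; gr.update() tweaks properties,
        # while a bare value simply sets the component's value.
        return {
            elapsed: gr.update(visible=True, value=1.23),
            input_tab: gr.update(visible=False),
            output_tab: gr.update(visible=True),
            answer: "done",
        }

    # Every dict key must also be listed in outputs.
    run_btn.click(on_run, inputs=None, outputs=[elapsed, input_tab, output_tab, answer])

demo.launch()
```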
@@ -71,10 +93,11 @@ with gr.Blocks() as iface:
     fps_slider = gr.Slider(minimum=5, maximum=20, step=1, value=10, label="Frames Per Second (FPS)")
     process_btn = gr.Button("Detect Anomalies")
     progress_bar = gr.Progress()
-
+
+    execution_time = gr.Number(label="Execution Time (seconds)", visible=False)
 
-    with gr.Tabs():
-        with gr.TabItem("Description"):
+    with gr.Tabs() as tabs:
+        with gr.TabItem("Description", id="description_tab") as description_tab:
             with gr.Column():
                 gr.Markdown("""
                 # Multimodal Behavioral Anomalies Detection
@@ -90,10 +113,29 @@ with gr.Blocks() as iface:
                 - Monitor and assess emotional states in communications.
                 - Evaluate changes in vocal tone and speech patterns.
 
-
+                ## Features
+
+                - **Face Extraction**: Extracts faces from video frames using the MTCNN model.
+                - **Feature Embeddings**: Extracts facial feature embeddings using the InceptionResnetV1 model.
+                - **Body Posture Analysis**: Evaluates body postures using MediaPipe Pose.
+                - **Voice Analysis**: Extracts and segments speaker embeddings from audio using PyAnnote.
+                - **Anomaly Detection**: Uses a Variational Autoencoder (VAE) to detect anomalies in facial expressions, body postures, and voice features over time.
+                - **Visualization**: Plots changes in facial expressions, body postures, and vocal tone over time, marking anomalous points.
+
+                ## Limitations
+
+                - **Evaluation Challenges**: Since this is an unsupervised method, there is no labeled data to compare against.
+                - **Subjectivity**: What constitutes an "anomaly" can be subjective and context-dependent.
+                - **Lighting and Resolution**: Variability in lighting conditions and camera resolution can affect the quality of detected features.
+                - **Audio Quality**: Background noise and poor audio quality can affect the accuracy of voice analysis.
+                - **Generalization**: The model may not generalize well to all types of videos and contexts.
+                - **Computationally Intensive**: Processing high-resolution video frames can be computationally demanding.
+
+                ## Conclusion
+                This tool offers solutions for detecting behavioral anomalies in video content. However, users should be aware of its limitations and interpret results with caution.
                 """)
 
-            with gr.TabItem("Results", visible=False) as results_group:
+            with gr.TabItem("Results", id="results_tab", visible=False) as results_tab:
                 with gr.Tabs():
                     with gr.TabItem("Facial Features"):
                         results_text = gr.TextArea(label="Faces Breakdown", lines=5)
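Aside: the Features list added above names MTCNN for face extraction and InceptionResnetV1 for facial embeddings. For readers unfamiliar with those models, here is a minimal sketch of that step, assuming the facenet-pytorch package, which ships both classes under these names; the app's actual pipeline lives in video_processing.py, which this commit does not touch.

```python
import torch
from PIL import Image
from facenet_pytorch import MTCNN, InceptionResnetV1

mtcnn = MTCNN(keep_all=True)  # detect and align every face in a frame
resnet = InceptionResnetV1(pretrained="vggface2").eval()  # 512-d embeddings

def frame_embeddings(frame: Image.Image) -> torch.Tensor:
    """Return one 512-d embedding per face detected in the frame."""
    faces = mtcnn(frame)  # (n_faces, 3, 160, 160) tensor, or None
    if faces is None:
        return torch.empty(0, 512)
    with torch.no_grad():
        return resnet(faces)
```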
@@ -119,37 +161,19 @@ with gr.Blocks() as iface:
                     combined_mse_plot = gr.Plot(label="Combined MSE Plot")
                     correlation_heatmap_plot = gr.Plot(label="Correlation Heatmap")
 
-
-    df_store = gr.State()
-    mse_features_store = gr.State()
-    mse_posture_store = gr.State()
-    mse_voice_store = gr.State()
-    aligned_faces_folder_store = gr.State()
-    frames_folder_store = gr.State()
-    mse_heatmap_embeddings_store = gr.State()
-    mse_heatmap_posture_store = gr.State()
-    mse_heatmap_voice_store = gr.State()
-
     process_btn.click(
-
+        fn=on_button_click,
         inputs=[video_input, anomaly_threshold, fps_slider],
         outputs=[
-            execution_time,
-
-            mse_features_plot, mse_posture_plot, mse_voice_plot,
+            execution_time, results_tab, description_tab,
+            results_text, mse_features_plot, mse_posture_plot, mse_voice_plot,
             mse_features_hist, mse_posture_hist, mse_voice_hist,
             mse_features_heatmap, mse_posture_heatmap, mse_voice_heatmap,
             anomaly_frames_features, anomaly_frames_posture,
-            face_samples_most_frequent,
-
-            mse_heatmap_embeddings_store, mse_heatmap_posture_store, mse_heatmap_voice_store,
-            heatmap_video, combined_mse_plot, correlation_heatmap_plot
+            face_samples_most_frequent, heatmap_video, combined_mse_plot,
+            correlation_heatmap_plot
         ]
-    ).then(
-        show_results,
-        inputs=None,
-        outputs=results_group
     )
 
 if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
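Aside: for the posture branch, the description names MediaPipe Pose. A hypothetical sketch of extracting per-frame pose landmarks with the mediapipe package follows; again, the real logic is in video_processing.py, and the sampling helper here is illustrative.

```python
import cv2
import mediapipe as mp

def pose_landmarks(video_path: str, fps_sample: int = 10):
    """Yield MediaPipe's 33 pose landmarks for sampled frames,
    or None for frames where no person is detected."""
    cap = cv2.VideoCapture(video_path)
    step = max(int(cap.get(cv2.CAP_PROP_FPS) // fps_sample), 1)
    with mp.solutions.pose.Pose(static_image_mode=False) as pose:
        idx = 0
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            if idx % step == 0:
                # MediaPipe expects RGB; OpenCV reads BGR.
                result = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                yield result.pose_landmarks
            idx += 1
    cap.release()
```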
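Aside: the anomaly scores surfaced throughout this UI (the MSE plots, histograms, and heatmaps) come from a VAE's reconstruction error, per the description. Below is a hypothetical, simplified sketch of that scoring in PyTorch, not the repo's implementation, assuming per-frame feature vectors such as the 512-d face embeddings.

```python
import numpy as np
import torch
import torch.nn as nn

class VAE(nn.Module):
    """Tiny VAE over per-frame feature vectors."""
    def __init__(self, dim: int, latent: int = 16):
        super().__init__()
        self.enc = nn.Sequential(nn.Linear(dim, 64), nn.ReLU())
        self.mu = nn.Linear(64, latent)
        self.logvar = nn.Linear(64, latent)
        self.dec = nn.Sequential(nn.Linear(latent, 64), nn.ReLU(), nn.Linear(64, dim))

    def forward(self, x):
        h = self.enc(x)
        mu, logvar = self.mu(h), self.logvar(h)
        z = mu + torch.randn_like(mu) * torch.exp(0.5 * logvar)  # reparameterization trick
        return self.dec(z), mu, logvar

def anomaly_scores(features: np.ndarray, threshold: float = 3.0, epochs: int = 200):
    """Train on all frames, then flag frames whose reconstruction MSE exceeds
    mean + threshold * std, the role the anomaly_threshold slider plays."""
    x = torch.tensor(features, dtype=torch.float32)
    model = VAE(x.shape[1])
    opt = torch.optim.Adam(model.parameters(), lr=1e-3)
    for _ in range(epochs):
        recon, mu, logvar = model(x)
        kld = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())
        loss = nn.functional.mse_loss(recon, x) + 1e-3 * kld
        opt.zero_grad()
        loss.backward()
        opt.step()
    with torch.no_grad():
        recon, _, _ = model(x)
        mse = ((recon - x) ** 2).mean(dim=1).numpy()
    return mse, mse > mse.mean() + threshold * mse.std()
```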