reab5555 committed on
Commit
064088f
·
verified ·
1 Parent(s): 695f153

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -117
app.py CHANGED
@@ -2,13 +2,10 @@ import gradio as gr
2
  import time
3
  from video_processing import process_video
4
  from PIL import Image
5
- import matplotlib.pyplot as plt
6
- import numpy as np
7
- import pandas as pd
8
 
9
- # Ensure high DPI plots
10
- plt.rcParams['figure.dpi'] = 300
11
- plt.rcParams['savefig.dpi'] = 300
12
 
13
  def process_and_show_completion(video_input_path, anomaly_threshold_input, fps, progress=gr.Progress()):
14
  try:
@@ -36,11 +33,14 @@ def process_and_show_completion(video_input_path, anomaly_threshold_input, fps,
36
 
37
  output = [
38
  exec_time, results_summary,
 
39
  mse_plot_embeddings, mse_plot_posture, mse_plot_voice,
40
  mse_histogram_embeddings, mse_histogram_posture, mse_histogram_voice,
41
  mse_heatmap_embeddings, mse_heatmap_posture, mse_heatmap_voice,
42
  anomaly_faces_embeddings_pil, anomaly_frames_posture_pil,
43
  face_samples_frequent,
 
 
44
  heatmap_video_path, combined_mse_plot, correlation_heatmap
45
  ]
46
 
@@ -51,44 +51,10 @@ def process_and_show_completion(video_input_path, anomaly_threshold_input, fps,
51
  print(error_message)
52
  import traceback
53
  traceback.print_exc()
54
- return [error_message] + [None] * 16
55
 
56
- def on_button_click(video, threshold, fps):
57
- start_time = time.time()
58
-
59
- # Show execution time immediately and hide description
60
- yield {
61
- execution_time: gr.update(visible=True, value=0),
62
- description: gr.update(visible=False),
63
- results: gr.update(visible=True)
64
- }
65
-
66
- process_results = process_and_show_completion(video, threshold, fps)
67
- end_time = time.time()
68
- exec_time = end_time - start_time
69
-
70
- return {
71
- execution_time: gr.update(visible=True, value=exec_time),
72
- results_text: process_results[1],
73
- mse_features_plot: process_results[2],
74
- mse_posture_plot: process_results[3],
75
- mse_voice_plot: process_results[4],
76
- mse_features_hist: process_results[5],
77
- mse_posture_hist: process_results[6],
78
- mse_voice_hist: process_results[7],
79
- mse_features_heatmap: process_results[8],
80
- mse_posture_heatmap: process_results[9],
81
- mse_voice_heatmap: process_results[10],
82
- anomaly_frames_features: process_results[11],
83
- anomaly_frames_posture: process_results[12],
84
- face_samples_most_frequent: process_results[13],
85
- heatmap_video: process_results[14],
86
- combined_mse_plot: process_results[15],
87
- correlation_heatmap_plot: process_results[16],
88
- video_display_facial: video,
89
- video_display_body: video,
90
- video_display_voice: video
91
- }
92
 
93
  with gr.Blocks() as iface:
94
  gr.Markdown("""
@@ -98,92 +64,71 @@ with gr.Blocks() as iface:
98
  It extracts faces, postures, and voice from video frames, and analyzes them to identify anomalies using time series analysis and a variational autoencoder (VAE) approach.
99
  """)
100
 
101
- video_input = gr.Video(label="Input Video", visible=True)
 
102
 
103
  anomaly_threshold = gr.Slider(minimum=1, maximum=5, step=0.1, value=3, label="Anomaly Detection Threshold (Standard deviation)")
104
  fps_slider = gr.Slider(minimum=5, maximum=20, step=1, value=10, label="Frames Per Second (FPS)")
105
  process_btn = gr.Button("Detect Anomalies")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
- execution_time = gr.Number(label="Execution Time (seconds)", visible=False)
108
-
109
- description = gr.Markdown(visible=True, value="""
110
- # Multimodal Behavioral Anomalies Detection
111
-
112
- The purpose of this tool is to detect anomalies in facial expressions, body language, and voice over the timeline of a video.
113
-
114
- It extracts faces, postures, and voice features from video frames, detects unique facial features, body postures, and speaker embeddings, and analyzes them to identify anomalies using time series analysis, specifically utilizing a variational autoencoder (VAE) approach.
115
-
116
- ## Applications
117
-
118
- - Identify suspicious behavior in surveillance footage.
119
- - Analyze micro-expressions.
120
- - Monitor and assess emotional states in communications.
121
- - Evaluate changes in vocal tone and speech patterns.
122
-
123
- ## Features
124
-
125
- - **Face Extraction**: Extracts faces from video frames using the MTCNN model.
126
- - **Feature Embeddings**: Extracts facial feature embeddings using the InceptionResnetV1 model.
127
- - **Body Posture Analysis**: Evaluates body postures using MediaPipe Pose.
128
- **Voice Analysis**: Extracts and segments speaker embeddings from audio using PyAnnote.
129
- - **Anomaly Detection**: Uses Variational Autoencoder (VAE) to detect anomalies in facial expressions, body postures, and voice features over time.
130
- - **Visualization**: Represents changes in facial expressions, body postures, and vocal tone over time, marking anomaly key points.
131
-
132
- ## Limitations
133
-
134
- - **Evaluation Challenges**: Since this is an unsupervised method, there is no labeled data to compare against.
135
- - **Subjectivity**: The concept of what constitutes an "anomaly" can be subjective and context-dependent.
136
- - **Lighting and Resolution**: Variability in lighting conditions and camera resolution can affect the quality of detected features.
137
- - **Audio Quality**: Background noise and poor audio quality can affect the accuracy of voice analysis.
138
- - **Generalization**: The model may not generalize well to all types of videos and contexts.
139
- - **Computationally Intensive**: Processing high-resolution video frames can be computationally demanding.
140
-
141
- ## Conclusion
142
- This tool offers solutions for detecting behavioral anomalies in video content. However, users should be aware of its limitations and interpret results with caution.
143
- """)
144
-
145
- results = gr.Tabs(visible=False)
146
- with results:
147
- with gr.TabItem("Facial Features"):
148
- video_display_facial = gr.Video(label="Input Video")
149
- results_text = gr.TextArea(label="Faces Breakdown", lines=5)
150
- mse_features_plot = gr.Plot(label="MSE: Facial Features")
151
- mse_features_hist = gr.Plot(label="MSE Distribution: Facial Features")
152
- mse_features_heatmap = gr.Plot(label="MSE Heatmap: Facial Features")
153
- anomaly_frames_features = gr.Gallery(label="Anomaly Frames (Facial Features)", columns=6, rows=2, height="auto")
154
- face_samples_most_frequent = gr.Gallery(label="Most Frequent Person Samples", columns=10, rows=2, height="auto")
155
-
156
- with gr.TabItem("Body Posture"):
157
- video_display_body = gr.Video(label="Input Video")
158
- mse_posture_plot = gr.Plot(label="MSE: Body Posture")
159
- mse_posture_hist = gr.Plot(label="MSE Distribution: Body Posture")
160
- mse_posture_heatmap = gr.Plot(label="MSE Heatmap: Body Posture")
161
- anomaly_frames_posture = gr.Gallery(label="Anomaly Frames (Body Posture)", columns=6, rows=2, height="auto")
162
-
163
- with gr.TabItem("Voice"):
164
- video_display_voice = gr.Video(label="Input Video")
165
- mse_voice_plot = gr.Plot(label="MSE: Voice")
166
- mse_voice_hist = gr.Plot(label="MSE Distribution: Voice")
167
- mse_voice_heatmap = gr.Plot(label="MSE Heatmap: Voice")
168
-
169
- with gr.TabItem("Combined"):
170
- heatmap_video = gr.Video(label="Video with Anomaly Heatmap")
171
- combined_mse_plot = gr.Plot(label="Combined MSE Plot")
172
- correlation_heatmap_plot = gr.Plot(label="Correlation Heatmap")
173
-
174
  process_btn.click(
175
- fn=on_button_click,
176
  inputs=[video_input, anomaly_threshold, fps_slider],
177
  outputs=[
178
- execution_time, description, results,
179
- results_text, mse_features_plot, mse_posture_plot, mse_voice_plot,
 
180
  mse_features_hist, mse_posture_hist, mse_voice_hist,
181
  mse_features_heatmap, mse_posture_heatmap, mse_voice_heatmap,
182
  anomaly_frames_features, anomaly_frames_posture,
183
- face_samples_most_frequent, heatmap_video, combined_mse_plot,
184
- correlation_heatmap_plot, video_display_facial, video_display_body, video_display_voice
 
 
185
  ]
 
 
 
 
186
  )
187
 
188
  if __name__ == "__main__":
189
- iface.launch(share=True)
 
2
  import time
3
  from video_processing import process_video
4
  from PIL import Image
5
+ import matplotlib
 
 
6
 
7
+ matplotlib.rcParams['figure.dpi'] = 300
8
+ matplotlib.rcParams['savefig.dpi'] = 300
 
9
 
10
  def process_and_show_completion(video_input_path, anomaly_threshold_input, fps, progress=gr.Progress()):
11
  try:
 
33
 
34
  output = [
35
  exec_time, results_summary,
36
+ df, mse_embeddings, mse_posture, mse_voice,
37
  mse_plot_embeddings, mse_plot_posture, mse_plot_voice,
38
  mse_histogram_embeddings, mse_histogram_posture, mse_histogram_voice,
39
  mse_heatmap_embeddings, mse_heatmap_posture, mse_heatmap_voice,
40
  anomaly_faces_embeddings_pil, anomaly_frames_posture_pil,
41
  face_samples_frequent,
42
+ aligned_faces_folder, frames_folder,
43
+ mse_embeddings, mse_posture, mse_voice,
44
  heatmap_video_path, combined_mse_plot, correlation_heatmap
45
  ]
46
 
 
51
  print(error_message)
52
  import traceback
53
  traceback.print_exc()
54
+ return [error_message] + [None] * 27
55
 
56
+ def show_results(outputs):
57
+ return gr.Group(visible=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  with gr.Blocks() as iface:
60
  gr.Markdown("""
 
64
  It extracts faces, postures, and voice from video frames, and analyzes them to identify anomalies using time series analysis and a variational autoencoder (VAE) approach.
65
  """)
66
 
67
+ with gr.Row():
68
+ video_input = gr.Video()
69
 
70
  anomaly_threshold = gr.Slider(minimum=1, maximum=5, step=0.1, value=3, label="Anomaly Detection Threshold (Standard deviation)")
71
  fps_slider = gr.Slider(minimum=5, maximum=20, step=1, value=10, label="Frames Per Second (FPS)")
72
  process_btn = gr.Button("Detect Anomalies")
73
+ progress_bar = gr.Progress()
74
+ execution_time = gr.Number(label="Execution Time (seconds)")
75
+
76
+ with gr.Group(visible=False) as results_group:
77
+ with gr.Tabs():
78
+ with gr.TabItem("Facial Features"):
79
+ results_text = gr.TextArea(label="Faces Breakdown", lines=5)
80
+ mse_features_plot = gr.Plot(label="MSE: Facial Features")
81
+ mse_features_hist = gr.Plot(label="MSE Distribution: Facial Features")
82
+ mse_features_heatmap = gr.Plot(label="MSE Heatmap: Facial Features")
83
+ anomaly_frames_features = gr.Gallery(label="Anomaly Frames (Facial Features)", columns=6, rows=2, height="auto")
84
+ face_samples_most_frequent = gr.Gallery(label="Most Frequent Person Samples", columns=10, rows=2, height="auto")
85
+
86
+ with gr.TabItem("Body Posture"):
87
+ mse_posture_plot = gr.Plot(label="MSE: Body Posture")
88
+ mse_posture_hist = gr.Plot(label="MSE Distribution: Body Posture")
89
+ mse_posture_heatmap = gr.Plot(label="MSE Heatmap: Body Posture")
90
+ anomaly_frames_posture = gr.Gallery(label="Anomaly Frames (Body Posture)", columns=6, rows=2, height="auto")
91
+
92
+ with gr.TabItem("Voice"):
93
+ mse_voice_plot = gr.Plot(label="MSE: Voice")
94
+ mse_voice_hist = gr.Plot(label="MSE Distribution: Voice")
95
+ mse_voice_heatmap = gr.Plot(label="MSE Heatmap: Voice")
96
+
97
+ with gr.TabItem("Combined"):
98
+ heatmap_video = gr.Video(label="Video with Anomaly Heatmap")
99
+ combined_mse_plot = gr.Plot(label="Combined MSE Plot")
100
+ correlation_heatmap_plot = gr.Plot(label="Correlation Heatmap")
101
+
102
+ df_store = gr.State()
103
+ mse_features_store = gr.State()
104
+ mse_posture_store = gr.State()
105
+ mse_voice_store = gr.State()
106
+ aligned_faces_folder_store = gr.State()
107
+ frames_folder_store = gr.State()
108
+ mse_heatmap_embeddings_store = gr.State()
109
+ mse_heatmap_posture_store = gr.State()
110
+ mse_heatmap_voice_store = gr.State()
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  process_btn.click(
113
+ process_and_show_completion,
114
  inputs=[video_input, anomaly_threshold, fps_slider],
115
  outputs=[
116
+ execution_time, results_text, df_store,
117
+ mse_features_store, mse_posture_store, mse_voice_store,
118
+ mse_features_plot, mse_posture_plot, mse_voice_plot,
119
  mse_features_hist, mse_posture_hist, mse_voice_hist,
120
  mse_features_heatmap, mse_posture_heatmap, mse_voice_heatmap,
121
  anomaly_frames_features, anomaly_frames_posture,
122
+ face_samples_most_frequent,
123
+ aligned_faces_folder_store, frames_folder_store,
124
+ mse_heatmap_embeddings_store, mse_heatmap_posture_store, mse_heatmap_voice_store,
125
+ heatmap_video, combined_mse_plot, correlation_heatmap_plot
126
  ]
127
+ ).then(
128
+ show_results,
129
+ inputs=None,
130
+ outputs=results_group
131
  )
132
 
133
  if __name__ == "__main__":
134
+ iface.launch()