Update video_processing.py
Browse files - video_processing.py (+20 -51)
video_processing.py
CHANGED
@@ -94,12 +94,6 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
|
|
94 |
output_folder = "output"
|
95 |
os.makedirs(output_folder, exist_ok=True)
|
96 |
|
97 |
-
GRAPH_COLORS = {
|
98 |
-
'facial_embeddings': 'navy',
|
99 |
-
'body_posture': 'purple',
|
100 |
-
'voice': 'green'
|
101 |
-
}
|
102 |
-
|
103 |
with tempfile.TemporaryDirectory() as temp_dir:
|
104 |
aligned_faces_folder = os.path.join(temp_dir, 'aligned_faces')
|
105 |
organized_faces_folder = os.path.join(temp_dir, 'organized_faces')
|
@@ -151,97 +145,71 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
|
|
151 |
face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
|
152 |
|
153 |
progress(0.8, "Extracting audio and performing voice analysis")
|
154 |
-
|
155 |
audio_path = extract_audio_from_video(video_path)
|
156 |
diarization, most_frequent_speaker = diarize_speakers(audio_path)
|
157 |
voice_embeddings, audio_duration = get_speaker_embeddings(audio_path, diarization, most_frequent_speaker)
|
158 |
-
|
159 |
aligned_voice_embeddings = align_voice_embeddings(voice_embeddings, frame_count, original_fps, audio_duration)
|
160 |
-
|
161 |
progress(0.85, "Performing anomaly detection")
|
162 |
embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
|
163 |
-
|
164 |
-
# Ensure X_voice has the same length as X_embeddings
|
165 |
X_embeddings = df[embedding_columns].values
|
166 |
X_posture = np.array([posture_scores_by_frame.get(frame, None) for frame in df['Frame']])
|
167 |
X_posture = X_posture[X_posture != None].reshape(-1, 1)
|
168 |
X_voice = np.array(aligned_voice_embeddings)
|
169 |
-
|
170 |
|
171 |
-
# Trim or pad X_voice to match X_embeddings length
|
172 |
if len(X_voice) > len(X_embeddings):
|
173 |
X_voice = X_voice[:len(X_embeddings)]
|
174 |
elif len(X_voice) < len(X_embeddings):
|
175 |
padding = np.zeros((len(X_embeddings) - len(X_voice), X_voice.shape[1]))
|
176 |
X_voice = np.vstack((X_voice, padding))
|
177 |
-
|
178 |
try:
|
179 |
if len(X_posture) == 0:
|
180 |
raise ValueError("No valid posture data found")
|
181 |
-
|
182 |
mse_embeddings, mse_posture, mse_voice = anomaly_detection(X_embeddings, X_posture, X_voice)
|
183 |
-
|
184 |
progress(0.9, "Generating graphs")
|
185 |
mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
|
186 |
-
color=
|
187 |
anomaly_threshold=anomaly_threshold)
|
188 |
|
189 |
mse_histogram_embeddings = plot_mse_histogram(mse_embeddings, "MSE Distribution: Facial Features",
|
190 |
-
anomaly_threshold, color=
|
191 |
|
192 |
mse_plot_posture, anomaly_frames_posture = plot_mse(df, mse_posture, "Body Posture",
|
193 |
-
color=
|
194 |
anomaly_threshold=anomaly_threshold)
|
195 |
|
196 |
mse_histogram_posture = plot_mse_histogram(mse_posture, "MSE Distribution: Body Posture",
|
197 |
-
anomaly_threshold, color=
|
198 |
|
199 |
mse_plot_voice, anomaly_frames_voice = plot_mse(df, mse_voice, "Voice",
|
200 |
-
color=
|
201 |
anomaly_threshold=anomaly_threshold)
|
202 |
|
203 |
mse_histogram_voice = plot_mse_histogram(mse_voice, "MSE Distribution: Voice",
|
204 |
-
anomaly_threshold, color=
|
205 |
|
206 |
mse_heatmap_embeddings = plot_mse_heatmap(mse_embeddings, "Facial Features MSE Heatmap", df)
|
207 |
mse_heatmap_posture = plot_mse_heatmap(mse_posture, "Body Posture MSE Heatmap", df)
|
208 |
mse_heatmap_voice = plot_mse_heatmap(mse_voice, "Voice MSE Heatmap", df)
|
209 |
|
210 |
-
# Generate the correlation heatmap
|
211 |
correlation_heatmap = plot_correlation_heatmap(mse_embeddings, mse_posture, mse_voice)
|
212 |
-
|
213 |
-
# Generate the 3D scatter plot
|
214 |
scatter_plot_3d = plot_3d_scatter(mse_embeddings, mse_posture, mse_voice)
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
video_path, df, mse_embeddings, mse_posture, mse_voice,
|
223 |
-
output_folder, original_fps, largest_cluster
|
224 |
-
)
|
225 |
-
|
226 |
-
if heatmap_video_path is None:
|
227 |
-
print("Failed to create heatmap video")
|
228 |
-
else:
|
229 |
-
print(f"Heatmap video path from create_video_with_heatmap: {heatmap_video_path}")
|
230 |
-
|
231 |
-
if progress is not None:
|
232 |
-
progress(1.0, desc="Video processing complete")
|
233 |
-
except Exception as e:
|
234 |
-
print(f"Error in create_video_with_heatmap: {str(e)}")
|
235 |
-
import traceback
|
236 |
-
traceback.print_exc()
|
237 |
-
heatmap_video_path = None
|
238 |
-
|
239 |
-
|
240 |
except Exception as e:
|
241 |
print(f"Error details: {str(e)}")
|
242 |
import traceback
|
243 |
traceback.print_exc()
|
244 |
-
return (f"Error in video processing: {str(e)}",) + (None,) *
|
245 |
|
246 |
progress(1.0, "Preparing results")
|
247 |
results = f"Number of persons detected: {num_clusters}\n\n"
|
@@ -314,6 +282,7 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
|
|
314 |
)
|
315 |
|
316 |
|
|
|
317 |
def is_frontal_face(landmarks, threshold=60):
|
318 |
nose_tip = landmarks[4]
|
319 |
left_chin = landmarks[234]
|
|
|
94 |
output_folder = "output"
|
95 |
os.makedirs(output_folder, exist_ok=True)
|
96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
with tempfile.TemporaryDirectory() as temp_dir:
|
98 |
aligned_faces_folder = os.path.join(temp_dir, 'aligned_faces')
|
99 |
organized_faces_folder = os.path.join(temp_dir, 'organized_faces')
|
|
|
145 |
face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
|
146 |
|
147 |
progress(0.8, "Extracting audio and performing voice analysis")
|
|
|
148 |
audio_path = extract_audio_from_video(video_path)
|
149 |
diarization, most_frequent_speaker = diarize_speakers(audio_path)
|
150 |
voice_embeddings, audio_duration = get_speaker_embeddings(audio_path, diarization, most_frequent_speaker)
|
|
|
151 |
aligned_voice_embeddings = align_voice_embeddings(voice_embeddings, frame_count, original_fps, audio_duration)
|
152 |
+
|
153 |
progress(0.85, "Performing anomaly detection")
|
154 |
embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
|
155 |
+
|
|
|
156 |
X_embeddings = df[embedding_columns].values
|
157 |
X_posture = np.array([posture_scores_by_frame.get(frame, None) for frame in df['Frame']])
|
158 |
X_posture = X_posture[X_posture != None].reshape(-1, 1)
|
159 |
X_voice = np.array(aligned_voice_embeddings)
|
|
|
160 |
|
|
|
161 |
if len(X_voice) > len(X_embeddings):
|
162 |
X_voice = X_voice[:len(X_embeddings)]
|
163 |
elif len(X_voice) < len(X_embeddings):
|
164 |
padding = np.zeros((len(X_embeddings) - len(X_voice), X_voice.shape[1]))
|
165 |
X_voice = np.vstack((X_voice, padding))
|
166 |
+
|
167 |
try:
|
168 |
if len(X_posture) == 0:
|
169 |
raise ValueError("No valid posture data found")
|
170 |
+
|
171 |
mse_embeddings, mse_posture, mse_voice = anomaly_detection(X_embeddings, X_posture, X_voice)
|
172 |
+
|
173 |
progress(0.9, "Generating graphs")
|
174 |
mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
|
175 |
+
color='navy',
|
176 |
anomaly_threshold=anomaly_threshold)
|
177 |
|
178 |
mse_histogram_embeddings = plot_mse_histogram(mse_embeddings, "MSE Distribution: Facial Features",
|
179 |
+
anomaly_threshold, color='navy')
|
180 |
|
181 |
mse_plot_posture, anomaly_frames_posture = plot_mse(df, mse_posture, "Body Posture",
|
182 |
+
color='purple',
|
183 |
anomaly_threshold=anomaly_threshold)
|
184 |
|
185 |
mse_histogram_posture = plot_mse_histogram(mse_posture, "MSE Distribution: Body Posture",
|
186 |
+
anomaly_threshold, color='purple')
|
187 |
|
188 |
mse_plot_voice, anomaly_frames_voice = plot_mse(df, mse_voice, "Voice",
|
189 |
+
color='green',
|
190 |
anomaly_threshold=anomaly_threshold)
|
191 |
|
192 |
mse_histogram_voice = plot_mse_histogram(mse_voice, "MSE Distribution: Voice",
|
193 |
+
anomaly_threshold, color='green')
|
194 |
|
195 |
mse_heatmap_embeddings = plot_mse_heatmap(mse_embeddings, "Facial Features MSE Heatmap", df)
|
196 |
mse_heatmap_posture = plot_mse_heatmap(mse_posture, "Body Posture MSE Heatmap", df)
|
197 |
mse_heatmap_voice = plot_mse_heatmap(mse_voice, "Voice MSE Heatmap", df)
|
198 |
|
|
|
199 |
correlation_heatmap = plot_correlation_heatmap(mse_embeddings, mse_posture, mse_voice)
|
|
|
|
|
200 |
scatter_plot_3d = plot_3d_scatter(mse_embeddings, mse_posture, mse_voice)
|
201 |
+
|
202 |
+
progress(0.95, "Generating video with heatmap")
|
203 |
+
heatmap_video_path = create_video_with_heatmap(video_path, df, mse_embeddings, mse_posture, mse_voice,
|
204 |
+
output_folder, original_fps, largest_cluster)
|
205 |
+
|
206 |
+
progress(1.0, "Video processing complete")
|
207 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
208 |
except Exception as e:
|
209 |
print(f"Error details: {str(e)}")
|
210 |
import traceback
|
211 |
traceback.print_exc()
|
212 |
+
return (f"Error in video processing: {str(e)}",) + (None,) * 25
|
213 |
|
214 |
progress(1.0, "Preparing results")
|
215 |
results = f"Number of persons detected: {num_clusters}\n\n"
|
|
|
282 |
)
|
283 |
|
284 |
|
285 |
+
|
286 |
def is_frontal_face(landmarks, threshold=60):
|
287 |
nose_tip = landmarks[4]
|
288 |
left_chin = landmarks[234]
|