Spaces:
Runtime error
Runtime error
Update video_processing.py
Browse files - video_processing.py (+20 -51)
video_processing.py
CHANGED
|
@@ -94,12 +94,6 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
|
|
| 94 |
output_folder = "output"
|
| 95 |
os.makedirs(output_folder, exist_ok=True)
|
| 96 |
|
| 97 |
-
GRAPH_COLORS = {
|
| 98 |
-
'facial_embeddings': 'navy',
|
| 99 |
-
'body_posture': 'purple',
|
| 100 |
-
'voice': 'green'
|
| 101 |
-
}
|
| 102 |
-
|
| 103 |
with tempfile.TemporaryDirectory() as temp_dir:
|
| 104 |
aligned_faces_folder = os.path.join(temp_dir, 'aligned_faces')
|
| 105 |
organized_faces_folder = os.path.join(temp_dir, 'organized_faces')
|
|
@@ -151,97 +145,71 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
|
|
| 151 |
face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
|
| 152 |
|
| 153 |
progress(0.8, "Extracting audio and performing voice analysis")
|
| 154 |
-
|
| 155 |
audio_path = extract_audio_from_video(video_path)
|
| 156 |
diarization, most_frequent_speaker = diarize_speakers(audio_path)
|
| 157 |
voice_embeddings, audio_duration = get_speaker_embeddings(audio_path, diarization, most_frequent_speaker)
|
| 158 |
-
|
| 159 |
aligned_voice_embeddings = align_voice_embeddings(voice_embeddings, frame_count, original_fps, audio_duration)
|
| 160 |
-
|
| 161 |
progress(0.85, "Performing anomaly detection")
|
| 162 |
embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
|
| 163 |
-
|
| 164 |
-
# Ensure X_voice has the same length as X_embeddings
|
| 165 |
X_embeddings = df[embedding_columns].values
|
| 166 |
X_posture = np.array([posture_scores_by_frame.get(frame, None) for frame in df['Frame']])
|
| 167 |
X_posture = X_posture[X_posture != None].reshape(-1, 1)
|
| 168 |
X_voice = np.array(aligned_voice_embeddings)
|
| 169 |
-
|
| 170 |
|
| 171 |
-
# Trim or pad X_voice to match X_embeddings length
|
| 172 |
if len(X_voice) > len(X_embeddings):
|
| 173 |
X_voice = X_voice[:len(X_embeddings)]
|
| 174 |
elif len(X_voice) < len(X_embeddings):
|
| 175 |
padding = np.zeros((len(X_embeddings) - len(X_voice), X_voice.shape[1]))
|
| 176 |
X_voice = np.vstack((X_voice, padding))
|
| 177 |
-
|
| 178 |
try:
|
| 179 |
if len(X_posture) == 0:
|
| 180 |
raise ValueError("No valid posture data found")
|
| 181 |
-
|
| 182 |
mse_embeddings, mse_posture, mse_voice = anomaly_detection(X_embeddings, X_posture, X_voice)
|
| 183 |
-
|
| 184 |
progress(0.9, "Generating graphs")
|
| 185 |
mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
|
| 186 |
-
color=
|
| 187 |
anomaly_threshold=anomaly_threshold)
|
| 188 |
|
| 189 |
mse_histogram_embeddings = plot_mse_histogram(mse_embeddings, "MSE Distribution: Facial Features",
|
| 190 |
-
anomaly_threshold, color=
|
| 191 |
|
| 192 |
mse_plot_posture, anomaly_frames_posture = plot_mse(df, mse_posture, "Body Posture",
|
| 193 |
-
color=
|
| 194 |
anomaly_threshold=anomaly_threshold)
|
| 195 |
|
| 196 |
mse_histogram_posture = plot_mse_histogram(mse_posture, "MSE Distribution: Body Posture",
|
| 197 |
-
anomaly_threshold, color=
|
| 198 |
|
| 199 |
mse_plot_voice, anomaly_frames_voice = plot_mse(df, mse_voice, "Voice",
|
| 200 |
-
color=
|
| 201 |
anomaly_threshold=anomaly_threshold)
|
| 202 |
|
| 203 |
mse_histogram_voice = plot_mse_histogram(mse_voice, "MSE Distribution: Voice",
|
| 204 |
-
anomaly_threshold, color=
|
| 205 |
|
| 206 |
mse_heatmap_embeddings = plot_mse_heatmap(mse_embeddings, "Facial Features MSE Heatmap", df)
|
| 207 |
mse_heatmap_posture = plot_mse_heatmap(mse_posture, "Body Posture MSE Heatmap", df)
|
| 208 |
mse_heatmap_voice = plot_mse_heatmap(mse_voice, "Voice MSE Heatmap", df)
|
| 209 |
|
| 210 |
-
# Generate the correlation heatmap
|
| 211 |
correlation_heatmap = plot_correlation_heatmap(mse_embeddings, mse_posture, mse_voice)
|
| 212 |
-
|
| 213 |
-
# Generate the 3D scatter plot
|
| 214 |
scatter_plot_3d = plot_3d_scatter(mse_embeddings, mse_posture, mse_voice)
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
video_path, df, mse_embeddings, mse_posture, mse_voice,
|
| 223 |
-
output_folder, original_fps, largest_cluster
|
| 224 |
-
)
|
| 225 |
-
|
| 226 |
-
if heatmap_video_path is None:
|
| 227 |
-
print("Failed to create heatmap video")
|
| 228 |
-
else:
|
| 229 |
-
print(f"Heatmap video path from create_video_with_heatmap: {heatmap_video_path}")
|
| 230 |
-
|
| 231 |
-
if progress is not None:
|
| 232 |
-
progress(1.0, desc="Video processing complete")
|
| 233 |
-
except Exception as e:
|
| 234 |
-
print(f"Error in create_video_with_heatmap: {str(e)}")
|
| 235 |
-
import traceback
|
| 236 |
-
traceback.print_exc()
|
| 237 |
-
heatmap_video_path = None
|
| 238 |
-
|
| 239 |
-
|
| 240 |
except Exception as e:
|
| 241 |
print(f"Error details: {str(e)}")
|
| 242 |
import traceback
|
| 243 |
traceback.print_exc()
|
| 244 |
-
return (f"Error in video processing: {str(e)}",) + (None,) *
|
| 245 |
|
| 246 |
progress(1.0, "Preparing results")
|
| 247 |
results = f"Number of persons detected: {num_clusters}\n\n"
|
|
@@ -314,6 +282,7 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
|
|
| 314 |
)
|
| 315 |
|
| 316 |
|
|
|
|
| 317 |
def is_frontal_face(landmarks, threshold=60):
|
| 318 |
nose_tip = landmarks[4]
|
| 319 |
left_chin = landmarks[234]
|
|
|
|
| 94 |
output_folder = "output"
|
| 95 |
os.makedirs(output_folder, exist_ok=True)
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
with tempfile.TemporaryDirectory() as temp_dir:
|
| 98 |
aligned_faces_folder = os.path.join(temp_dir, 'aligned_faces')
|
| 99 |
organized_faces_folder = os.path.join(temp_dir, 'organized_faces')
|
|
|
|
| 145 |
face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
|
| 146 |
|
| 147 |
progress(0.8, "Extracting audio and performing voice analysis")
|
|
|
|
| 148 |
audio_path = extract_audio_from_video(video_path)
|
| 149 |
diarization, most_frequent_speaker = diarize_speakers(audio_path)
|
| 150 |
voice_embeddings, audio_duration = get_speaker_embeddings(audio_path, diarization, most_frequent_speaker)
|
|
|
|
| 151 |
aligned_voice_embeddings = align_voice_embeddings(voice_embeddings, frame_count, original_fps, audio_duration)
|
| 152 |
+
|
| 153 |
progress(0.85, "Performing anomaly detection")
|
| 154 |
embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
|
| 155 |
+
|
|
|
|
| 156 |
X_embeddings = df[embedding_columns].values
|
| 157 |
X_posture = np.array([posture_scores_by_frame.get(frame, None) for frame in df['Frame']])
|
| 158 |
X_posture = X_posture[X_posture != None].reshape(-1, 1)
|
| 159 |
X_voice = np.array(aligned_voice_embeddings)
|
|
|
|
| 160 |
|
|
|
|
| 161 |
if len(X_voice) > len(X_embeddings):
|
| 162 |
X_voice = X_voice[:len(X_embeddings)]
|
| 163 |
elif len(X_voice) < len(X_embeddings):
|
| 164 |
padding = np.zeros((len(X_embeddings) - len(X_voice), X_voice.shape[1]))
|
| 165 |
X_voice = np.vstack((X_voice, padding))
|
| 166 |
+
|
| 167 |
try:
|
| 168 |
if len(X_posture) == 0:
|
| 169 |
raise ValueError("No valid posture data found")
|
| 170 |
+
|
| 171 |
mse_embeddings, mse_posture, mse_voice = anomaly_detection(X_embeddings, X_posture, X_voice)
|
| 172 |
+
|
| 173 |
progress(0.9, "Generating graphs")
|
| 174 |
mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
|
| 175 |
+
color='navy',
|
| 176 |
anomaly_threshold=anomaly_threshold)
|
| 177 |
|
| 178 |
mse_histogram_embeddings = plot_mse_histogram(mse_embeddings, "MSE Distribution: Facial Features",
|
| 179 |
+
anomaly_threshold, color='navy')
|
| 180 |
|
| 181 |
mse_plot_posture, anomaly_frames_posture = plot_mse(df, mse_posture, "Body Posture",
|
| 182 |
+
color='purple',
|
| 183 |
anomaly_threshold=anomaly_threshold)
|
| 184 |
|
| 185 |
mse_histogram_posture = plot_mse_histogram(mse_posture, "MSE Distribution: Body Posture",
|
| 186 |
+
anomaly_threshold, color='purple')
|
| 187 |
|
| 188 |
mse_plot_voice, anomaly_frames_voice = plot_mse(df, mse_voice, "Voice",
|
| 189 |
+
color='green',
|
| 190 |
anomaly_threshold=anomaly_threshold)
|
| 191 |
|
| 192 |
mse_histogram_voice = plot_mse_histogram(mse_voice, "MSE Distribution: Voice",
|
| 193 |
+
anomaly_threshold, color='green')
|
| 194 |
|
| 195 |
mse_heatmap_embeddings = plot_mse_heatmap(mse_embeddings, "Facial Features MSE Heatmap", df)
|
| 196 |
mse_heatmap_posture = plot_mse_heatmap(mse_posture, "Body Posture MSE Heatmap", df)
|
| 197 |
mse_heatmap_voice = plot_mse_heatmap(mse_voice, "Voice MSE Heatmap", df)
|
| 198 |
|
|
|
|
| 199 |
correlation_heatmap = plot_correlation_heatmap(mse_embeddings, mse_posture, mse_voice)
|
|
|
|
|
|
|
| 200 |
scatter_plot_3d = plot_3d_scatter(mse_embeddings, mse_posture, mse_voice)
|
| 201 |
+
|
| 202 |
+
progress(0.95, "Generating video with heatmap")
|
| 203 |
+
heatmap_video_path = create_video_with_heatmap(video_path, df, mse_embeddings, mse_posture, mse_voice,
|
| 204 |
+
output_folder, original_fps, largest_cluster)
|
| 205 |
+
|
| 206 |
+
progress(1.0, "Video processing complete")
|
| 207 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
except Exception as e:
|
| 209 |
print(f"Error details: {str(e)}")
|
| 210 |
import traceback
|
| 211 |
traceback.print_exc()
|
| 212 |
+
return (f"Error in video processing: {str(e)}",) + (None,) * 25
|
| 213 |
|
| 214 |
progress(1.0, "Preparing results")
|
| 215 |
results = f"Number of persons detected: {num_clusters}\n\n"
|
|
|
|
| 282 |
)
|
| 283 |
|
| 284 |
|
| 285 |
+
|
| 286 |
def is_frontal_face(landmarks, threshold=60):
|
| 287 |
nose_tip = landmarks[4]
|
| 288 |
left_chin = landmarks[234]
|