Update app.py
app.py
CHANGED
@@ -101,12 +101,7 @@ def extract_frames(video_path, output_folder, fps):
         print(f"FFmpeg stderr: {e.stderr}")
         raise
 
-def extract_and_align_faces_from_video(video_path, aligned_faces_folder, desired_fps):
-    print(f"Processing video: {video_path}")
-
-    frames_folder = os.path.join(os.path.dirname(aligned_faces_folder), 'extracted_frames')
-    extract_frames(video_path, frames_folder, desired_fps)
-
+def get_video_info(video_path):
     ffprobe_command = [
         'ffprobe',
         '-v', 'error',
@@ -116,63 +111,42 @@ def extract_and_align_faces_from_video(video_path, aligned_faces_folder, desired
         '-of', 'csv=p=0',
         video_path
     ]
-
-
-
-
-
-
-
-
-
-        print(f"Frame count (raw): {frame_count}")
-        print(f"Frame rate (raw): {frame_rate}")
-
-        try:
-            frac = fractions.Fraction(frame_rate)
-            original_fps = float(frac.numerator) / float(frac.denominator)
-        except (ValueError, ZeroDivisionError):
-            print(f"Warning: Could not convert frame rate '{frame_rate}' to float. Using fallback method.")
-            duration_command = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', video_path]
-            duration = float(subprocess.check_output(duration_command, universal_newlines=True).strip())
-            original_fps = int(frame_count) / duration
-
-        frame_count = int(frame_count)
-
-    except subprocess.CalledProcessError as e:
-        print(f"Error running FFprobe: {e}")
-        raise
-    except Exception as e:
-        print(f"Unexpected error processing video info: {e}")
-        raise
-
-    print(f"Total frames: {frame_count}, Original FPS: {original_fps}, Desired FPS: {desired_fps}")
+    ffprobe_output = subprocess.check_output(ffprobe_command, universal_newlines=True).strip().split(',')
+    frame_rate, frame_count = ffprobe_output
+
+    frac = fractions.Fraction(frame_rate)
+    original_fps = float(frac.numerator) / float(frac.denominator)
+    frame_count = int(frame_count)
+
+    return frame_count, original_fps
 
+def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, batch_size):
     embeddings_by_frame = {}
     emotions_by_frame = {}
-
-    for i, frame_file in enumerate(sorted(os.listdir(frames_folder))):
-        if frame_file.endswith('.jpg'):
+    frame_files = sorted([f for f in os.listdir(frames_folder) if f.endswith('.jpg')])
+
+    for i in range(0, len(frame_files), batch_size):
+        batch_files = frame_files[i:i+batch_size]
+        batch_frames = []
+        batch_nums = []
+
+        for frame_file in batch_files:
            frame_num = int(frame_file.split('_')[1].split('.')[0])
            frame_path = os.path.join(frames_folder, frame_file)
            frame = cv2.imread(frame_path)
+            if frame is not None:
+                batch_frames.append(frame)
+                batch_nums.append(frame_num)
+
+        if batch_frames:
+            # Detect faces in batch
+            batch_boxes, batch_probs = mtcnn.detect(batch_frames)
 
-            progress((i + 1) / frame_count, f"Processing frame {i + 1} of {frame_count}")
-
-            if frame is None:
-                print(f"Skipping frame {frame_num}: Could not read frame")
-                continue
-
-            try:
-                boxes, probs = mtcnn.detect(frame)
-                if boxes is not None and len(boxes) > 0:
-                    box = boxes[0]
-                    if probs[0] >= 0.99:
-                        x1, y1, x2, y2 = [int(b) for b in box]
-                        face = frame[y1:y2, x1:x2]
-                        if face.size == 0:
-                            print(f"Skipping frame {frame_num}: Detected face region is empty")
-                            continue
+            for j, (frame, frame_num, boxes, probs) in enumerate(zip(batch_frames, batch_nums, batch_boxes, batch_probs)):
+                if boxes is not None and len(boxes) > 0 and probs[0] >= 0.99:
+                    x1, y1, x2, y2 = [int(b) for b in boxes[0]]
+                    face = frame[y1:y2, x1:x2]
+                    if face.size > 0:
                         aligned_face = alignFace(face)
                         if aligned_face is not None:
                             aligned_face_resized = cv2.resize(aligned_face, (160, 160))
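A note on the hunk above: ffprobe reports `r_frame_rate` as a rational string such as `30000/1001`, which `float()` cannot parse directly, and that is why `get_video_info` goes through `fractions.Fraction`. A minimal standalone sketch of the same conversion, with hypothetical sample values:

import fractions

# Hypothetical ffprobe CSV output for an NTSC-rate stream: "30000/1001,3600"
frame_rate, frame_count = "30000/1001,3600".split(',')

frac = fractions.Fraction(frame_rate)            # Fraction(30000, 1001)
original_fps = float(frac.numerator) / float(frac.denominator)
print(round(original_fps, 2), int(frame_count))  # 29.97 3600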
@@ -181,11 +155,10 @@ def extract_and_align_faces_from_video(video_path, aligned_faces_folder, desired
                             embedding, emotion = get_face_embedding_and_emotion(aligned_face_resized)
                             embeddings_by_frame[frame_num] = embedding
                             emotions_by_frame[frame_num] = emotion
-            except Exception as e:
-                print(f"Error processing frame {frame_num}: {str(e)}")
-                continue
+
+        progress((i + len(batch_files)) / frame_count, f"Processing frames {i + 1} to {min(i + len(batch_files), frame_count)} of {frame_count}")
 
-    return embeddings_by_frame, emotions_by_frame
+    return embeddings_by_frame, emotions_by_frame
 
 def cluster_embeddings(embeddings):
     if len(embeddings) < 2:
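The rewritten `process_frames` leans on batched detection: facenet-pytorch's `MTCNN.detect` accepts a list of same-sized images and returns per-image boxes and probabilities. A minimal sketch under that assumption (the blank frames are placeholders):

import numpy as np
from facenet_pytorch import MTCNN

mtcnn = MTCNN()

# Frames decoded from a single video share one resolution, so they can be
# pushed through the detector together in one pass.
frames = [np.zeros((480, 640, 3), dtype=np.uint8) for _ in range(8)]

batch_boxes, batch_probs = mtcnn.detect(frames)
for boxes, probs in zip(batch_boxes, batch_probs):
    if boxes is not None and len(boxes) > 0 and probs[0] >= 0.99:
        x1, y1, x2, y2 = [int(b) for b in boxes[0]]  # highest-confidence face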
@@ -273,7 +246,9 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, bat
 
     # Ensure X is 2D
     if X.dim() == 1:
-        X = X.unsqueeze(
+        X = X.unsqueeze(1)  # Add a feature dimension
+    elif X.dim() > 2:
+        raise ValueError(f"Input X should be 1D or 2D, but got {X.dim()} dimensions")
 
     train_size = int(0.85 * len(X))
     X_train, X_val = X[:train_size], X[train_size:]
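The added dimension guard is easiest to see in isolation. A small sketch of what `unsqueeze(1)` does to a 1-D input:

import torch

X = torch.tensor([0.1, 0.2, 0.3])  # 1-D: (sequence,) with a single implicit feature
if X.dim() == 1:
    X = X.unsqueeze(1)             # -> (sequence, 1): one explicit feature column
elif X.dim() > 2:
    raise ValueError(f"Input X should be 1D or 2D, but got {X.dim()} dimensions")
print(X.shape)                     # torch.Size([3, 1])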
@@ -310,7 +285,10 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, bat
     component_indices = [feature_columns.index(col) for col in component_columns]
 
     if len(component_indices) > 0:
-        mse_comp = np.mean(np.power(X.cpu().numpy()[:, component_indices] - reconstructed[:, component_indices], 2), axis=1)
+        if X.dim() == 1:
+            mse_comp = mse_all  # If X is 1D, we can't select specific components
+        else:
+            mse_comp = np.mean(np.power(X.cpu().numpy()[:, component_indices] - reconstructed[:, component_indices], 2), axis=1)
     else:
         mse_comp = mse_all  # If no components, use all features
 
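The new branch only indexes feature columns when `X` is genuinely 2-D. A self-contained sketch of the per-frame reconstruction error over a subset of columns, with made-up arrays:

import numpy as np

# Made-up reconstruction of 4 feature columns over 3 frames.
X = np.array([[0.0, 1.0, 2.0, 3.0],
              [1.0, 2.0, 3.0, 4.0],
              [2.0, 3.0, 4.0, 5.0]])
reconstructed = X + 0.1

component_indices = [1, 2]  # columns belonging to the selected components
mse_all = np.mean(np.power(X - reconstructed, 2), axis=1)
mse_comp = np.mean(np.power(X[:, component_indices] - reconstructed[:, component_indices], 2), axis=1)
print(mse_all, mse_comp)    # per-frame error over all vs. selected features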
@@ -319,8 +297,8 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, bat
     anomalies_comp[top_indices_comp] = True
 
     return (anomalies_all, mse_all, top_indices_all,
-
-
+            anomalies_comp, mse_comp, top_indices_comp,
+            model)
 
 def plot_anomaly_scores(df, anomaly_scores, top_indices, title):
     fig, ax = plt.subplots(figsize=(16, 8))
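From the surrounding context lines, frames are flagged as anomalous by marking the largest reconstruction errors. A sketch of that top-k scheme with made-up scores:

import numpy as np

mse = np.array([0.10, 0.90, 0.20, 0.80, 0.15])  # made-up per-frame errors
num_anomalies = 2

top_indices = np.argsort(mse)[-num_anomalies:]  # indices of the largest errors
anomalies = np.zeros(len(mse), dtype=bool)
anomalies[top_indices] = True
print(top_indices, anomalies)  # [3 1] [False  True False  True False]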
@@ -379,7 +357,7 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
     frame_count, original_fps = get_video_info(video_path)
 
     progress(0.3, "Processing frames")
-    embeddings_by_frame, emotions_by_frame = process_frames(frames_folder, aligned_faces_folder, frame_count, progress)
+    embeddings_by_frame, emotions_by_frame = process_frames(frames_folder, aligned_faces_folder, frame_count, progress, batch_size)
 
     if not embeddings_by_frame:
         return "No faces were extracted from the video.", None, None, None, None, None, None
@@ -396,8 +374,10 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
 
     progress(0.9, "Performing anomaly detection")
     feature_columns = [col for col in df.columns if col not in ['Frame', 'Timecode', 'Time (Minutes)', 'Embedding_Index']]
+    X = df[feature_columns].values
+    print(f"Shape of input data: {X.shape}")  # Debug print
     try:
-        anomalies_all, anomaly_scores_all, top_indices_all, anomalies_comp, anomaly_scores_comp, top_indices_comp, _ = lstm_anomaly_detection(
+        anomalies_all, anomaly_scores_all, top_indices_all, anomalies_comp, anomaly_scores_comp, top_indices_comp, _ = lstm_anomaly_detection(X, feature_columns, num_anomalies=num_anomalies, batch_size=batch_size)
     except Exception as e:
         return f"Error in anomaly detection: {str(e)}", None, None, None, None, None, None
 
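The added `X = df[feature_columns].values` line is the hand-off from the per-frame DataFrame to the detector. A minimal sketch with hypothetical column names:

import pandas as pd

# Hypothetical per-frame DataFrame; only the component columns are features.
df = pd.DataFrame({'Frame': [0, 1],
                   'Timecode': ['00:00:00', '00:00:01'],
                   'Component 1': [0.1, 0.2],
                   'Component 2': [0.3, 0.4]})

feature_columns = [col for col in df.columns
                   if col not in ['Frame', 'Timecode', 'Time (Minutes)', 'Embedding_Index']]
X = df[feature_columns].values
print(f"Shape of input data: {X.shape}")  # (2, 2): frames x features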
@@ -425,65 +405,6 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
 
     return results, anomaly_plot_all, anomaly_plot_comp, components_plot, *emotion_plots
 
-def get_video_info(video_path):
-    ffprobe_command = [
-        'ffprobe',
-        '-v', 'error',
-        '-select_streams', 'v:0',
-        '-count_packets',
-        '-show_entries', 'stream=nb_read_packets,r_frame_rate',
-        '-of', 'csv=p=0',
-        video_path
-    ]
-    ffprobe_output = subprocess.check_output(ffprobe_command, universal_newlines=True).strip().split(',')
-    frame_rate, frame_count = ffprobe_output
-
-    frac = fractions.Fraction(frame_rate)
-    original_fps = float(frac.numerator) / float(frac.denominator)
-    frame_count = int(frame_count)
-
-    return frame_count, original_fps
-
-def process_frames(frames_folder, aligned_faces_folder, frame_count, progress):
-    embeddings_by_frame = {}
-    emotions_by_frame = {}
-
-    for i, frame_file in enumerate(sorted(os.listdir(frames_folder))):
-        if frame_file.endswith('.jpg'):
-            frame_num = int(frame_file.split('_')[1].split('.')[0])
-            frame_path = os.path.join(frames_folder, frame_file)
-            frame = cv2.imread(frame_path)
-
-            progress((i + 1) / frame_count, f"Processing frame {i + 1} of {frame_count}")
-
-            if frame is None:
-                print(f"Skipping frame {frame_num}: Could not read frame")
-                continue
-
-            try:
-                boxes, probs = mtcnn.detect(frame)
-                if boxes is not None and len(boxes) > 0:
-                    box = boxes[0]
-                    if probs[0] >= 0.99:
-                        x1, y1, x2, y2 = [int(b) for b in box]
-                        face = frame[y1:y2, x1:x2]
-                        if face.size == 0:
-                            print(f"Skipping frame {frame_num}: Detected face region is empty")
-                            continue
-                        aligned_face = alignFace(face)
-                        if aligned_face is not None:
-                            aligned_face_resized = cv2.resize(aligned_face, (160, 160))
-                            output_path = os.path.join(aligned_faces_folder, f"frame_{frame_num}_face.jpg")
-                            cv2.imwrite(output_path, aligned_face_resized)
-                            embedding, emotion = get_face_embedding_and_emotion(aligned_face_resized)
-                            embeddings_by_frame[frame_num] = embedding
-                            emotions_by_frame[frame_num] = emotion
-            except Exception as e:
-                print(f"Error processing frame {frame_num}: {str(e)}")
-                continue
-
-    return embeddings_by_frame, emotions_by_frame
-
 # Gradio interface
 iface = gr.Interface(
     fn=process_video,
@@ -513,7 +434,6 @@ iface = gr.Interface(
     - Number of Components: Complexity of the facial expression model
     - Desired FPS: Frames per second to analyze (lower for faster processing)
     - Batch Size: Affects processing speed and memory usage
-
     """
 )
 