Update app.py
Browse files
app.py
CHANGED
@@ -35,8 +35,7 @@ matplotlib.rcParams['savefig.dpi'] = 400
|
|
35 |
# Initialize models and other global variables
|
36 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
37 |
|
38 |
-
mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.
|
39 |
-
selection_method='largest')
|
40 |
model = InceptionResnetV1(pretrained='vggface2').eval().to(device)
|
41 |
mp_face_mesh = mp.solutions.face_mesh
|
42 |
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.5)
|
@@ -156,7 +155,6 @@ def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, b
|
|
156 |
|
157 |
return embeddings_by_frame, emotions_by_frame, aligned_face_paths
|
158 |
|
159 |
-
|
160 |
def cluster_faces(embeddings):
|
161 |
if len(embeddings) < 2:
|
162 |
print("Not enough faces for clustering. Assigning all to one cluster.")
|
@@ -228,7 +226,7 @@ def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, de
|
|
228 |
|
229 |
return df, largest_cluster
|
230 |
|
231 |
-
def determine_optimal_anomalies(anomaly_scores, z_threshold=3):
|
232 |
mean = np.mean(anomaly_scores)
|
233 |
std = np.std(anomaly_scores)
|
234 |
threshold = mean + z_threshold * std
|
@@ -239,7 +237,7 @@ def timecode_to_seconds(timecode):
|
|
239 |
h, m, s = map(float, timecode.split(':'))
|
240 |
return h * 3600 + m * 60 + s
|
241 |
|
242 |
-
def group_similar_timecodes(timecodes, scores, threshold_seconds=
|
243 |
grouped = []
|
244 |
current_group = []
|
245 |
|
@@ -282,7 +280,7 @@ def lstm_anomaly_detection(X, feature_columns, epochs=100, batch_size=64):
|
|
282 |
|
283 |
print(f"X shape after reshaping: {X.shape}")
|
284 |
|
285 |
-
train_size = int(0.
|
286 |
X_train, X_val = X[:, :train_size, :], X[:, train_size:, :]
|
287 |
|
288 |
model = LSTMAutoencoder(input_size=X.shape[2]).to(device)
|
@@ -366,6 +364,34 @@ def plot_to_image(fig):
|
|
366 |
buf.seek(0)
|
367 |
return buf
|
368 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
369 |
def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
|
370 |
plt.figure(figsize=(16, 8), dpi=300)
|
371 |
fig, ax = plt.subplots(figsize=(16, 8))
|
@@ -373,25 +399,25 @@ def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
|
|
373 |
df['Seconds'] = df['Timecode'].apply(
|
374 |
lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
|
375 |
|
376 |
-
|
377 |
-
|
378 |
-
seconds = df['Seconds'].
|
379 |
-
scores =
|
380 |
|
381 |
ax.scatter(seconds, scores, color='blue', alpha=0.7, s=10)
|
382 |
|
383 |
-
top_indices = [idx for idx in top_indices if idx
|
384 |
-
ax.scatter(df['Seconds'].iloc[top_indices],
|
385 |
|
386 |
# Calculate and plot baseline
|
387 |
-
non_anomalous_scores = np.delete(
|
388 |
baseline = np.mean(non_anomalous_scores)
|
389 |
ax.axhline(y=baseline, color='black', linestyle='--', linewidth=2.5)
|
390 |
ax.text(df['Seconds'].max(), baseline, f'Baseline ({baseline:.2f})',
|
391 |
verticalalignment='bottom', horizontalalignment='right', color='black')
|
392 |
|
393 |
grouped_timecodes = group_similar_timecodes([df['Timecode'].iloc[idx] for idx in top_indices],
|
394 |
-
|
395 |
|
396 |
for group in grouped_timecodes:
|
397 |
max_score_idx = max(range(len(group)), key=lambda i: group[i][1])
|
@@ -424,12 +450,14 @@ def plot_emotion(df, emotion, anomaly_scores, top_indices, color, timecodes):
|
|
424 |
df['Seconds'] = df['Timecode'].apply(
|
425 |
lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
|
426 |
|
427 |
-
|
428 |
-
|
|
|
|
|
429 |
|
430 |
ax.scatter(seconds, scores, color=color, alpha=0.7, s=10)
|
431 |
|
432 |
-
top_indices = [idx for idx in top_indices if idx
|
433 |
ax.scatter(df['Seconds'].iloc[top_indices], anomaly_scores[top_indices], color='red', s=50, zorder=5)
|
434 |
|
435 |
# Calculate and plot baseline
|
@@ -467,28 +495,33 @@ def plot_emotion(df, emotion, anomaly_scores, top_indices, color, timecodes):
|
|
467 |
plt.close()
|
468 |
return fig
|
469 |
|
470 |
-
def
|
471 |
-
face_samples = []
|
472 |
-
for cluster_folder in os.listdir(organized_faces_folder):
|
473 |
if cluster_folder.startswith("person_"):
|
474 |
person_folder = os.path.join(organized_faces_folder, cluster_folder)
|
475 |
-
face_files = [f for f in os.listdir(person_folder) if f.endswith('.jpg')]
|
476 |
if face_files:
|
477 |
-
|
478 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
479 |
else:
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
cv2.imwrite(output_path, small_face)
|
489 |
-
face_samples.append(output_path)
|
490 |
return face_samples
|
491 |
-
|
492 |
def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
|
493 |
output_folder = "output"
|
494 |
os.makedirs(output_folder, exist_ok=True)
|
@@ -519,7 +552,7 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
|
|
519 |
|
520 |
if not aligned_face_paths:
|
521 |
return ("No faces were extracted from the video.",
|
522 |
-
None, None, None, None, None, None, None, None
|
523 |
|
524 |
progress(0.6, "Clustering faces")
|
525 |
embeddings = [embedding for _, embedding in embeddings_by_frame.items()]
|
@@ -534,7 +567,7 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
|
|
534 |
original_fps, temp_dir, num_components, video_duration)
|
535 |
|
536 |
progress(0.85, "Getting face samples")
|
537 |
-
face_samples =
|
538 |
|
539 |
progress(0.9, "Performing anomaly detection")
|
540 |
feature_columns = [col for col in df.columns if
|
@@ -559,7 +592,7 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
|
|
559 |
|
560 |
except Exception as e:
|
561 |
print(f"Error details: {str(e)}")
|
562 |
-
return f"Error in anomaly detection: {str(e)}", None, None, None, None, None, None, None, None
|
563 |
|
564 |
progress(0.95, "Generating plots")
|
565 |
try:
|
@@ -578,41 +611,36 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
|
|
578 |
['purple', 'green', 'orange', 'darkblue', 'gold', 'grey'])
|
579 |
]
|
580 |
except Exception as e:
|
581 |
-
return f"Error generating plots: {str(e)}", None, None, None, None, None, None, None, None
|
582 |
|
583 |
progress(1.0, "Preparing results")
|
584 |
results = f"Number of persons/clusters detected: {num_clusters}\n\n"
|
585 |
results += f"Breakdown of persons/clusters:\n"
|
586 |
for cluster_id in range(num_clusters):
|
587 |
results += f"Person/Cluster {cluster_id + 1}: {len([c for c in clusters if c == cluster_id])} frames\n"
|
588 |
-
|
589 |
-
results += "\n".join([f"{score:.2f} at {timecode}" for score, timecode in
|
590 |
-
zip(anomaly_scores_all[top_indices_all[1:]],
|
591 |
-
df['Timecode'].iloc[top_indices_all[1:]].values)])
|
592 |
-
results += f"\n\nAnomalies (Facial Features):\n"
|
593 |
-
results += "\n".join([f"{score:.2f} at {timecode}" for score, timecode in
|
594 |
-
zip(anomaly_scores_comp[top_indices_comp[1:]],
|
595 |
-
df['Timecode'].iloc[top_indices_comp[1:]].values)])
|
596 |
-
|
597 |
-
for emotion in ['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral']:
|
598 |
-
results += f"\n\n{emotion.capitalize()} Anomalies:\n"
|
599 |
-
results += "\n".join([f"{emotion_anomalies[emotion]['scores'][i]:.2f} at {df['Timecode'].iloc[i]}"
|
600 |
-
for i in emotion_anomalies[emotion]['indices'] if i > 0])
|
601 |
|
602 |
return (
|
603 |
results,
|
604 |
anomaly_plot_all,
|
605 |
anomaly_plot_comp,
|
606 |
*emotion_plots,
|
607 |
-
face_samples
|
|
|
608 |
)
|
609 |
|
|
|
|
|
|
|
|
|
|
|
|
|
610 |
iface = gr.Interface(
|
611 |
fn=process_video,
|
612 |
inputs=[
|
613 |
gr.Video(),
|
614 |
-
gr.Slider(minimum=1, maximum=
|
615 |
-
gr.Slider(minimum=1, maximum=20, step=1, value=
|
616 |
gr.Slider(minimum=1, maximum=32, step=1, value=8, label="Batch Size")
|
617 |
],
|
618 |
outputs=[
|
@@ -625,12 +653,11 @@ iface = gr.Interface(
|
|
625 |
gr.Plot(label="Happy Anomalies"),
|
626 |
gr.Plot(label="Surprise Anomalies"),
|
627 |
gr.Plot(label="Neutral Anomalies"),
|
628 |
-
|
629 |
-
],
|
630 |
title="Facial Expressions Anomaly Detection",
|
631 |
description="""
|
632 |
This application detects anomalies in facial expressions and emotions from a video input.
|
633 |
-
It identifies distinct persons in the video and provides sample faces for each, with
|
634 |
|
635 |
Adjust the parameters as needed:
|
636 |
- Number of Components: Complexity of the facial expression model
|
@@ -642,5 +669,5 @@ iface = gr.Interface(
|
|
642 |
allow_flagging="never"
|
643 |
)
|
644 |
|
645 |
-
|
646 |
-
|
|
|
35 |
# Initialize models and other global variables
|
36 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
37 |
|
38 |
+
mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.975, 0.975, 0.975], min_face_size=100)
|
|
|
39 |
model = InceptionResnetV1(pretrained='vggface2').eval().to(device)
|
40 |
mp_face_mesh = mp.solutions.face_mesh
|
41 |
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.5)
|
|
|
155 |
|
156 |
return embeddings_by_frame, emotions_by_frame, aligned_face_paths
|
157 |
|
|
|
158 |
def cluster_faces(embeddings):
|
159 |
if len(embeddings) < 2:
|
160 |
print("Not enough faces for clustering. Assigning all to one cluster.")
|
|
|
226 |
|
227 |
return df, largest_cluster
|
228 |
|
229 |
+
def determine_optimal_anomalies(anomaly_scores, z_threshold=3.5):
|
230 |
mean = np.mean(anomaly_scores)
|
231 |
std = np.std(anomaly_scores)
|
232 |
threshold = mean + z_threshold * std
|
|
|
237 |
h, m, s = map(float, timecode.split(':'))
|
238 |
return h * 3600 + m * 60 + s
|
239 |
|
240 |
+
def group_similar_timecodes(timecodes, scores, threshold_seconds=10):
|
241 |
grouped = []
|
242 |
current_group = []
|
243 |
|
|
|
280 |
|
281 |
print(f"X shape after reshaping: {X.shape}")
|
282 |
|
283 |
+
train_size = int(0.9 * X.shape[1])
|
284 |
X_train, X_val = X[:, :train_size, :], X[:, train_size:, :]
|
285 |
|
286 |
model = LSTMAutoencoder(input_size=X.shape[2]).to(device)
|
|
|
364 |
buf.seek(0)
|
365 |
return buf
|
366 |
|
367 |
+
def embedding_anomaly_detection(embeddings, epochs=100, batch_size=64):
    """Score embeddings by LSTM-autoencoder reconstruction error.

    The embeddings are loaded into a float tensor, an ``LSTMAutoencoder`` is
    fitted to reproduce that tensor (one full-tensor optimisation step per
    epoch), and the mean squared reconstruction error of each row is handed
    to ``determine_optimal_anomalies``.

    Args:
        embeddings: Data accepted by ``torch.FloatTensor``. A 2-D input gets
            a leading axis of size 1; a 1-D input gets a leading and a
            trailing axis of size 1. Presumably (frames, features) --
            confirm against the caller.
        epochs: Number of optimisation steps.
        batch_size: Accepted for signature compatibility; the body never
            reads it.

    Returns:
        Tuple ``(anomalies, mse, top_indices)``: ``mse`` is the NumPy array of
        per-row mean squared errors, ``anomalies`` and ``top_indices`` are
        whatever ``determine_optimal_anomalies(mse)`` returns.
    """
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    X = torch.FloatTensor(embeddings).to(device)
    rank = X.dim()
    if rank == 2:
        # Add the leading axis of size 1 in front of the two existing axes.
        X = X.unsqueeze(0)
    elif rank == 1:
        # Flat vector: add a leading and a trailing axis of size 1.
        X = X.unsqueeze(0).unsqueeze(2)

    autoencoder = LSTMAutoencoder(input_size=X.shape[2]).to(device)
    loss_fn = nn.MSELoss()
    adam = optim.Adam(autoencoder.parameters())

    for _ in range(epochs):
        autoencoder.train()
        adam.zero_grad()
        reconstruction_loss = loss_fn(autoencoder(X), X)
        reconstruction_loss.backward()
        adam.step()

    autoencoder.eval()
    with torch.no_grad():
        reconstructed = autoencoder(X).squeeze(0).cpu().numpy()

    original = X.squeeze(0).cpu().numpy()
    squared_error = np.power(original - reconstructed, 2)
    mse = np.mean(squared_error, axis=1)

    anomalies, top_indices = determine_optimal_anomalies(mse)
    return anomalies, mse, top_indices
|
395 |
def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
|
396 |
plt.figure(figsize=(16, 8), dpi=300)
|
397 |
fig, ax = plt.subplots(figsize=(16, 8))
|
|
|
399 |
df['Seconds'] = df['Timecode'].apply(
|
400 |
lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
|
401 |
|
402 |
+
# Filter out data points without faces
|
403 |
+
valid_indices = [i for i in range(len(anomaly_scores)) if i in df.index]
|
404 |
+
seconds = df['Seconds'].iloc[valid_indices].values
|
405 |
+
scores = anomaly_scores[valid_indices]
|
406 |
|
407 |
ax.scatter(seconds, scores, color='blue', alpha=0.7, s=10)
|
408 |
|
409 |
+
top_indices = [idx for idx in top_indices if idx in valid_indices]
|
410 |
+
ax.scatter(df['Seconds'].iloc[top_indices], anomaly_scores[top_indices], color='red', s=50, zorder=5)
|
411 |
|
412 |
# Calculate and plot baseline
|
413 |
+
non_anomalous_scores = np.delete(scores, top_indices)
|
414 |
baseline = np.mean(non_anomalous_scores)
|
415 |
ax.axhline(y=baseline, color='black', linestyle='--', linewidth=2.5)
|
416 |
ax.text(df['Seconds'].max(), baseline, f'Baseline ({baseline:.2f})',
|
417 |
verticalalignment='bottom', horizontalalignment='right', color='black')
|
418 |
|
419 |
grouped_timecodes = group_similar_timecodes([df['Timecode'].iloc[idx] for idx in top_indices],
|
420 |
+
scores[top_indices])
|
421 |
|
422 |
for group in grouped_timecodes:
|
423 |
max_score_idx = max(range(len(group)), key=lambda i: group[i][1])
|
|
|
450 |
df['Seconds'] = df['Timecode'].apply(
|
451 |
lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
|
452 |
|
453 |
+
# Filter out data points without faces
|
454 |
+
valid_indices = [i for i in range(len(anomaly_scores)) if i in df.index]
|
455 |
+
seconds = df['Seconds'].iloc[valid_indices].values
|
456 |
+
scores = anomaly_scores[valid_indices]
|
457 |
|
458 |
ax.scatter(seconds, scores, color=color, alpha=0.7, s=10)
|
459 |
|
460 |
+
top_indices = [idx for idx in top_indices if idx in valid_indices]
|
461 |
ax.scatter(df['Seconds'].iloc[top_indices], anomaly_scores[top_indices], color='red', s=50, zorder=5)
|
462 |
|
463 |
# Calculate and plot baseline
|
|
|
495 |
plt.close()
|
496 |
return fig
|
497 |
|
498 |
+
def get_all_face_samples(organized_faces_folder, output_folder, largest_cluster):
    """Copy a 160x160 thumbnail of every clustered face into ``output_folder``.

    Every ``person_<id>`` sub-folder of ``organized_faces_folder`` is visited
    in sorted order and each of its ``.jpg`` files (also sorted) is read,
    resized to 160x160 and written to ``output_folder``. Thumbnails of the
    folder whose id equals ``largest_cluster`` are named
    ``face_sample_most_frequent_<i>.jpg``; all others are named
    ``face_sample_other_<id>_<i>.jpg``.

    Args:
        organized_faces_folder: Directory containing the ``person_<id>``
            sub-folders.
        output_folder: Directory the thumbnails are written to. It is not
            created here.
        largest_cluster: Integer id of the cluster reported under
            ``"most_frequent"``.

    Returns:
        Dict with keys ``"most_frequent"`` and ``"others"``, each a list of
        the thumbnail paths that were actually written.
    """
    face_samples = {"most_frequent": [], "others": []}

    for cluster_folder in sorted(os.listdir(organized_faces_folder)):
        if not cluster_folder.startswith("person_"):
            continue

        person_folder = os.path.join(organized_faces_folder, cluster_folder)
        if not os.path.isdir(person_folder):
            # A stray file that merely looks like a cluster folder.
            continue

        face_files = sorted(f for f in os.listdir(person_folder) if f.endswith('.jpg'))
        if not face_files:
            continue

        try:
            cluster_id = int(cluster_folder.split('_')[1])
        except ValueError:
            # Non-numeric suffix: skip it instead of aborting the whole scan.
            continue

        if cluster_id == largest_cluster:
            bucket = "most_frequent"
            prefix = "face_sample_most_frequent"
        else:
            bucket = "others"
            prefix = f"face_sample_other_{cluster_id:02d}"

        for i, sample in enumerate(face_files):
            face_img = cv2.imread(os.path.join(person_folder, sample))
            if face_img is None:
                # Unreadable or corrupt image; the index is still consumed so
                # output names stay aligned with the sorted input order.
                continue

            output_path = os.path.join(output_folder, f"{prefix}_{i:04d}.jpg")
            small_face = cv2.resize(face_img, (160, 160))
            # imwrite reports failure through its return value; only list
            # files that really exist on disk.
            if cv2.imwrite(output_path, small_face):
                face_samples[bucket].append(output_path)

    return face_samples
|
|
|
525 |
def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
|
526 |
output_folder = "output"
|
527 |
os.makedirs(output_folder, exist_ok=True)
|
|
|
552 |
|
553 |
if not aligned_face_paths:
|
554 |
return ("No faces were extracted from the video.",
|
555 |
+
None, None, None, None, None, None, None, None)
|
556 |
|
557 |
progress(0.6, "Clustering faces")
|
558 |
embeddings = [embedding for _, embedding in embeddings_by_frame.items()]
|
|
|
567 |
original_fps, temp_dir, num_components, video_duration)
|
568 |
|
569 |
progress(0.85, "Getting face samples")
|
570 |
+
face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
|
571 |
|
572 |
progress(0.9, "Performing anomaly detection")
|
573 |
feature_columns = [col for col in df.columns if
|
|
|
592 |
|
593 |
except Exception as e:
|
594 |
print(f"Error details: {str(e)}")
|
595 |
+
return f"Error in anomaly detection: {str(e)}", None, None, None, None, None, None, None, None
|
596 |
|
597 |
progress(0.95, "Generating plots")
|
598 |
try:
|
|
|
611 |
['purple', 'green', 'orange', 'darkblue', 'gold', 'grey'])
|
612 |
]
|
613 |
except Exception as e:
|
614 |
+
return f"Error generating plots: {str(e)}", None, None, None, None, None, None, None, None
|
615 |
|
616 |
progress(1.0, "Preparing results")
|
617 |
results = f"Number of persons/clusters detected: {num_clusters}\n\n"
|
618 |
results += f"Breakdown of persons/clusters:\n"
|
619 |
for cluster_id in range(num_clusters):
|
620 |
results += f"Person/Cluster {cluster_id + 1}: {len([c for c in clusters if c == cluster_id])} frames\n"
|
621 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
622 |
|
623 |
return (
|
624 |
results,
|
625 |
anomaly_plot_all,
|
626 |
anomaly_plot_comp,
|
627 |
*emotion_plots,
|
628 |
+
face_samples["most_frequent"],
|
629 |
+
face_samples["others"]
|
630 |
)
|
631 |
|
632 |
+
|
633 |
+
# Two image galleries appended to the interface outputs: the first for the
# most frequent person's face samples, the second for everyone else's.
gallery_outputs = [
    gr.Gallery(label=gallery_label, columns=5, rows=gallery_rows, height="auto")
    for gallery_label, gallery_rows in (
        ("Most Frequent Person Random Samples", 2),
        ("Other Persons Random Samples", 1),
    )
]
|
637 |
+
|
638 |
iface = gr.Interface(
|
639 |
fn=process_video,
|
640 |
inputs=[
|
641 |
gr.Video(),
|
642 |
+
gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Number of Components"),
|
643 |
+
gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Desired FPS"),
|
644 |
gr.Slider(minimum=1, maximum=32, step=1, value=8, label="Batch Size")
|
645 |
],
|
646 |
outputs=[
|
|
|
653 |
gr.Plot(label="Happy Anomalies"),
|
654 |
gr.Plot(label="Surprise Anomalies"),
|
655 |
gr.Plot(label="Neutral Anomalies"),
|
656 |
+
] + gallery_outputs,
|
|
|
657 |
title="Facial Expressions Anomaly Detection",
|
658 |
description="""
|
659 |
This application detects anomalies in facial expressions and emotions from a video input.
|
660 |
+
It identifies distinct persons in the video and provides sample faces for each, with multiple samples for the most frequent person.
|
661 |
|
662 |
Adjust the parameters as needed:
|
663 |
- Number of Components: Complexity of the facial expression model
|
|
|
669 |
allow_flagging="never"
|
670 |
)
|
671 |
|
672 |
+
|
673 |
+
iface.launch()
|