Update app.py
app.py
CHANGED
@@ -173,7 +173,7 @@ def cluster_faces(face_images):
     X = X / 255.0
 
     # Perform DBSCAN clustering
-    dbscan = DBSCAN(eps=0.3, min_samples=
+    dbscan = DBSCAN(eps=0.3, min_samples=10, metric='euclidean')
     clusters = dbscan.fit_predict(X)
 
     # If DBSCAN assigns all to noise (-1), consider it as one cluster
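For context on the new clustering parameters: `min_samples` sets how many neighbours a point needs within `eps` before it can seed a cluster, so raising it makes DBSCAN stricter and pushes borderline faces into noise (`-1`). A minimal sketch on synthetic data (not the app's face vectors) illustrating the effect:

```python
# Hypothetical data: two tight blobs scaled to [0, 1], like the flattened faces above.
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=300, centers=2, cluster_std=0.05, random_state=0)
X = (X - X.min()) / (X.max() - X.min())

for min_samples in (5, 10):
    labels = DBSCAN(eps=0.3, min_samples=min_samples, metric='euclidean').fit_predict(X)
    n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
    print(f"min_samples={min_samples}: {n_clusters} clusters, {(labels == -1).sum()} noise points")
```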
@@ -238,6 +238,13 @@ def save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, de
 
     return df, largest_cluster
 
+def determine_optimal_anomalies(anomaly_scores, z_threshold=3):
+    mean = np.mean(anomaly_scores)
+    std = np.std(anomaly_scores)
+    threshold = mean + z_threshold * std
+    anomalies = anomaly_scores > threshold
+    return anomalies, np.where(anomalies)[0]
+
 class LSTMAutoencoder(nn.Module):
     def __init__(self, input_size, hidden_size=64, num_layers=2):
         super(LSTMAutoencoder, self).__init__()
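This helper is the core of the change: instead of always reporting a fixed `num_anomalies`, it flags every score above mean + `z_threshold`·std, so the number of reported anomalies adapts to the score distribution. A quick sanity check with synthetic scores (values are illustrative only):

```python
import numpy as np

def determine_optimal_anomalies(anomaly_scores, z_threshold=3):
    threshold = np.mean(anomaly_scores) + z_threshold * np.std(anomaly_scores)
    anomalies = anomaly_scores > threshold
    return anomalies, np.where(anomalies)[0]

rng = np.random.default_rng(0)
scores = np.concatenate([rng.normal(0.1, 0.02, 500), [0.9, 0.95]])  # two injected outliers
mask, idx = determine_optimal_anomalies(scores)
print(idx)  # [500 501] -- only the injected outliers clear mean + 3*std
```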
@@ -252,7 +259,7 @@ class LSTMAutoencoder(nn.Module):
         out = self.fc(outputs)
         return out
 
-def lstm_anomaly_detection(X, feature_columns,
+def lstm_anomaly_detection(X, feature_columns, epochs=100, batch_size=64):
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
     X = torch.FloatTensor(X).to(device)
     if X.dim() == 2:
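The `if X.dim() == 2:` guard that follows (unchanged here) presumably promotes a `(frames, features)` matrix to a single-sequence batch, which is what a `batch_first` LSTM expects. A minimal shape check of that convention (hidden sizes mirror the `LSTMAutoencoder` defaults above):

```python
import torch
import torch.nn as nn

X = torch.randn(120, 16)   # 120 frames, 16 features for one video
if X.dim() == 2:
    X = X.unsqueeze(0)     # -> (1, 120, 16): batch of one sequence

lstm = nn.LSTM(input_size=16, hidden_size=64, num_layers=2, batch_first=True)
out, _ = lstm(X)
print(out.shape)           # torch.Size([1, 120, 64])
```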
@@ -289,9 +296,7 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, bat
     reconstructed = model(X).squeeze(0).cpu().numpy()
 
     mse_all = np.mean(np.power(X.squeeze(0).cpu().numpy() - reconstructed, 2), axis=1)
-    top_indices_all = mse_all
-    anomalies_all = np.zeros(len(mse_all), dtype=bool)
-    anomalies_all[top_indices_all] = True
+    anomalies_all, top_indices_all = determine_optimal_anomalies(mse_all)
 
     component_columns = [col for col in feature_columns if col.startswith('Comp')]
     component_indices = [feature_columns.index(col) for col in component_columns]
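The `mse_all` line reduces over the feature axis, leaving one reconstruction error per frame; `determine_optimal_anomalies` then thresholds that vector. A toy illustration with a deliberately corrupted frame (synthetic arrays, not model output):

```python
import numpy as np

rng = np.random.default_rng(1)
original = rng.random((100, 16))                       # (frames, features)
reconstructed = original + rng.normal(0, 0.01, original.shape)
reconstructed[42] += 0.5                               # one badly reconstructed frame

mse = np.mean(np.power(original - reconstructed, 2), axis=1)
print(mse.shape, mse.argmax())                         # (100,) 42
```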
@@ -302,15 +307,13 @@ def lstm_anomaly_detection(X, feature_columns, num_anomalies=10, epochs=100, bat
     else:
         mse_comp = mse_all
 
-    top_indices_comp = mse_comp
-    anomalies_comp = np.zeros(len(mse_comp), dtype=bool)
-    anomalies_comp[top_indices_comp] = True
+    anomalies_comp, top_indices_comp = determine_optimal_anomalies(mse_comp)
 
     return (anomalies_all, mse_all, top_indices_all,
             anomalies_comp, mse_comp, top_indices_comp,
             model)
 
-def emotion_anomaly_detection(emotion_data,
+def emotion_anomaly_detection(emotion_data, epochs=100, batch_size=64):
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
     X = torch.FloatTensor(emotion_data.values).to(device)
     if X.dim() == 1:
@@ -335,9 +338,7 @@ def emotion_anomaly_detection(emotion_data, num_anomalies=10, epochs=100, batch_
     reconstructed = model(X).squeeze(0).cpu().numpy()
 
     mse = np.mean(np.power(X.squeeze(0).cpu().numpy() - reconstructed, 2), axis=1)
-    top_indices = mse
-    anomalies = np.zeros(len(mse), dtype=bool)
-    anomalies[top_indices] = True
+    anomalies, top_indices = determine_optimal_anomalies(mse)
 
     return anomalies, mse, top_indices
 
@@ -350,7 +351,7 @@ def normalize_scores(scores):
 
 
 def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
-    plt.figure(figsize=(16, 8), dpi=
+    plt.figure(figsize=(16, 8), dpi=500)
     fig, ax = plt.subplots(figsize=(16, 8))
 
     df['Seconds'] = df['Timecode'].apply(
@@ -379,7 +380,7 @@ def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
 
     max_seconds = df['Seconds'].max()
     ax.set_xlim(0, max_seconds)
-    num_ticks =
+    num_ticks = 100
     ax.set_xticks(np.linspace(0, max_seconds, num_ticks))
     ax.set_xticklabels([f"{int(x // 60):02d}:{int(x % 60):02d}" for x in ax.get_xticks()],
                        rotation=90, ha='center', va='top')
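Hardcoding `num_ticks = 100` decouples tick density from clip length; for a ten-minute clip that puts a label roughly every six seconds. A quick check of the MM:SS formatting used above:

```python
import numpy as np

max_seconds, num_ticks = 600, 100  # assumed: a 10-minute clip
ticks = np.linspace(0, max_seconds, num_ticks)
labels = [f"{int(x // 60):02d}:{int(x % 60):02d}" for x in ticks]
print(labels[0], labels[-1], len(labels))  # 00:00 10:00 100
```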
@@ -392,8 +393,8 @@ def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
     plt.tight_layout()
     return fig
 
-def plot_emotion(df, emotion, anomaly_scores, top_indices,
-    plt.figure(figsize=(16, 8), dpi=
+def plot_emotion(df, emotion, anomaly_scores, top_indices, color, timecodes):
+    plt.figure(figsize=(16, 8), dpi=500)
     fig, ax = plt.subplots(figsize=(16, 8))
 
     df['Seconds'] = df['Timecode'].apply(
@@ -419,38 +420,45 @@ def plot_emotion(df, emotion, anomaly_scores, top_indices, num_anomalies, color,
 
     max_seconds = df['Seconds'].max()
     ax.set_xlim(0, max_seconds)
-    num_ticks =
+    num_ticks = 100
     ax.set_xticks(np.linspace(0, max_seconds, num_ticks))
     ax.set_xticklabels([f"{int(x // 60):02d}:{int(x % 60):02d}" for x in ax.get_xticks()],
                        rotation=90, ha='center', va='top')
 
     ax.set_xlabel('Time')
     ax.set_ylabel(f'{emotion.capitalize()} Anomaly Score')
-    ax.set_title(f'{emotion.capitalize()} Anomaly Scores
+    ax.set_title(f'{emotion.capitalize()} Anomaly Scores')
 
     ax.grid(True, linestyle='--', alpha=0.7)
     plt.tight_layout()
     return fig
 
-def get_random_face_samples(organized_faces_folder, output_folder):
+def get_random_face_samples(organized_faces_folder, output_folder, largest_cluster, num_samples=100):
     face_samples = []
     for cluster_folder in os.listdir(organized_faces_folder):
         if cluster_folder.startswith("person_"):
             person_folder = os.path.join(organized_faces_folder, cluster_folder)
             face_files = [f for f in os.listdir(person_folder) if f.endswith('.jpg')]
             if face_files:
-
-
-
-
-
-
-
-
+                if int(cluster_folder.split('_')[1]) == largest_cluster:
+                    # Get 10 samples for the largest cluster
+                    samples = np.random.choice(face_files, min(num_samples, len(face_files)), replace=False)
+                else:
+                    # Get 1 sample for other clusters
+                    samples = [np.random.choice(face_files)]
+
+                for i, sample in enumerate(samples):
+                    face_path = os.path.join(person_folder, sample)
+                    output_path = os.path.join(output_folder, f"face_sample_{cluster_folder}_{i}.jpg")
+                    face_img = cv2.imread(face_path)
+                    if face_img is not None:
+                        small_face = cv2.resize(face_img, (160, 160))
+                        cv2.imwrite(output_path, small_face)
+                        face_samples.append(output_path)
     return face_samples
 
 
-def process_video(video_path,
+def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
     output_folder = "output"
     os.makedirs(output_folder, exist_ok=True)
 
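The branching in `get_random_face_samples` boils down to: many samples (capped at what exists, drawn without replacement) for the largest cluster, a single sample for everyone else. A condensed, self-contained sketch of just that rule (`pick_samples` is a hypothetical name, not in the app):

```python
import numpy as np

rng = np.random.default_rng(0)

def pick_samples(face_files, is_largest, num_samples=100):
    if is_largest:
        return list(rng.choice(face_files, min(num_samples, len(face_files)), replace=False))
    return [rng.choice(face_files)]

files = [f"face_{i}.jpg" for i in range(12)]
print(len(pick_samples(files, is_largest=True)))   # 12 -- capped by available files
print(len(pick_samples(files, is_largest=False)))  # 1
```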
@@ -490,13 +498,13 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
     progress(0.7, "Organizing faces")
     organize_faces_by_person(embeddings_by_frame, clusters, aligned_faces_folder, organized_faces_folder)
 
-    progress(0.75, "Getting face samples")
-    face_samples = get_random_face_samples(organized_faces_folder, output_folder)
-
     progress(0.8, "Saving person data")
     df, largest_cluster = save_person_data_to_csv(embeddings_by_frame, emotions_by_frame, clusters, desired_fps,
                                                   original_fps, temp_dir, num_components, video_duration)
 
+    progress(0.85, "Getting face samples")
+    face_samples = get_random_face_samples(organized_faces_folder, output_folder, largest_cluster)
+
     progress(0.9, "Performing anomaly detection")
     feature_columns = [col for col in df.columns if
                        col not in ['Frame', 'Timecode', 'Time (Minutes)', 'Embedding_Index']]
@@ -504,7 +512,7 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
 
     try:
         anomalies_all, anomaly_scores_all, top_indices_all, anomalies_comp, anomaly_scores_comp, top_indices_comp, _ = lstm_anomaly_detection(
-            X, feature_columns,
+            X, feature_columns, batch_size=batch_size)
 
         # Normalize anomaly scores
         anomaly_scores_all = normalize_scores(anomaly_scores_all)
@@ -513,7 +521,7 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
         # Perform anomaly detection for each emotion using LSTM autoencoder
         emotion_anomalies = {}
         for emotion in ['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral']:
-            anomalies, scores, indices = emotion_anomaly_detection(df[emotion]
+            anomalies, scores, indices = emotion_anomaly_detection(df[emotion])
             emotion_anomalies[emotion] = {
                 'anomalies': anomalies,
                 'scores': normalize_scores(scores),
@@ -534,7 +542,6 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
             plot_emotion(df, emotion,
                          emotion_anomalies[emotion]['scores'],
                          emotion_anomalies[emotion]['indices'],
-                         num_anomalies,
                          color,
                          df['Timecode'].iloc[emotion_anomalies[emotion]['indices']].values)
             for emotion, color in zip(['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral'],
@@ -548,17 +555,17 @@ def process_video(video_path, num_anomalies, num_components, desired_fps, batch_
         results += f"Breakdown of persons/clusters:\n"
         for cluster_id in range(num_clusters):
             results += f"Person/Cluster {cluster_id + 1}: {len([c for c in clusters if c == cluster_id])} frames\n"
-        results += f"\
+        results += f"\nAnomalies (Facial Features + Emotions):\n"
         results += "\n".join([f"{score:.2f} at {timecode}" for score, timecode in
                               zip(anomaly_scores_all[top_indices_all[1:]],
                                   df['Timecode'].iloc[top_indices_all[1:]].values)])
-        results += f"\n\
+        results += f"\n\nAnomalies (Facial Features):\n"
         results += "\n".join([f"{score:.2f} at {timecode}" for score, timecode in
                               zip(anomaly_scores_comp[top_indices_comp[1:]],
                                   df['Timecode'].iloc[top_indices_comp[1:]].values)])
 
         for emotion in ['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral']:
-            results += f"\n\
+            results += f"\n\n{emotion.capitalize()} Anomalies:\n"
             results += "\n".join([f"{emotion_anomalies[emotion]['scores'][i]:.2f} at {df['Timecode'].iloc[i]}"
                                   for i in emotion_anomalies[emotion]['indices'] if i > 0])
 
@@ -575,7 +582,6 @@ iface = gr.Interface(
     fn=process_video,
     inputs=[
         gr.Video(),
-        gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Number of Anomalies"),
         gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Number of Components"),
         gr.Slider(minimum=1, maximum=20, step=1, value=15, label="Desired FPS"),
         gr.Slider(minimum=1, maximum=32, step=1, value=8, label="Batch Size")
@@ -590,15 +596,14 @@ iface = gr.Interface(
         gr.Plot(label="Happy Anomalies"),
         gr.Plot(label="Surprise Anomalies"),
         gr.Plot(label="Neutral Anomalies"),
-        gr.Gallery(label="Detected Persons", columns=[
+        gr.Gallery(label="Detected Persons", columns=[5], rows=[2], height="auto")
     ],
     title="Facial Expressions Anomaly Detection",
     description="""
     This application detects anomalies in facial expressions and emotions from a video input.
-    It identifies distinct persons in the video and provides
+    It identifies distinct persons in the video and provides sample faces for each, with 10 samples for the most frequent person.
 
     Adjust the parameters as needed:
-    - Number of Anomalies: How many top anomalies or high intensities to highlight
     - Number of Components: Complexity of the facial expression model
     - Desired FPS: Frames per second to analyze (lower for faster processing)
     - Batch Size: Affects processing speed and memory usage
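With the "Number of Anomalies" slider gone, `process_video` takes one fewer positional argument, and the remaining sliders map to it in order. A minimal stub showing the resulting wiring (assuming this Gradio API version; `process_stub` stands in for the real function):

```python
import gradio as gr

def process_stub(video_path, num_components, desired_fps, batch_size):
    return f"video={video_path}, components={num_components}, fps={desired_fps}, batch={batch_size}"

demo = gr.Interface(
    fn=process_stub,
    inputs=[
        gr.Video(),
        gr.Slider(minimum=1, maximum=20, step=1, value=10, label="Number of Components"),
        gr.Slider(minimum=1, maximum=20, step=1, value=15, label="Desired FPS"),
        gr.Slider(minimum=1, maximum=32, step=1, value=8, label="Batch Size"),
    ],
    outputs=gr.Textbox(label="Results"),
)

if __name__ == "__main__":
    demo.launch()
```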