Update app.py
Browse files
app.py
CHANGED
@@ -21,6 +21,7 @@ from PIL import Image
|
|
21 |
import gradio as gr
|
22 |
import tempfile
|
23 |
import shutil
|
|
|
24 |
|
25 |
# Suppress TensorFlow warnings
|
26 |
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
|
@@ -34,7 +35,7 @@ matplotlib.rcParams['savefig.dpi'] = 400
|
|
34 |
# Initialize models and other global variables
|
35 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
36 |
|
37 |
-
mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.
|
38 |
selection_method='largest')
|
39 |
model = InceptionResnetV1(pretrained='vggface2').eval().to(device)
|
40 |
mp_face_mesh = mp.solutions.face_mesh
|
@@ -155,31 +156,20 @@ def process_frames(frames_folder, aligned_faces_folder, frame_count, progress, b
|
|
155 |
|
156 |
return embeddings_by_frame, emotions_by_frame, aligned_face_paths
|
157 |
|
158 |
-
def cluster_faces(face_images):
|
159 |
-
if len(face_images) < 2:
|
160 |
-
print("Not enough faces for clustering. Assigning all to one cluster.")
|
161 |
-
return np.zeros(len(face_images), dtype=int)
|
162 |
-
|
163 |
-
# Resize all images to a consistent size
|
164 |
-
resized_faces = [cv2.resize(face, (224, 224)) for face in face_images]
|
165 |
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
X = np.stack(gray_faces)
|
171 |
|
172 |
-
|
173 |
-
X = X / 255.0
|
174 |
|
175 |
-
|
176 |
-
dbscan = DBSCAN(eps=0.3, min_samples=10, metric='euclidean')
|
177 |
clusters = dbscan.fit_predict(X)
|
178 |
|
179 |
-
# If DBSCAN assigns all to noise (-1), consider it as one cluster
|
180 |
if np.all(clusters == -1):
|
181 |
print("DBSCAN assigned all to noise. Considering as one cluster.")
|
182 |
-
return np.zeros(len(
|
183 |
|
184 |
return clusters
|
185 |
|
@@ -245,12 +235,10 @@ def determine_optimal_anomalies(anomaly_scores, z_threshold=3):
|
|
245 |
anomalies = anomaly_scores > threshold
|
246 |
return anomalies, np.where(anomalies)[0]
|
247 |
|
248 |
-
|
249 |
def timecode_to_seconds(timecode):
    """Convert a colon-separated timecode string to seconds.

    Accepts ``HH:MM:SS``, ``MM:SS`` or plain ``SS`` (any field may be
    fractional), so it agrees with the lenient parsing the plotting helpers
    in this file apply to the ``Timecode`` column.

    Args:
        timecode: Timecode string such as ``"00:01:23.5"``.

    Returns:
        The timecode expressed in seconds as a float.

    Raises:
        ValueError: If the string has more than three fields or a field is
            not a valid number.
    """
    parts = timecode.split(':')
    if len(parts) > 3:
        raise ValueError(f"Invalid timecode: {timecode!r}")
    seconds = 0.0
    # Horner-style accumulation: every field to the left is worth 60x more.
    for part in parts:
        seconds = seconds * 60 + float(part)
    return seconds
|
252 |
|
253 |
-
|
254 |
def group_similar_timecodes(timecodes, scores, threshold_seconds=5):
|
255 |
grouped = []
|
256 |
current_group = []
|
@@ -372,32 +360,39 @@ def normalize_scores(scores):
|
|
372 |
return np.full_like(scores, 100)
|
373 |
return ((scores - min_score) / (max_score - min_score)) * 100
|
374 |
|
|
|
|
|
|
|
|
|
|
|
|
|
375 |
def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
|
376 |
-
plt.figure(figsize=(16, 8), dpi=
|
377 |
fig, ax = plt.subplots(figsize=(16, 8))
|
378 |
|
379 |
df['Seconds'] = df['Timecode'].apply(
|
380 |
lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
|
381 |
|
382 |
-
# Normalize scores
|
383 |
normalized_scores = normalize_scores(anomaly_scores)
|
384 |
|
385 |
-
# Omit the first data point
|
386 |
seconds = df['Seconds'].values[1:]
|
387 |
scores = normalized_scores[1:]
|
388 |
|
389 |
-
# Create scatter plot
|
390 |
ax.scatter(seconds, scores, color='blue', alpha=0.7, s=10)
|
391 |
|
392 |
-
# Highlight top anomalies (excluding the first data point)
|
393 |
top_indices = [idx for idx in top_indices if idx > 0]
|
394 |
ax.scatter(df['Seconds'].iloc[top_indices], normalized_scores[top_indices], color='red', s=50, zorder=5)
|
395 |
|
396 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
397 |
grouped_timecodes = group_similar_timecodes([df['Timecode'].iloc[idx] for idx in top_indices],
|
398 |
normalized_scores[top_indices])
|
399 |
|
400 |
-
# Add timecode annotations for grouped timecodes
|
401 |
for group in grouped_timecodes:
|
402 |
max_score_idx = max(range(len(group)), key=lambda i: group[i][1])
|
403 |
timecode, score, idx = group[max_score_idx]
|
@@ -415,35 +410,39 @@ def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
|
|
415 |
|
416 |
ax.set_xlabel('Time')
|
417 |
ax.set_ylabel('Anomaly Score')
|
418 |
-
ax.set_title(
|
419 |
|
420 |
ax.grid(True, linestyle='--', alpha=0.7)
|
421 |
plt.tight_layout()
|
|
|
422 |
return fig
|
423 |
|
424 |
def plot_emotion(df, emotion, anomaly_scores, top_indices, color, timecodes):
|
425 |
-
plt.figure(figsize=(16, 8), dpi=
|
426 |
fig, ax = plt.subplots(figsize=(16, 8))
|
427 |
|
428 |
df['Seconds'] = df['Timecode'].apply(
|
429 |
lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
|
430 |
|
431 |
-
# Omit the first data point
|
432 |
seconds = df['Seconds'].values[1:]
|
433 |
scores = anomaly_scores[1:]
|
434 |
|
435 |
-
# Create scatter plot
|
436 |
ax.scatter(seconds, scores, color=color, alpha=0.7, s=10)
|
437 |
|
438 |
-
# Highlight top anomalies (excluding the first data point)
|
439 |
top_indices = [idx for idx in top_indices if idx > 0]
|
440 |
ax.scatter(df['Seconds'].iloc[top_indices], anomaly_scores[top_indices], color='red', s=50, zorder=5)
|
441 |
|
442 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
443 |
grouped_timecodes = group_similar_timecodes([df['Timecode'].iloc[idx] for idx in top_indices],
|
444 |
anomaly_scores[top_indices])
|
445 |
|
446 |
-
# Add timecode annotations for grouped timecodes
|
447 |
for group in grouped_timecodes:
|
448 |
max_score_idx = max(range(len(group)), key=lambda i: group[i][1])
|
449 |
timecode, score, idx = group[max_score_idx]
|
@@ -465,6 +464,7 @@ def plot_emotion(df, emotion, anomaly_scores, top_indices, color, timecodes):
|
|
465 |
|
466 |
ax.grid(True, linestyle='--', alpha=0.7)
|
467 |
plt.tight_layout()
|
|
|
468 |
return fig
|
469 |
|
470 |
def get_random_face_samples(organized_faces_folder, output_folder, largest_cluster, num_samples=100):
|
@@ -475,10 +475,8 @@ def get_random_face_samples(organized_faces_folder, output_folder, largest_clust
|
|
475 |
face_files = [f for f in os.listdir(person_folder) if f.endswith('.jpg')]
|
476 |
if face_files:
|
477 |
if int(cluster_folder.split('_')[1]) == largest_cluster:
|
478 |
-
# Get 10 samples for the largest cluster
|
479 |
samples = np.random.choice(face_files, min(num_samples, len(face_files)), replace=False)
|
480 |
else:
|
481 |
-
# Get 1 sample for other clusters
|
482 |
samples = [np.random.choice(face_files)]
|
483 |
|
484 |
for i, sample in enumerate(samples):
|
@@ -491,7 +489,6 @@ def get_random_face_samples(organized_faces_folder, output_folder, largest_clust
|
|
491 |
face_samples.append(output_path)
|
492 |
return face_samples
|
493 |
|
494 |
-
|
495 |
def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
|
496 |
output_folder = "output"
|
497 |
os.makedirs(output_folder, exist_ok=True)
|
@@ -525,8 +522,8 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
|
|
525 |
None, None, None, None, None, None, None, None, None)
|
526 |
|
527 |
progress(0.6, "Clustering faces")
|
528 |
-
|
529 |
-
clusters = cluster_faces(
|
530 |
num_clusters = len(set(clusters)) # Get the number of unique clusters
|
531 |
|
532 |
progress(0.7, "Organizing faces")
|
@@ -548,11 +545,9 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
|
|
548 |
anomalies_all, anomaly_scores_all, top_indices_all, anomalies_comp, anomaly_scores_comp, top_indices_comp, _ = lstm_anomaly_detection(
|
549 |
X, feature_columns, batch_size=batch_size)
|
550 |
|
551 |
-
# Normalize anomaly scores
|
552 |
anomaly_scores_all = normalize_scores(anomaly_scores_all)
|
553 |
anomaly_scores_comp = normalize_scores(anomaly_scores_comp)
|
554 |
|
555 |
-
# Perform anomaly detection for each emotion using LSTM autoencoder
|
556 |
emotion_anomalies = {}
|
557 |
for emotion in ['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral']:
|
558 |
anomalies, scores, indices = emotion_anomaly_detection(df[emotion])
|
@@ -568,7 +563,8 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
|
|
568 |
|
569 |
progress(0.95, "Generating plots")
|
570 |
try:
|
571 |
-
anomaly_plot_all = plot_anomaly_scores(df, anomaly_scores_all, top_indices_all,
|
|
|
572 |
df['Timecode'].iloc[top_indices_all].values)
|
573 |
anomaly_plot_comp = plot_anomaly_scores(df, anomaly_scores_comp, top_indices_comp, "Facial Features",
|
574 |
df['Timecode'].iloc[top_indices_comp].values)
|
@@ -611,7 +607,6 @@ def process_video(video_path, num_components, desired_fps, batch_size, progress=
|
|
611 |
face_samples
|
612 |
)
|
613 |
|
614 |
-
|
615 |
iface = gr.Interface(
|
616 |
fn=process_video,
|
617 |
inputs=[
|
@@ -641,6 +636,8 @@ iface = gr.Interface(
|
|
641 |
- Number of Components: Complexity of the facial expression model
|
642 |
- Desired FPS: Frames per second to analyze (lower for faster processing)
|
643 |
- Batch Size: Affects processing speed and memory usage
|
|
|
|
|
644 |
""",
|
645 |
allow_flagging="never"
|
646 |
)
|
|
|
21 |
import gradio as gr
|
22 |
import tempfile
|
23 |
import shutil
|
24 |
+
import io
|
25 |
|
26 |
# Suppress TensorFlow warnings
|
27 |
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
|
|
|
35 |
# Initialize models and other global variables
|
36 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
37 |
|
38 |
+
mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.98, 0.98, 0.98], min_face_size=50,
|
39 |
selection_method='largest')
|
40 |
model = InceptionResnetV1(pretrained='vggface2').eval().to(device)
|
41 |
mp_face_mesh = mp.solutions.face_mesh
|
|
|
156 |
|
157 |
return embeddings_by_frame, emotions_by_frame, aligned_face_paths
|
158 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
|
160 |
+
def cluster_faces(embeddings, eps=0.5, min_samples=5, metric='cosine'):
    """Group face embeddings into clusters with DBSCAN.

    Args:
        embeddings: Sequence of equally-shaped embedding vectors, one per
            face (they are combined with ``np.stack``).
        eps: DBSCAN ``eps`` argument; defaults to the previously
            hard-coded 0.5.
        min_samples: DBSCAN ``min_samples`` argument; defaults to the
            previously hard-coded 5.
        metric: DBSCAN ``metric`` argument; defaults to the previously
            hard-coded ``'cosine'``.

    Returns:
        Integer array with one cluster label per embedding. It is all zeros
        when fewer than two embeddings are given or when DBSCAN labels every
        point as noise. Otherwise the raw DBSCAN labels are returned, which
        may still contain ``-1`` for individual noise points.
    """
    n_faces = len(embeddings)
    if n_faces < 2:
        print("Not enough faces for clustering. Assigning all to one cluster.")
        return np.zeros(n_faces, dtype=int)

    X = np.stack(embeddings)

    dbscan = DBSCAN(eps=eps, min_samples=min_samples, metric=metric)
    clusters = dbscan.fit_predict(X)

    # An all-noise result would leave nothing to organise, so fall back to a
    # single cluster holding every face.
    if np.all(clusters == -1):
        print("DBSCAN assigned all to noise. Considering as one cluster.")
        return np.zeros(n_faces, dtype=int)

    return clusters
|
175 |
|
|
|
235 |
anomalies = anomaly_scores > threshold
|
236 |
return anomalies, np.where(anomalies)[0]
|
237 |
|
|
|
238 |
def timecode_to_seconds(timecode):
    """Convert a colon-separated timecode string to seconds.

    Accepts ``HH:MM:SS``, ``MM:SS`` or plain ``SS`` (any field may be
    fractional), so it agrees with the lenient parsing the plotting helpers
    in this file apply to the ``Timecode`` column.

    Args:
        timecode: Timecode string such as ``"00:01:23.5"``.

    Returns:
        The timecode expressed in seconds as a float.

    Raises:
        ValueError: If the string has more than three fields or a field is
            not a valid number.
    """
    parts = timecode.split(':')
    if len(parts) > 3:
        raise ValueError(f"Invalid timecode: {timecode!r}")
    seconds = 0.0
    # Horner-style accumulation: every field to the left is worth 60x more.
    for part in parts:
        seconds = seconds * 60 + float(part)
    return seconds
|
241 |
|
|
|
242 |
def group_similar_timecodes(timecodes, scores, threshold_seconds=5):
|
243 |
grouped = []
|
244 |
current_group = []
|
|
|
360 |
return np.full_like(scores, 100)
|
361 |
return ((scores - min_score) / (max_score - min_score)) * 100
|
362 |
|
363 |
+
def plot_to_image(fig, dpi=300, image_format='png'):
    """Render a figure into an in-memory image buffer.

    Args:
        fig: Object exposing a Matplotlib-style ``savefig(file, **kwargs)``
            method.
        dpi: Resolution passed to ``savefig``; defaults to the previously
            hard-coded 300.
        image_format: Value passed to ``savefig`` as ``format``; defaults to
            the previously hard-coded ``'png'``.

    Returns:
        An ``io.BytesIO`` holding the encoded image, positioned at offset 0.
    """
    buf = io.BytesIO()
    fig.savefig(buf, format=image_format, dpi=dpi, bbox_inches='tight')
    buf.seek(0)  # rewind so callers can read the image from the start
    return buf
|
368 |
+
|
369 |
def plot_anomaly_scores(df, anomaly_scores, top_indices, title, timecodes):
|
370 |
+
plt.figure(figsize=(16, 8), dpi=300)
|
371 |
fig, ax = plt.subplots(figsize=(16, 8))
|
372 |
|
373 |
df['Seconds'] = df['Timecode'].apply(
|
374 |
lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
|
375 |
|
|
|
376 |
normalized_scores = normalize_scores(anomaly_scores)
|
377 |
|
|
|
378 |
seconds = df['Seconds'].values[1:]
|
379 |
scores = normalized_scores[1:]
|
380 |
|
|
|
381 |
ax.scatter(seconds, scores, color='blue', alpha=0.7, s=10)
|
382 |
|
|
|
383 |
top_indices = [idx for idx in top_indices if idx > 0]
|
384 |
ax.scatter(df['Seconds'].iloc[top_indices], normalized_scores[top_indices], color='red', s=50, zorder=5)
|
385 |
|
386 |
+
# Calculate and plot baseline
|
387 |
+
non_anomalous_scores = np.delete(normalized_scores, top_indices)
|
388 |
+
baseline = np.mean(non_anomalous_scores)
|
389 |
+
ax.axhline(y=baseline, color='black', linestyle='--', linewidth=2.5)
|
390 |
+
ax.text(df['Seconds'].max(), baseline, f'Baseline ({baseline:.2f})',
|
391 |
+
verticalalignment='bottom', horizontalalignment='right', color='black')
|
392 |
+
|
393 |
grouped_timecodes = group_similar_timecodes([df['Timecode'].iloc[idx] for idx in top_indices],
|
394 |
normalized_scores[top_indices])
|
395 |
|
|
|
396 |
for group in grouped_timecodes:
|
397 |
max_score_idx = max(range(len(group)), key=lambda i: group[i][1])
|
398 |
timecode, score, idx = group[max_score_idx]
|
|
|
410 |
|
411 |
ax.set_xlabel('Time')
|
412 |
ax.set_ylabel('Anomaly Score')
|
413 |
+
ax.set_title(title)
|
414 |
|
415 |
ax.grid(True, linestyle='--', alpha=0.7)
|
416 |
plt.tight_layout()
|
417 |
+
plt.close()
|
418 |
return fig
|
419 |
|
420 |
def plot_emotion(df, emotion, anomaly_scores, top_indices, color, timecodes):
|
421 |
+
plt.figure(figsize=(16, 8), dpi=300)
|
422 |
fig, ax = plt.subplots(figsize=(16, 8))
|
423 |
|
424 |
df['Seconds'] = df['Timecode'].apply(
|
425 |
lambda x: sum(float(t) * 60 ** i for i, t in enumerate(reversed(x.split(':')))))
|
426 |
|
|
|
427 |
seconds = df['Seconds'].values[1:]
|
428 |
scores = anomaly_scores[1:]
|
429 |
|
|
|
430 |
ax.scatter(seconds, scores, color=color, alpha=0.7, s=10)
|
431 |
|
|
|
432 |
top_indices = [idx for idx in top_indices if idx > 0]
|
433 |
ax.scatter(df['Seconds'].iloc[top_indices], anomaly_scores[top_indices], color='red', s=50, zorder=5)
|
434 |
|
435 |
+
# Calculate and plot baseline
|
436 |
+
non_anomalous_scores = np.delete(anomaly_scores, top_indices)
|
437 |
+
baseline = np.mean(non_anomalous_scores)
|
438 |
+
ax.axhline(y=baseline, color='black', linestyle='--', linewidth=2.5)
|
439 |
+
ax.text(df['Seconds'].max(), baseline, f'Baseline ({baseline:.2f})',
|
440 |
+
verticalalignment='bottom', horizontalalignment='right', color='black')
|
441 |
+
|
442 |
+
|
443 |
grouped_timecodes = group_similar_timecodes([df['Timecode'].iloc[idx] for idx in top_indices],
|
444 |
anomaly_scores[top_indices])
|
445 |
|
|
|
446 |
for group in grouped_timecodes:
|
447 |
max_score_idx = max(range(len(group)), key=lambda i: group[i][1])
|
448 |
timecode, score, idx = group[max_score_idx]
|
|
|
464 |
|
465 |
ax.grid(True, linestyle='--', alpha=0.7)
|
466 |
plt.tight_layout()
|
467 |
+
plt.close()
|
468 |
return fig
|
469 |
|
470 |
def get_random_face_samples(organized_faces_folder, output_folder, largest_cluster, num_samples=100):
|
|
|
475 |
face_files = [f for f in os.listdir(person_folder) if f.endswith('.jpg')]
|
476 |
if face_files:
|
477 |
if int(cluster_folder.split('_')[1]) == largest_cluster:
|
|
|
478 |
samples = np.random.choice(face_files, min(num_samples, len(face_files)), replace=False)
|
479 |
else:
|
|
|
480 |
samples = [np.random.choice(face_files)]
|
481 |
|
482 |
for i, sample in enumerate(samples):
|
|
|
489 |
face_samples.append(output_path)
|
490 |
return face_samples
|
491 |
|
|
|
492 |
def process_video(video_path, num_components, desired_fps, batch_size, progress=gr.Progress()):
|
493 |
output_folder = "output"
|
494 |
os.makedirs(output_folder, exist_ok=True)
|
|
|
522 |
None, None, None, None, None, None, None, None, None)
|
523 |
|
524 |
progress(0.6, "Clustering faces")
|
525 |
+
embeddings = [embedding for _, embedding in embeddings_by_frame.items()]
|
526 |
+
clusters = cluster_faces(embeddings)
|
527 |
num_clusters = len(set(clusters)) # Get the number of unique clusters
|
528 |
|
529 |
progress(0.7, "Organizing faces")
|
|
|
545 |
anomalies_all, anomaly_scores_all, top_indices_all, anomalies_comp, anomaly_scores_comp, top_indices_comp, _ = lstm_anomaly_detection(
|
546 |
X, feature_columns, batch_size=batch_size)
|
547 |
|
|
|
548 |
anomaly_scores_all = normalize_scores(anomaly_scores_all)
|
549 |
anomaly_scores_comp = normalize_scores(anomaly_scores_comp)
|
550 |
|
|
|
551 |
emotion_anomalies = {}
|
552 |
for emotion in ['fear', 'sad', 'angry', 'happy', 'surprise', 'neutral']:
|
553 |
anomalies, scores, indices = emotion_anomaly_detection(df[emotion])
|
|
|
563 |
|
564 |
progress(0.95, "Generating plots")
|
565 |
try:
|
566 |
+
anomaly_plot_all = plot_anomaly_scores(df, anomaly_scores_all, top_indices_all,
|
567 |
+
"Facial Features + Emotions",
|
568 |
df['Timecode'].iloc[top_indices_all].values)
|
569 |
anomaly_plot_comp = plot_anomaly_scores(df, anomaly_scores_comp, top_indices_comp, "Facial Features",
|
570 |
df['Timecode'].iloc[top_indices_comp].values)
|
|
|
607 |
face_samples
|
608 |
)
|
609 |
|
|
|
610 |
iface = gr.Interface(
|
611 |
fn=process_video,
|
612 |
inputs=[
|
|
|
636 |
- Number of Components: Complexity of the facial expression model
|
637 |
- Desired FPS: Frames per second to analyze (lower for faster processing)
|
638 |
- Batch Size: Affects processing speed and memory usage
|
639 |
+
|
640 |
+
Click on any graph to enlarge it.
|
641 |
""",
|
642 |
allow_flagging="never"
|
643 |
)
|