reab5555 committed on
Commit
a80d5f3
·
verified ·
1 Parent(s): 36f6fd8

Update video_processing.py

Browse files
Files changed (1) hide show
  1. video_processing.py +4 -3
video_processing.py CHANGED
@@ -8,7 +8,7 @@ from PIL import Image, ImageDraw, ImageFont
8
  import math
9
  from face_analysis import get_face_embedding, cluster_faces, organize_faces_by_person, draw_facial_landmarks
10
  from pose_analysis import pose, calculate_posture_score, draw_pose_landmarks
11
- from voice_analysis import extract_audio_from_video, diarize_speakers, get_speaker_embeddings
12
  from anomaly_detection import anomaly_detection
13
  from visualization import plot_mse, plot_mse_histogram, plot_mse_heatmap, create_video_with_heatmap
14
  from utils import frame_to_timecode
@@ -154,11 +154,12 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
154
  face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
155
 
156
  progress(0.8, "Extracting audio and performing voice analysis")
157
-
158
  audio_path = extract_audio_from_video(video_path)
159
  diarization = diarize_speakers(audio_path)
160
  voice_embeddings, audio_duration = get_speaker_embeddings(audio_path, diarization, "pyannote/embedding")
161
 
 
162
 
163
  progress(0.85, "Performing anomaly detection")
164
  embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
@@ -177,7 +178,7 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
177
  X_voice = np.array([emb for emb in aligned_voice_embeddings])
178
 
179
  mse_embeddings, mse_posture, mse_voice = anomaly_detection(X_embeddings, X_posture, X_voice)
180
-
181
  progress(0.9, "Generating graphs")
182
  mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
183
  color=GRAPH_COLORS['facial_embeddings'],
 
8
  import math
9
  from face_analysis import get_face_embedding, cluster_faces, organize_faces_by_person, draw_facial_landmarks
10
  from pose_analysis import pose, calculate_posture_score, draw_pose_landmarks
11
+ from voice_analysis import get_speaker_embeddings, align_voice_embeddings
12
  from anomaly_detection import anomaly_detection
13
  from visualization import plot_mse, plot_mse_histogram, plot_mse_heatmap, create_video_with_heatmap
14
  from utils import frame_to_timecode
 
154
  face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
155
 
156
  progress(0.8, "Extracting audio and performing voice analysis")
157
+
158
  audio_path = extract_audio_from_video(video_path)
159
  diarization = diarize_speakers(audio_path)
160
  voice_embeddings, audio_duration = get_speaker_embeddings(audio_path, diarization, "pyannote/embedding")
161
 
162
+ aligned_voice_embeddings = align_voice_embeddings(voice_embeddings, frame_count, original_fps, audio_duration)
163
 
164
  progress(0.85, "Performing anomaly detection")
165
  embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
 
178
  X_voice = np.array([emb for emb in aligned_voice_embeddings])
179
 
180
  mse_embeddings, mse_posture, mse_voice = anomaly_detection(X_embeddings, X_posture, X_voice)
181
+
182
  progress(0.9, "Generating graphs")
183
  mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
184
  color=GRAPH_COLORS['facial_embeddings'],