reab5555 committed on
Commit
baa0676
·
verified ·
1 Parent(s): 4a67bd7

Update video_processing.py

Browse files
Files changed (1) hide show
  1. video_processing.py +6 -2
video_processing.py CHANGED
@@ -154,9 +154,11 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
154
  face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
155
 
156
  progress(0.8, "Extracting audio and performing voice analysis")
 
157
  audio_path = extract_audio_from_video(video_path)
158
  diarization = diarize_speakers(audio_path)
159
- voice_embeddings = get_speaker_embeddings(audio_path, diarization, "pyannote/embedding")
 
160
 
161
  progress(0.85, "Performing anomaly detection")
162
  embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
@@ -170,7 +172,9 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
170
  if len(X_posture) == 0:
171
  raise ValueError("No valid posture data found")
172
 
173
- X_voice = np.array([emb['embedding'] for emb in voice_embeddings])
 
 
174
 
175
  mse_embeddings, mse_posture, mse_voice = anomaly_detection(X_embeddings, X_posture, X_voice)
176
 
 
154
  face_samples = get_all_face_samples(organized_faces_folder, output_folder, largest_cluster)
155
 
156
  progress(0.8, "Extracting audio and performing voice analysis")
157
+
158
  audio_path = extract_audio_from_video(video_path)
159
  diarization = diarize_speakers(audio_path)
160
+ voice_embeddings, audio_duration = get_speaker_embeddings(audio_path, diarization, "pyannote/embedding")
161
+
162
 
163
  progress(0.85, "Performing anomaly detection")
164
  embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
 
172
  if len(X_posture) == 0:
173
  raise ValueError("No valid posture data found")
174
 
175
+ aligned_voice_embeddings = align_voice_embeddings(voice_embeddings, frame_count, original_fps, audio_duration)
176
+
177
+ X_voice = np.array([emb for emb in aligned_voice_embeddings])
178
 
179
  mse_embeddings, mse_posture, mse_voice = anomaly_detection(X_embeddings, X_posture, X_voice)
180