Spaces:

reab5555
/

Multimodal-Behavioral-Anomalies-Detection

Running

reab5555 committed on Jul 28, 2024

Commit

7dcd172

verified ·

1 Parent(s): 5fefb86

Update video_processing.py

Files changed (1) hide show

video_processing.py CHANGED Viewed

@@ -160,15 +160,24 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
         voice_embeddings, audio_duration = get_speaker_embeddings(audio_path, diarization, "pyannote/embedding")
         aligned_voice_embeddings = align_voice_embeddings(voice_embeddings, frame_count, original_fps, audio_duration)
         progress(0.85, "Performing anomaly detection")
         embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
         X_embeddings = df[embedding_columns].values
         X_posture = np.array([posture_scores_by_frame.get(frame, None) for frame in df['Frame']])
         X_posture = X_posture[X_posture != None].reshape(-1, 1)
         X_voice = np.array(aligned_voice_embeddings)
         try:
             if len(X_posture) == 0:
                 raise ValueError("No valid posture data found")

         voice_embeddings, audio_duration = get_speaker_embeddings(audio_path, diarization, "pyannote/embedding")
         aligned_voice_embeddings = align_voice_embeddings(voice_embeddings, frame_count, original_fps, audio_duration)
         progress(0.85, "Performing anomaly detection")
         embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
+        # Ensure X_voice has the same length as X_embeddings
         X_embeddings = df[embedding_columns].values
         X_posture = np.array([posture_scores_by_frame.get(frame, None) for frame in df['Frame']])
         X_posture = X_posture[X_posture != None].reshape(-1, 1)
         X_voice = np.array(aligned_voice_embeddings)
+        # Trim or pad X_voice to match X_embeddings length
+        if len(X_voice) > len(X_embeddings):
+            X_voice = X_voice[:len(X_embeddings)]
+        elif len(X_voice) < len(X_embeddings):
+            padding = np.zeros((len(X_embeddings) - len(X_voice), X_voice.shape[1]))
+            X_voice = np.vstack((X_voice, padding))
         try:
             if len(X_posture) == 0:
                 raise ValueError("No valid posture data found")