Update video_processing.py
Browse files
- video_processing.py +10 -1
video_processing.py
CHANGED
@@ -160,15 +160,24 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
|
|
160 |
voice_embeddings, audio_duration = get_speaker_embeddings(audio_path, diarization, "pyannote/embedding")
|
161 |
|
162 |
aligned_voice_embeddings = align_voice_embeddings(voice_embeddings, frame_count, original_fps, audio_duration)
|
163 |
-
|
164 |
progress(0.85, "Performing anomaly detection")
|
165 |
embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
|
166 |
|
|
|
167 |
X_embeddings = df[embedding_columns].values
|
168 |
X_posture = np.array([posture_scores_by_frame.get(frame, None) for frame in df['Frame']])
|
169 |
X_posture = X_posture[X_posture != None].reshape(-1, 1)
|
170 |
X_voice = np.array(aligned_voice_embeddings)
|
171 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
try:
|
173 |
if len(X_posture) == 0:
|
174 |
raise ValueError("No valid posture data found")
|
|
|
160 |
voice_embeddings, audio_duration = get_speaker_embeddings(audio_path, diarization, "pyannote/embedding")
|
161 |
|
162 |
aligned_voice_embeddings = align_voice_embeddings(voice_embeddings, frame_count, original_fps, audio_duration)
|
163 |
+
|
164 |
progress(0.85, "Performing anomaly detection")
|
165 |
embedding_columns = [col for col in df.columns if col.startswith('Raw_Embedding_')]
|
166 |
|
167 |
+
# Ensure X_voice has the same length as X_embeddings
|
168 |
X_embeddings = df[embedding_columns].values
|
169 |
X_posture = np.array([posture_scores_by_frame.get(frame, None) for frame in df['Frame']])
|
170 |
X_posture = X_posture[X_posture != None].reshape(-1, 1)
|
171 |
X_voice = np.array(aligned_voice_embeddings)
|
172 |
|
173 |
+
|
174 |
+
# Trim or pad X_voice to match X_embeddings length
|
175 |
+
if len(X_voice) > len(X_embeddings):
|
176 |
+
X_voice = X_voice[:len(X_embeddings)]
|
177 |
+
elif len(X_voice) < len(X_embeddings):
|
178 |
+
padding = np.zeros((len(X_embeddings) - len(X_voice), X_voice.shape[1]))
|
179 |
+
X_voice = np.vstack((X_voice, padding))
|
180 |
+
|
181 |
try:
|
182 |
if len(X_posture) == 0:
|
183 |
raise ValueError("No valid posture data found")
|