Update voice_analysis.py
voice_analysis.py  +10 -4  CHANGED
@@ -43,10 +43,17 @@ def get_speaker_embeddings(audio_path, diarization, model_name="pyannote/embedding"):
         if segment.shape[1] == 0:
             continue
 
-        # Ensure the segment is long enough (at least 1 second)
-        if segment.shape[1] < sample_rate:
-            padding = torch.zeros(1, sample_rate - segment.shape[1])
+        # Ensure the segment is long enough (at least 2 seconds)
+        if segment.shape[1] < 2 * sample_rate:
+            padding = torch.zeros(1, 2 * sample_rate - segment.shape[1])
             segment = torch.cat([segment, padding], dim=1)
+
+        # Ensure the segment is not too long (maximum 10 seconds)
+        if segment.shape[1] > 10 * sample_rate:
+            segment = segment[:, :10 * sample_rate]
+
+        # Reshape the segment to match the model's expected input
+        segment = segment.unsqueeze(0)  # Add batch dimension
 
         with torch.no_grad():
             embedding = model(segment)  # Pass the tensor directly, not a dictionary
@@ -56,7 +63,6 @@ def get_speaker_embeddings(audio_path, diarization, model_name="pyannote/embedding"):
     return embeddings
 
 def align_voice_embeddings(voice_embeddings, frame_count, fps):
-    import numpy as np
     aligned_embeddings = []
     current_embedding_index = 0
 
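For reference, the padding, trimming, and batching steps added in the first hunk can be read as one self-contained helper. The sketch below is not code from this repository: the helper name normalize_segment and the 16 kHz example rate are assumptions, while the 2-second minimum, 10-second maximum, and added batch dimension come directly from the diff.

import torch

def normalize_segment(segment: torch.Tensor, sample_rate: int) -> torch.Tensor:
    """Pad or trim a (1, num_samples) waveform to 2-10 seconds, then add a
    batch dimension so it can be passed to the embedding model."""
    min_len = 2 * sample_rate    # pad anything shorter than 2 seconds
    max_len = 10 * sample_rate   # trim anything longer than 10 seconds

    if segment.shape[1] < min_len:
        padding = torch.zeros(1, min_len - segment.shape[1])
        segment = torch.cat([segment, padding], dim=1)

    if segment.shape[1] > max_len:
        segment = segment[:, :max_len]

    return segment.unsqueeze(0)  # shape (1, 1, num_samples)

# Example: a 0.5 s mono segment at 16 kHz (hypothetical rate) is padded to 2 s.
sr = 16000
short = torch.randn(1, sr // 2)
print(normalize_segment(short, sr).shape)  # torch.Size([1, 1, 32000])

Fixed bounds like these keep very short diarization turns from failing inside the embedding model and cap memory use on long turns. Whether the extra batch dimension is strictly required depends on the embedding model version, so the unsqueeze here follows the diff rather than a documented pyannote contract.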