Spaces:

reab5555
/

Multimodal-Behavioral-Anomalies-Detection

Running

reab5555 committed on Jul 28, 2024

Commit

9263021

verified ·

1 Parent(s): b9da44c

Update voice_analysis.py

Files changed (1) hide show

voice_analysis.py CHANGED Viewed

@@ -33,6 +33,7 @@ def get_speaker_embeddings(audio_path, diarization, model_name="pyannote/embeddi
     model.eval()  # Set the model to evaluation mode
     waveform, sample_rate = torchaudio.load(audio_path)
     embeddings = []
     for turn, _, speaker in diarization.itertracks(yield_label=True):
@@ -40,6 +41,8 @@ def get_speaker_embeddings(audio_path, diarization, model_name="pyannote/embeddi
         end_frame = int(turn.end * sample_rate)
         segment = waveform[:, start_frame:end_frame]
         if segment.shape[1] == 0:
             continue
@@ -54,6 +57,8 @@ def get_speaker_embeddings(audio_path, diarization, model_name="pyannote/embeddi
         # Reshape the segment to match the model's expected input
         segment = segment.unsqueeze(0)  # Add batch dimension
         with torch.no_grad():
             embedding = model(segment)  # Pass the tensor directly, not a dictionary

     model.eval()  # Set the model to evaluation mode
     waveform, sample_rate = torchaudio.load(audio_path)
+    print(f"Sample rate: {sample_rate}")
     embeddings = []
     for turn, _, speaker in diarization.itertracks(yield_label=True):
         end_frame = int(turn.end * sample_rate)
         segment = waveform[:, start_frame:end_frame]
+        print(f"Segment shape before processing: {segment.shape}")
         if segment.shape[1] == 0:
             continue
         # Reshape the segment to match the model's expected input
         segment = segment.unsqueeze(0)  # Add batch dimension
+        print(f"Segment shape after processing: {segment.shape}")
         with torch.no_grad():
             embedding = model(segment)  # Pass the tensor directly, not a dictionary