reab5555 commited on
Commit
b9da44c
·
verified ·
1 Parent(s): dcaabd2

Update voice_analysis.py

Browse files
Files changed (1) hide show
  1. voice_analysis.py +10 -4
voice_analysis.py CHANGED
@@ -43,10 +43,17 @@ def get_speaker_embeddings(audio_path, diarization, model_name="pyannote/embeddi
43
  if segment.shape[1] == 0:
44
  continue
45
 
46
- # Ensure the segment is long enough (at least 1 second)
47
- if segment.shape[1] < sample_rate:
48
- padding = torch.zeros(1, sample_rate - segment.shape[1])
49
  segment = torch.cat([segment, padding], dim=1)
 
 
 
 
 
 
 
50
 
51
  with torch.no_grad():
52
  embedding = model(segment) # Pass the tensor directly, not a dictionary
@@ -56,7 +63,6 @@ def get_speaker_embeddings(audio_path, diarization, model_name="pyannote/embeddi
56
  return embeddings
57
 
58
  def align_voice_embeddings(voice_embeddings, frame_count, fps):
59
- import numpy as np
60
  aligned_embeddings = []
61
  current_embedding_index = 0
62
 
 
43
  if segment.shape[1] == 0:
44
  continue
45
 
46
+ # Ensure the segment is long enough (at least 2 seconds)
47
+ if segment.shape[1] < 2 * sample_rate:
48
+ padding = torch.zeros(1, 2 * sample_rate - segment.shape[1])
49
  segment = torch.cat([segment, padding], dim=1)
50
+
51
+ # Ensure the segment is not too long (maximum 10 seconds)
52
+ if segment.shape[1] > 10 * sample_rate:
53
+ segment = segment[:, :10 * sample_rate]
54
+
55
+ # Reshape the segment to match the model's expected input
56
+ segment = segment.unsqueeze(0) # Add batch dimension
57
 
58
  with torch.no_grad():
59
  embedding = model(segment) # Pass the tensor directly, not a dictionary
 
63
  return embeddings
64
 
65
  def align_voice_embeddings(voice_embeddings, frame_count, fps):
 
66
  aligned_embeddings = []
67
  current_embedding_index = 0
68