Add-Vishnu committed on
Commit
98f5625
·
1 Parent(s): 4b3d742

Added resampling code and commented

Browse files
Files changed (1) hide show
  1. asr.py +12 -0
asr.py CHANGED
@@ -13,10 +13,22 @@ model_id_lid = "facebook/mms-lid-126"
13
  processor_lid = AutoFeatureExtractor.from_pretrained(model_id_lid)
14
  model_lid = Wav2Vec2ForSequenceClassification.from_pretrained(model_id_lid)
15
 
 
 
 
 
 
16
  def transcribe(audio):
17
  print(audio)
18
  audio = librosa.load(audio, sr=16_000, mono=True)[0]
 
 
 
 
 
 
19
  inputs = processor(audio, sampling_rate=16_000,return_tensors="pt")
 
20
  with torch.no_grad():
21
  tr_start_time = time.time()
22
  outputs = model(**inputs).logits
 
13
  processor_lid = AutoFeatureExtractor.from_pretrained(model_id_lid)
14
  model_lid = Wav2Vec2ForSequenceClassification.from_pretrained(model_id_lid)
15
 
16
+ def resample_to_16k(audio, orig_sr):
17
+ y_resampled = librosa.resample(y=audio, orig_sr=orig_sr, target_sr = 16000)
18
+ return y_resampled
19
+
20
+
21
  def transcribe(audio):
22
  print(audio)
23
  audio = librosa.load(audio, sr=16_000, mono=True)[0]
24
+ print("After loading: ",audio)
25
+ sr,y = audio
26
+ y = y.astype(np.float32)
27
+ y /= np.max(np.abs(y))
28
+ y_resampled = resample_to_16k(y, sr)
29
+ print("Without using librosa to load:",y_resampled)
30
  inputs = processor(audio, sampling_rate=16_000,return_tensors="pt")
31
+ # inputs = processor(y_resampled, sampling_rate=16_000,return_tensors="pt")
32
  with torch.no_grad():
33
  tr_start_time = time.time()
34
  outputs = model(**inputs).logits