Spaces:
Runtime error
Runtime error
Commit
·
98f5625
1
Parent(s):
4b3d742
Added resampling code and commented
Browse files
asr.py
CHANGED
|
@@ -13,10 +13,22 @@ model_id_lid = "facebook/mms-lid-126"
|
|
| 13 |
# Load the feature extractor that matches the `model_id_lid` checkpoint.
processor_lid = AutoFeatureExtractor.from_pretrained(model_id_lid)
|
| 14 |
# Load the sequence-classification model for the same `model_id_lid` checkpoint.
model_lid = Wav2Vec2ForSequenceClassification.from_pretrained(model_id_lid)
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
def transcribe(audio):
|
| 17 |
print(audio)
|
| 18 |
audio = librosa.load(audio, sr=16_000, mono=True)[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
inputs = processor(audio, sampling_rate=16_000,return_tensors="pt")
|
|
|
|
| 20 |
with torch.no_grad():
|
| 21 |
tr_start_time = time.time()
|
| 22 |
outputs = model(**inputs).logits
|
|
|
|
| 13 |
# Load the feature extractor that matches the `model_id_lid` checkpoint.
processor_lid = AutoFeatureExtractor.from_pretrained(model_id_lid)
|
| 14 |
# Load the sequence-classification model for the same `model_id_lid` checkpoint.
model_lid = Wav2Vec2ForSequenceClassification.from_pretrained(model_id_lid)
|
| 15 |
|
| 16 |
+
def resample_to_16k(audio, orig_sr):
    """Resample a waveform to a 16 kHz sampling rate.

    Args:
        audio: Audio samples, forwarded to ``librosa.resample`` as ``y``.
        orig_sr: Sampling rate of ``audio`` before resampling.

    Returns:
        Whatever ``librosa.resample`` produces for a 16000 Hz target rate.
    """
    return librosa.resample(y=audio, orig_sr=orig_sr, target_sr=16000)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
def transcribe(audio):
|
| 22 |
print(audio)
|
| 23 |
audio = librosa.load(audio, sr=16_000, mono=True)[0]
|
| 24 |
+
print("After loading: ",audio)
|
| 25 |
+
sr,y = audio
|
| 26 |
+
y = y.astype(np.float32)
|
| 27 |
+
y /= np.max(np.abs(y))
|
| 28 |
+
y_resampled = resample_to_16k(y, sr)
|
| 29 |
+
print("Without using librosa to load:",y_resampled)
|
| 30 |
inputs = processor(audio, sampling_rate=16_000,return_tensors="pt")
|
| 31 |
+
# inputs = processor(y_resampled, sampling_rate=16_000,return_tensors="pt")
|
| 32 |
with torch.no_grad():
|
| 33 |
tr_start_time = time.time()
|
| 34 |
outputs = model(**inputs).logits
|