Spaces:
Runtime error
Runtime error
Commit
·
98f5625
1
Parent(s):
4b3d742
Added resampling code and commented
Browse files
asr.py
CHANGED
@@ -13,10 +13,22 @@ model_id_lid = "facebook/mms-lid-126"
|
|
13 |
processor_lid = AutoFeatureExtractor.from_pretrained(model_id_lid)
|
14 |
model_lid = Wav2Vec2ForSequenceClassification.from_pretrained(model_id_lid)
|
15 |
|
|
|
|
|
|
|
|
|
|
|
16 |
def transcribe(audio):
|
17 |
print(audio)
|
18 |
audio = librosa.load(audio, sr=16_000, mono=True)[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
inputs = processor(audio, sampling_rate=16_000,return_tensors="pt")
|
|
|
20 |
with torch.no_grad():
|
21 |
tr_start_time = time.time()
|
22 |
outputs = model(**inputs).logits
|
|
|
13 |
processor_lid = AutoFeatureExtractor.from_pretrained(model_id_lid)
|
14 |
model_lid = Wav2Vec2ForSequenceClassification.from_pretrained(model_id_lid)
|
15 |
|
16 |
+
def resample_to_16k(audio, orig_sr):
    """Resample an audio signal to a 16 kHz sampling rate.

    Args:
        audio: The audio samples, forwarded to ``librosa.resample`` as ``y``.
        orig_sr: The sampling rate that ``audio`` currently has.

    Returns:
        Whatever ``librosa.resample`` produces for ``target_sr=16000``.
    """
    # Delegate entirely to librosa; the target rate is fixed at 16 kHz.
    return librosa.resample(y=audio, orig_sr=orig_sr, target_sr=16000)
|
19 |
+
|
20 |
+
|
21 |
def transcribe(audio):
|
22 |
print(audio)
|
23 |
audio = librosa.load(audio, sr=16_000, mono=True)[0]
|
24 |
+
print("After loading: ",audio)
|
25 |
+
sr,y = audio
|
26 |
+
y = y.astype(np.float32)
|
27 |
+
y /= np.max(np.abs(y))
|
28 |
+
y_resampled = resample_to_16k(y, sr)
|
29 |
+
print("Without using librosa to load:",y_resampled)
|
30 |
inputs = processor(audio, sampling_rate=16_000,return_tensors="pt")
|
31 |
+
# inputs = processor(y_resampled, sampling_rate=16_000,return_tensors="pt")
|
32 |
with torch.no_grad():
|
33 |
tr_start_time = time.time()
|
34 |
outputs = model(**inputs).logits
|