Spaces:
Runtime error
Runtime error
Commit
·
e9b4267
1
Parent(s):
93ed5b8
Update asr.py
Browse files
asr.py
CHANGED
@@ -41,9 +41,16 @@ def transcribe(audio):
|
|
41 |
|
42 |
def detect_language(audio):
|
43 |
print(audio)
|
44 |
-
audio = librosa.load(audio, sr=16_000, mono=True)[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
# print(audio)
|
46 |
-
inputs_lid = processor_lid(audio, sampling_rate=16_000, return_tensors="pt")
|
47 |
with torch.no_grad():
|
48 |
start_time_lid = time.time()
|
49 |
outputs_lid = model_lid(**inputs_lid).logits
|
@@ -56,11 +63,17 @@ def detect_language(audio):
|
|
56 |
|
57 |
|
58 |
def transcribe_lang(audio,lang):
|
59 |
-
audio = librosa.load(audio, sr=16_000, mono=True)[0]
|
|
|
|
|
|
|
|
|
|
|
60 |
processor.tokenizer.set_target_lang(lang)
|
61 |
model.load_adapter(lang)
|
62 |
print(lang)
|
63 |
-
inputs = processor(audio, sampling_rate=16_000,return_tensors="pt")
|
|
|
64 |
with torch.no_grad():
|
65 |
tr_start_time = time.time()
|
66 |
outputs = model(**inputs).logits
|
|
|
41 |
|
42 |
def detect_language(audio):
|
43 |
print(audio)
|
44 |
+
# audio = librosa.load(audio, sr=16_000, mono=True)[0]
|
45 |
+
sr,y = audio
|
46 |
+
y = y.astype(np.float32)
|
47 |
+
y /= np.max(np.abs(y))
|
48 |
+
y_resampled = resample_to_16k(y, sr)
|
49 |
+
print("Without using librosa to load:",y_resampled)
|
50 |
+
# inputs = processor(audio, sampling_rate=16_000,return_tensors="pt")
|
51 |
+
inputs = processor(y_resampled, sampling_rate=16_000,return_tensors="pt")
|
52 |
# print(audio)
|
53 |
+
# inputs_lid = processor_lid(audio, sampling_rate=16_000, return_tensors="pt")
|
54 |
with torch.no_grad():
|
55 |
start_time_lid = time.time()
|
56 |
outputs_lid = model_lid(**inputs_lid).logits
|
|
|
63 |
|
64 |
|
65 |
def transcribe_lang(audio,lang):
|
66 |
+
# audio = librosa.load(audio, sr=16_000, mono=True)[0]
|
67 |
+
sr,y = audio
|
68 |
+
y = y.astype(np.float32)
|
69 |
+
y /= np.max(np.abs(y))
|
70 |
+
y_resampled = resample_to_16k(y, sr)
|
71 |
+
print("Without using librosa to load:",y_resampled)
|
72 |
processor.tokenizer.set_target_lang(lang)
|
73 |
model.load_adapter(lang)
|
74 |
print(lang)
|
75 |
+
# inputs = processor(audio, sampling_rate=16_000,return_tensors="pt")
|
76 |
+
inputs = processor(y_resampled, sampling_rate=16_000,return_tensors="pt")
|
77 |
with torch.no_grad():
|
78 |
tr_start_time = time.time()
|
79 |
outputs = model(**inputs).logits
|