Spaces:
Sleeping
Sleeping
fix
Browse files
app.py
CHANGED
@@ -10,12 +10,7 @@ from audio_recorder_streamlit import audio_recorder
|
|
10 |
def load_asr_model():
|
11 |
return load_model()
|
12 |
|
13 |
-
asr_model = load_asr_model()
|
14 |
-
|
15 |
-
|
16 |
-
def transcribe(audio_file):
|
17 |
-
transcript = openai.Audio.transcribe("whisper-1", audio_file)
|
18 |
-
return transcript
|
19 |
|
20 |
|
21 |
def save_audio_file(audio_bytes, file_extension):
|
@@ -43,7 +38,7 @@ def transcribe_audio(file_path):
|
|
43 |
:return: The transcribed text
|
44 |
"""
|
45 |
with open(file_path, "rb") as audio_file:
|
46 |
-
transcript = inference(asr_model, audio_file)
|
47 |
return transcript
|
48 |
|
49 |
|
|
|
10 |
def load_asr_model():
|
11 |
return load_model()
|
12 |
|
13 |
+
processor, asr_model = load_asr_model()
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
|
16 |
def save_audio_file(audio_bytes, file_extension):
|
|
|
38 |
:return: The transcribed text
|
39 |
"""
|
40 |
with open(file_path, "rb") as audio_file:
|
41 |
+
transcript = inference(processor, asr_model, audio_file)
|
42 |
return transcript
|
43 |
|
44 |
|
asr.py
CHANGED
@@ -14,12 +14,13 @@ def load_model():
|
|
14 |
target_lang = "oji"
|
15 |
processor = AutoProcessor.from_pretrained(model_id, target_lang=target_lang, use_auth_token=hf_token)
|
16 |
model = Wav2Vec2ForCTC.from_pretrained(model_id, target_lang=target_lang, ignore_mismatched_sizes=True, use_safetensors=True, use_auth_token=hf_token)
|
|
|
17 |
|
18 |
|
19 |
-
def inference(model,
|
20 |
-
|
21 |
# arr.squeeze().numpy(), ...
|
22 |
-
inputs = processor(
|
23 |
|
24 |
with torch.no_grad():
|
25 |
outputs = model(**inputs).logits
|
|
|
14 |
target_lang = "oji"
|
15 |
processor = AutoProcessor.from_pretrained(model_id, target_lang=target_lang, use_auth_token=hf_token)
|
16 |
model = Wav2Vec2ForCTC.from_pretrained(model_id, target_lang=target_lang, ignore_mismatched_sizes=True, use_safetensors=True, use_auth_token=hf_token)
|
17 |
+
return processor, model
|
18 |
|
19 |
|
20 |
+
def inference(processor, model, audio_path):
|
21 |
+
arr, rate = read_audio_data(audio_path)
|
22 |
# arr.squeeze().numpy(), ...
|
23 |
+
inputs = processor(arr.squeeze().numpy(), sampling_rate=16_000, return_tensors="pt")
|
24 |
|
25 |
with torch.no_grad():
|
26 |
outputs = model(**inputs).logits
|