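# Transcribe a Kannada audio query ('kannada_query_infer.wav') with a NeMo ASR
# checkpoint ('kannada.nemo'), using the CTC and RNNT decoding paths.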
import torch
import soundfile as sf
import nemo.collections.asr as nemo_asr

# Path to the Kannada .nemo checkpoint and its language id
model_path = "kannada.nemo"
lang_id = "kn"

# Run on GPU when available, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The checkpoint is used with both CTC and RNNT decoders below, so restore it
# through the generic ASRModel class and let the hybrid RNNT-CTC model resolve
# to its own concrete class from the saved config
model = nemo_asr.models.ASRModel.restore_from(restore_path=model_path)
model.eval()  # inference mode
model = model.to(device)
# CTC decoder pass, kept for reference but disabled
'''
model.cur_decoder = "ctc"
ctc_text = model.transcribe(['kannada_query_infer.wav'], batch_size=1, logprobs=False, language_id=lang_id)[0]
print(ctc_text)
'''
model.cur_decoder = "rnnt"
ctc_text = model.transcribe(['kannada_query_infer.wav'], batch_size=1, logprobs=False, language_id=lang_id)[0]
print(ctc_text)
# Timing comparison between the CTC and RNNT decoders, disabled by default
'''
import time
# Start timing for CTC decoder
start_time_ctc = time.time()
model.cur_decoder = "ctc"
ctc_text = model.transcribe(['kannada_query_infer.wav'], batch_size=1, logprobs=False, language_id=lang_id)[0]
print(ctc_text)
end_time_ctc = time.time()
ctc_duration = end_time_ctc - start_time_ctc
print(f"CTC transcription took {ctc_duration:.4f} seconds")
# Start timing for RNNT decoder
start_time_rnnt = time.time()
model.cur_decoder = "rnnt"
rnnt_text = model.transcribe(['kannada_query_infer.wav'], batch_size=1, logprobs=False, language_id=lang_id)[0]
print(rnnt_text)
end_time_rnnt = time.time()
rnnt_duration = end_time_rnnt - start_time_rnnt
print(f"RNNT transcription took {rnnt_duration:.4f} seconds")
# Calculate and print the speed difference
speed_difference = rnnt_duration - ctc_duration
print(f"Speed difference: {speed_difference:.4f} seconds")
'''