File size: 1,519 Bytes
6a2d9d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Imports come first so that no executable statement precedes them (PEP 8).
import torch
import soundfile as sf  # NOTE(review): `sf` is not referenced in the visible script
import nemo.collections.asr as nemo_asr

# Checkpoint file to restore and the language code later passed to
# `model.transcribe(..., language_id=lang_id)`.
model_path = "kannada.nemo"
lang_id = "kn"

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Restore the checkpoint, put it in evaluation (inference) mode, then move it
# to the selected device.
model = nemo_asr.models.EncDecCTCModel.restore_from(restore_path=model_path)
model.eval()
model = model.to(device)

# Disabled alternative, kept for reference: decode the same clip with the
# CTC decoder instead of RNNT.
# model.cur_decoder = "ctc"
# ctc_text = model.transcribe(['kannada_query_infer.wav'], batch_size=1, logprobs=False, language_id=lang_id)[0]
# print(ctc_text)

# Select the RNNT decoder and transcribe the sample clip.  `[0]` takes the
# first element of whatever `transcribe` returns (presumably the text for the
# single input file; confirm against the NeMo build in use).
model.cur_decoder = "rnnt"
rnnt_text = model.transcribe(
    ["kannada_query_infer.wav"],
    batch_size=1,
    logprobs=False,
    language_id=lang_id,
)[0]
print(rnnt_text)


def benchmark_decoders(asr_model, audio_path="kannada_query_infer.wav", language_id="kn"):
    """Time one transcription of *audio_path* with the CTC and RNNT decoders.

    This benchmark used to be disabled by wrapping it in a bare string
    literal.  As a function it is still inert (nothing calls it when the
    script runs) but can be invoked on demand, e.g.
    ``benchmark_decoders(model, language_id=lang_id)``.

    Args:
        asr_model: Object with a readable and writable ``cur_decoder``
            attribute and a ``transcribe(paths, batch_size=..., logprobs=...,
            language_id=...)`` method whose result supports ``[0]``.
        audio_path: Audio file passed to ``transcribe``.
        language_id: Language code forwarded to ``transcribe``.

    Returns:
        A dict mapping ``"ctc"`` and ``"rnnt"`` to ``(text, seconds)`` tuples.
    """
    import time  # local import: only this opt-in helper needs it

    previous_decoder = asr_model.cur_decoder
    results = {}
    try:
        # NOTE(review): the decoder timed first may absorb one-off warm-up
        # cost; run the benchmark twice if that matters.
        for decoder in ("ctc", "rnnt"):
            # perf_counter is the clock intended for measuring short
            # durations; printing is kept outside the timed window.
            started = time.perf_counter()
            asr_model.cur_decoder = decoder
            text = asr_model.transcribe(
                [audio_path],
                batch_size=1,
                logprobs=False,
                language_id=language_id,
            )[0]
            elapsed = time.perf_counter() - started

            print(text)
            print(f"{decoder.upper()} transcription took {elapsed:.4f} seconds")
            results[decoder] = (text, elapsed)
    finally:
        # Leave the model configured the way the caller had it.
        asr_model.cur_decoder = previous_decoder

    # Positive means RNNT was slower than CTC for this clip.
    speed_difference = results["rnnt"][1] - results["ctc"][1]
    print(f"Speed difference: {speed_difference:.4f} seconds")
    return results