gdnartea committed on
Commit
d1e03b7
·
verified ·
1 Parent(s): 70fb7bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -1,11 +1,11 @@
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, VitsModel
4
- from nemo.collections.asr.models import ASRModel
5
 
6
 
7
  # load speech to text model
8
- canary_model = ASRModel.from_pretrained('nvidia/canary-1b')
9
  canary_model.eval()
10
 
11
  # update decode params
@@ -34,7 +34,11 @@ tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
34
 
35
  def process_speech(speech):
36
  # Convert the speech to text
37
- transcription = canary_model.transcribe(speech, logprobs=False)
 
 
 
 
38
 
39
  # Process the text
40
  inputs = proc_tokenizer.encode(transcription + proc_tokenizer.eos_token, return_tensors='pt')
 
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, VitsModel
4
+ from nemo.collections.asr.models import EncDecMultiTaskModel
5
 
6
 
7
  # load speech to text model
8
+ canary_model = EncDecMultiTaskModel.from_pretrained('nvidia/canary-1b')
9
  canary_model.eval()
10
 
11
  # update decode params
 
34
 
35
  def process_speech(speech):
36
  # Convert the speech to text
37
+ transcription = canary_model.transcribe(
38
+ speech,
39
+ batch_size=16,
40
+ logprobs=False,
41
+ )
42
 
43
  # Process the text
44
  inputs = proc_tokenizer.encode(transcription + proc_tokenizer.eos_token, return_tensors='pt')