gdnartea committed on
Commit
9dc2324
·
verified ·
1 Parent(s): 197f7f7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -1
app.py CHANGED
@@ -13,7 +13,9 @@ from nemo.collections.asr.models import ASRModel
13
  from nemo.collections.asr.parts.utils.streaming_utils import FrameBatchMultiTaskAED
14
  from nemo.collections.asr.parts.utils.transcribe_utils import get_buffered_pred_feat_multitaskAED
15
 
 
16
 
 
17
 
18
  torch.random.manual_seed(0)
19
  proc_model_name = "microsoft/Phi-3-mini-4k-instruct"
@@ -27,6 +29,9 @@ proc_model = AutoModelForCausalLM.from_pretrained(
27
  proc_model.to("cpu")
28
  proc_tokenizer = AutoTokenizer.from_pretrained(proc_model_name)
29
 
 
 
 
30
 
31
  SAMPLE_RATE = 16000 # Hz
32
  MAX_AUDIO_MINUTES = 10 # wont try to transcribe if longer than this
@@ -40,10 +45,16 @@ decoding_cfg = model.cfg.decoding
40
  decoding_cfg.beam.beam_size = 1
41
  model.change_decoding_strategy(decoding_cfg)
42
 
 
 
 
 
43
  vits_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
44
  vits_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
45
  set_seed(555)
46
 
 
 
47
 
48
  def text_to_speech(text_response):
49
  inputs = vits_tokenizer(text=text_response, return_tensors="pt")
@@ -137,7 +148,7 @@ def CanaryPhi(audio_filepath):
137
  response = generate_response(user_input)
138
  print(response)
139
  chatty_response = text_to_speech(response)
140
- return response
141
 
142
 
143
  # Create a Gradio interface
 
13
  from nemo.collections.asr.parts.utils.streaming_utils import FrameBatchMultiTaskAED
14
  from nemo.collections.asr.parts.utils.transcribe_utils import get_buffered_pred_feat_multitaskAED
15
 
16
+ import tracemalloc as tm
17
 
18
+ tm.start()
19
 
20
  torch.random.manual_seed(0)
21
  proc_model_name = "microsoft/Phi-3-mini-4k-instruct"
 
29
  proc_model.to("cpu")
30
  proc_tokenizer = AutoTokenizer.from_pretrained(proc_model_name)
31
 
32
+ print(tm.get_traced_memory())
33
+ tm.stop()
34
+
35
 
36
  SAMPLE_RATE = 16000 # Hz
37
  MAX_AUDIO_MINUTES = 10 # wont try to transcribe if longer than this
 
45
  decoding_cfg.beam.beam_size = 1
46
  model.change_decoding_strategy(decoding_cfg)
47
 
48
+ print(tm.get_traced_memory())
49
+
50
+
51
+
52
  vits_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
53
  vits_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
54
  set_seed(555)
55
 
56
+ print(tm.get_traced_memory())
57
+ tm.stop()
58
 
59
  def text_to_speech(text_response):
60
  inputs = vits_tokenizer(text=text_response, return_tensors="pt")
 
148
  response = generate_response(user_input)
149
  print(response)
150
  chatty_response = text_to_speech(response)
151
+ return chatty_response
152
 
153
 
154
  # Create a Gradio interface