gdnartea committed on
Commit
9dc2324
·
verified ·
1 Parent(s): 197f7f7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -1
app.py CHANGED
@@ -13,7 +13,9 @@ from nemo.collections.asr.models import ASRModel
13
  from nemo.collections.asr.parts.utils.streaming_utils import FrameBatchMultiTaskAED
14
  from nemo.collections.asr.parts.utils.transcribe_utils import get_buffered_pred_feat_multitaskAED
15
 
 
16
 
 
17
 
18
  torch.random.manual_seed(0)
19
  proc_model_name = "microsoft/Phi-3-mini-4k-instruct"
@@ -27,6 +29,9 @@ proc_model = AutoModelForCausalLM.from_pretrained(
27
  proc_model.to("cpu")
28
  proc_tokenizer = AutoTokenizer.from_pretrained(proc_model_name)
29
 
 
 
 
30
 
31
  SAMPLE_RATE = 16000 # Hz
32
  MAX_AUDIO_MINUTES = 10 # wont try to transcribe if longer than this
@@ -40,10 +45,16 @@ decoding_cfg = model.cfg.decoding
40
  decoding_cfg.beam.beam_size = 1
41
  model.change_decoding_strategy(decoding_cfg)
42
 
 
 
 
 
43
  vits_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
44
  vits_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
45
  set_seed(555)
46
 
 
 
47
 
48
  def text_to_speech(text_response):
49
  inputs = vits_tokenizer(text=text_response, return_tensors="pt")
@@ -137,7 +148,7 @@ def CanaryPhi(audio_filepath):
137
  response = generate_response(user_input)
138
  print(response)
139
  chatty_response = text_to_speech(response)
140
- return response
141
 
142
 
143
  # Create a Gradio interface
 
13
  from nemo.collections.asr.parts.utils.streaming_utils import FrameBatchMultiTaskAED
14
  from nemo.collections.asr.parts.utils.transcribe_utils import get_buffered_pred_feat_multitaskAED
15
 
16
+ import tracemalloc as tm
17
 
18
+ tm.start()
19
 
20
  torch.random.manual_seed(0)
21
  proc_model_name = "microsoft/Phi-3-mini-4k-instruct"
 
29
  proc_model.to("cpu")
30
  proc_tokenizer = AutoTokenizer.from_pretrained(proc_model_name)
31
 
32
+ print(tm.get_traced_memory())
33
+ tm.stop()
34
+
35
 
36
  SAMPLE_RATE = 16000 # Hz
37
  MAX_AUDIO_MINUTES = 10 # wont try to transcribe if longer than this
 
45
  decoding_cfg.beam.beam_size = 1
46
  model.change_decoding_strategy(decoding_cfg)
47
 
48
+ print(tm.get_traced_memory())
49
+
50
+
51
+
52
  vits_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
53
  vits_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
54
  set_seed(555)
55
 
56
+ print(tm.get_traced_memory())
57
+ tm.stop()
58
 
59
  def text_to_speech(text_response):
60
  inputs = vits_tokenizer(text=text_response, return_tensors="pt")
 
148
  response = generate_response(user_input)
149
  print(response)
150
  chatty_response = text_to_speech(response)
151
+ return chatty_response
152
 
153
 
154
  # Create a Gradio interface