Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -40,11 +40,10 @@ decoding_cfg = model.cfg.decoding
|
|
40 |
decoding_cfg.beam.beam_size = 1
|
41 |
model.change_decoding_strategy(decoding_cfg)
|
42 |
|
43 |
-
'''
|
44 |
vits_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
|
45 |
vits_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
|
46 |
set_seed(555)
|
47 |
-
|
48 |
|
49 |
def text_to_speech(text_response):
|
50 |
inputs = vits_tokenizer(text=text_response, return_tensors="pt")
|
@@ -87,7 +86,7 @@ def transcribe(audio_filepath):
|
|
87 |
"source_lang": "en",
|
88 |
"target_lang": "en",
|
89 |
"taskname": "asr",
|
90 |
-
"pnc": "
|
91 |
"answer": "predict",
|
92 |
"duration": str(duration),
|
93 |
}
|
@@ -121,12 +120,12 @@ def generate_response(user_input):
|
|
121 |
with torch.no_grad():
|
122 |
outputs = proc_model.generate(
|
123 |
inputs,
|
124 |
-
max_new_tokens=
|
125 |
)
|
126 |
|
127 |
response = proc_tokenizer.batch_decode(
|
128 |
outputs,
|
129 |
-
|
130 |
clean_up_tokenization_spaces=False,
|
131 |
)[0]
|
132 |
|
@@ -137,15 +136,19 @@ def CanaryPhi(audio_filepath):
|
|
137 |
print(user_input)
|
138 |
response = generate_response(user_input)
|
139 |
print(response)
|
|
|
140 |
return response
|
141 |
|
142 |
|
143 |
# Create a Gradio interface
|
144 |
iface = gr.Interface(
|
145 |
fn=CanaryPhi,
|
146 |
-
inputs=gr.Audio(sources="microphone", type="filepath"),
|
147 |
-
|
|
|
|
|
148 |
)
|
149 |
|
150 |
# Launch the interface
|
|
|
151 |
iface.launch()
|
|
|
40 |
decoding_cfg.beam.beam_size = 1
|
41 |
model.change_decoding_strategy(decoding_cfg)
|
42 |
|
|
|
43 |
vits_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
|
44 |
vits_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
|
45 |
set_seed(555)
|
46 |
+
|
47 |
|
48 |
def text_to_speech(text_response):
|
49 |
inputs = vits_tokenizer(text=text_response, return_tensors="pt")
|
|
|
86 |
"source_lang": "en",
|
87 |
"target_lang": "en",
|
88 |
"taskname": "asr",
|
89 |
+
"pnc": "yes",
|
90 |
"answer": "predict",
|
91 |
"duration": str(duration),
|
92 |
}
|
|
|
120 |
with torch.no_grad():
|
121 |
outputs = proc_model.generate(
|
122 |
inputs,
|
123 |
+
max_new_tokens=48,
|
124 |
)
|
125 |
|
126 |
response = proc_tokenizer.batch_decode(
|
127 |
outputs,
|
128 |
+
skip_special_tokens=True,
|
129 |
clean_up_tokenization_spaces=False,
|
130 |
)[0]
|
131 |
|
|
|
136 |
print(user_input)
|
137 |
response = generate_response(user_input)
|
138 |
print(response)
|
139 |
+
chatty_response = text_to_speech(response)
|
140 |
return response
|
141 |
|
142 |
|
143 |
# Create a Gradio interface
|
144 |
iface = gr.Interface(
|
145 |
fn=CanaryPhi,
|
146 |
+
inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
|
147 |
+
#inputs=gr.Textbox(lines=5, placeholder="Enter your text here..."),
|
148 |
+
#outputs=gr.Textbox(),
|
149 |
+
outputs=gr.Audio("response.wav"),
|
150 |
)
|
151 |
|
152 |
# Launch the interface
|
153 |
+
iface.queue()
|
154 |
iface.launch()
|