Spaces:
Runtime error
Runtime error
Commit
·
69cf167
1
Parent(s):
f034bba
Update app.py
Browse files
app.py
CHANGED
@@ -21,6 +21,36 @@ from lang_list import (
|
|
21 |
TEXT_SOURCE_LANGUAGE_NAMES,
|
22 |
)
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
CHECKPOINTS_PATH = pathlib.Path(os.getenv("CHECKPOINTS_PATH", "/home/user/app/models"))
|
25 |
if not CHECKPOINTS_PATH.exists():
|
26 |
snapshot_download(repo_id="facebook/seamless-m4t-v2-large", repo_type="model", local_dir=CHECKPOINTS_PATH)
|
@@ -152,27 +182,35 @@ with gr.Blocks() as demo_s2st:
|
|
152 |
with gr.Row():
|
153 |
with gr.Column():
|
154 |
with gr.Group():
|
155 |
-
input_audio = gr.Audio(label="
|
156 |
source_language = gr.Dropdown(
|
157 |
-
label="
|
158 |
choices=ASR_TARGET_LANGUAGE_NAMES,
|
159 |
-
value="
|
160 |
)
|
161 |
target_language = gr.Dropdown(
|
162 |
-
label="
|
163 |
choices=S2ST_TARGET_LANGUAGE_NAMES,
|
164 |
-
value=
|
165 |
)
|
166 |
-
btn = gr.Button("
|
|
|
|
|
167 |
with gr.Column():
|
168 |
with gr.Group():
|
169 |
output_audio = gr.Audio(
|
170 |
-
label="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
autoplay=False,
|
172 |
streaming=False,
|
173 |
type="numpy",
|
174 |
)
|
175 |
-
output_text = gr.Textbox(label="Translated text")
|
176 |
|
177 |
gr.Examples(
|
178 |
examples=[
|
@@ -194,25 +232,28 @@ with gr.Blocks() as demo_s2st:
|
|
194 |
outputs=[output_audio, output_text],
|
195 |
api_name="s2st",
|
196 |
)
|
|
|
|
|
197 |
|
198 |
with gr.Blocks() as demo_s2tt:
|
|
|
199 |
with gr.Row():
|
200 |
with gr.Column():
|
201 |
with gr.Group():
|
202 |
-
input_audio = gr.Audio(label="
|
203 |
source_language = gr.Dropdown(
|
204 |
-
label="
|
205 |
choices=ASR_TARGET_LANGUAGE_NAMES,
|
206 |
-
value="
|
207 |
)
|
208 |
target_language = gr.Dropdown(
|
209 |
-
label="
|
210 |
choices=S2TT_TARGET_LANGUAGE_NAMES,
|
211 |
-
value=
|
212 |
)
|
213 |
-
btn = gr.Button("
|
214 |
with gr.Column():
|
215 |
-
output_text = gr.Textbox(label="
|
216 |
|
217 |
gr.Examples(
|
218 |
examples=[
|
|
|
21 |
TEXT_SOURCE_LANGUAGE_NAMES,
|
22 |
)
|
23 |
|
24 |
+
from scipy.io import wavfile
|
25 |
+
from scipy.io.wavfile import write
|
26 |
+
|
27 |
+
from speechbrain.pretrained import SpectralMaskEnhancement
|
28 |
+
|
29 |
+
enhance_model = SpectralMaskEnhancement.from_hparams(
|
30 |
+
source="speechbrain/metricgan-plus-voicebank",
|
31 |
+
savedir="pretrained_models/metricgan-plus-voicebank",
|
32 |
+
)
|
33 |
+
|
34 |
+
knn_vc = torch.hub.load('bshall/knn-vc', 'knn_vc', prematched=True, trust_repo=True, pretrained=True, device='cpu')
|
35 |
+
|
36 |
+
def _materialize_wav(audio, wav_path):
    """Write *audio* to *wav_path* as a WAV file and return *wav_path*.

    *audio* is either a ``(sample_rate, samples)`` pair or the path of an
    existing file that ``scipy.io.wavfile.read`` can open.
    """
    if isinstance(audio, (tuple, list)):
        sample_rate, samples = audio
    else:
        sample_rate, samples = wavfile.read(audio)
    write(wav_path, sample_rate, samples)
    return wav_path


def voice_change(audio_in, audio_ref):
    """Re-voice *audio_in* with the timbre of *audio_ref* and denoise it.

    The speech in ``audio_in`` is converted with ``knn_vc`` using
    ``audio_ref`` as the matching set, then passed through ``enhance_model``
    and written to a WAV file.

    Args:
        audio_in: Speech to convert, as a ``(sample_rate, samples)`` pair or
            a WAV file path. The click handler in this file passes the
            ``output_audio`` component here, which is declared with
            ``type="numpy"``, so the pair form must be accepted.
        audio_ref: Reference speech providing the target voice, in either of
            the same two forms.

    Returns:
        Path of the enhanced WAV file. Each call writes to its own temporary
        directory; the returned file is not deleted by this function.

    Raises:
        ValueError: If either input is ``None``.
    """
    import os
    import shutil
    import tempfile
    import uuid

    if audio_in is None or audio_ref is None:
        raise ValueError(
            "voice_change needs both the audio to convert and a reference recording"
        )

    # A private directory per call: fixed names in the working directory
    # would let concurrent requests overwrite each other's audio.
    workdir = tempfile.mkdtemp(prefix="voice_change_")
    # NOTE(review): the file names are also made unique in case
    # enhance_model.load_audio caches by file name — confirm against the
    # installed speechbrain version.
    token = uuid.uuid4().hex
    query_path = os.path.join(workdir, f"{token}_query.wav")
    ref_path = os.path.join(workdir, f"{token}_ref.wav")
    converted_path = os.path.join(workdir, f"{token}_converted.wav")
    enhanced_path = os.path.join(workdir, f"{token}_enhanced.wav")

    # Both intermediate and final files are saved at this rate, as before.
    sample_rate = 16000

    try:
        _materialize_wav(audio_in, query_path)
        _materialize_wav(audio_ref, ref_path)

        query_seq = knn_vc.get_features(query_path)
        matching_set = knn_vc.get_matching_set([ref_path])
        out_wav = knn_vc.match(query_seq, matching_set, topk=4)
        torchaudio.save(converted_path, out_wav[None], sample_rate)

        noisy = enhance_model.load_audio(converted_path).unsqueeze(0)
        enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
        torchaudio.save(enhanced_path, enhanced.cpu(), sample_rate)
    except Exception:
        # Nothing useful was produced; do not leave the directory behind.
        shutil.rmtree(workdir, ignore_errors=True)
        raise

    # Only the final file has to outlive the call.
    for scratch in (query_path, ref_path, converted_path):
        try:
            os.remove(scratch)
        except OSError:
            pass

    return enhanced_path
|
52 |
+
|
53 |
+
|
54 |
CHECKPOINTS_PATH = pathlib.Path(os.getenv("CHECKPOINTS_PATH", "/home/user/app/models"))
|
55 |
if not CHECKPOINTS_PATH.exists():
|
56 |
snapshot_download(repo_id="facebook/seamless-m4t-v2-large", repo_type="model", local_dir=CHECKPOINTS_PATH)
|
|
|
182 |
with gr.Row():
|
183 |
with gr.Column():
|
184 |
with gr.Group():
|
185 |
+
input_audio = gr.Audio(label="请上传一段语音", type="filepath")
|
186 |
source_language = gr.Dropdown(
|
187 |
+
label="请选择上传语音对应的语言",
|
188 |
choices=ASR_TARGET_LANGUAGE_NAMES,
|
189 |
+
value="Mandarin Chinese",
|
190 |
)
|
191 |
target_language = gr.Dropdown(
|
192 |
+
label="请选择翻译后的语言",
|
193 |
choices=S2ST_TARGET_LANGUAGE_NAMES,
|
194 |
+
value="English",
|
195 |
)
|
196 |
+
btn = gr.Button("开始AI同声传译之旅吧")
|
197 |
+
btn_vc = gr.Button("恢复原本的音色吧!")
|
198 |
+
|
199 |
with gr.Column():
|
200 |
with gr.Group():
|
201 |
output_audio = gr.Audio(
|
202 |
+
label="同声传译后的语音",
|
203 |
+
autoplay=False,
|
204 |
+
streaming=False,
|
205 |
+
type="numpy",
|
206 |
+
)
|
207 |
+
output_text = gr.Textbox(label="翻译后的文本")
|
208 |
+
audio_vc = gr.Audio(
|
209 |
+
label="相同音色的AI专属语音",
|
210 |
autoplay=False,
|
211 |
streaming=False,
|
212 |
type="numpy",
|
213 |
)
|
|
|
214 |
|
215 |
gr.Examples(
|
216 |
examples=[
|
|
|
232 |
outputs=[output_audio, output_text],
|
233 |
api_name="s2st",
|
234 |
)
|
235 |
+
btn_vc.click(voice_change, [output_audio, input_audio], [audio_vc])
|
236 |
+
|
237 |
|
238 |
with gr.Blocks() as demo_s2tt:
|
239 |
+
|
240 |
with gr.Row():
|
241 |
with gr.Column():
|
242 |
with gr.Group():
|
243 |
+
input_audio = gr.Audio(label="请上传一段语音", type="filepath")
|
244 |
source_language = gr.Dropdown(
|
245 |
+
label="请选择上传语音对应的语言",
|
246 |
choices=ASR_TARGET_LANGUAGE_NAMES,
|
247 |
+
value="Mandarin Chinese",
|
248 |
)
|
249 |
target_language = gr.Dropdown(
|
250 |
+
label="请选择翻译后的语言",
|
251 |
choices=S2TT_TARGET_LANGUAGE_NAMES,
|
252 |
+
value="English",
|
253 |
)
|
254 |
+
btn = gr.Button("开始AI翻译之旅吧!")
|
255 |
with gr.Column():
|
256 |
+
output_text = gr.Textbox(label="翻译后的文本")
|
257 |
|
258 |
gr.Examples(
|
259 |
examples=[
|