kevinwang676 committed on
Commit
69cf167
·
1 Parent(s): f034bba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -15
app.py CHANGED
@@ -21,6 +21,36 @@ from lang_list import (
21
  TEXT_SOURCE_LANGUAGE_NAMES,
22
  )
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  CHECKPOINTS_PATH = pathlib.Path(os.getenv("CHECKPOINTS_PATH", "/home/user/app/models"))
25
  if not CHECKPOINTS_PATH.exists():
26
  snapshot_download(repo_id="facebook/seamless-m4t-v2-large", repo_type="model", local_dir=CHECKPOINTS_PATH)
@@ -152,27 +182,35 @@ with gr.Blocks() as demo_s2st:
152
  with gr.Row():
153
  with gr.Column():
154
  with gr.Group():
155
- input_audio = gr.Audio(label="Input speech", type="filepath")
156
  source_language = gr.Dropdown(
157
- label="Source language",
158
  choices=ASR_TARGET_LANGUAGE_NAMES,
159
- value="English",
160
  )
161
  target_language = gr.Dropdown(
162
- label="Target language",
163
  choices=S2ST_TARGET_LANGUAGE_NAMES,
164
- value=DEFAULT_TARGET_LANGUAGE,
165
  )
166
- btn = gr.Button("Translate")
 
 
167
  with gr.Column():
168
  with gr.Group():
169
  output_audio = gr.Audio(
170
- label="Translated speech",
 
 
 
 
 
 
 
171
  autoplay=False,
172
  streaming=False,
173
  type="numpy",
174
  )
175
- output_text = gr.Textbox(label="Translated text")
176
 
177
  gr.Examples(
178
  examples=[
@@ -194,25 +232,28 @@ with gr.Blocks() as demo_s2st:
194
  outputs=[output_audio, output_text],
195
  api_name="s2st",
196
  )
 
 
197
 
198
  with gr.Blocks() as demo_s2tt:
 
199
  with gr.Row():
200
  with gr.Column():
201
  with gr.Group():
202
- input_audio = gr.Audio(label="Input speech", type="filepath")
203
  source_language = gr.Dropdown(
204
- label="Source language",
205
  choices=ASR_TARGET_LANGUAGE_NAMES,
206
- value="English",
207
  )
208
  target_language = gr.Dropdown(
209
- label="Target language",
210
  choices=S2TT_TARGET_LANGUAGE_NAMES,
211
- value=DEFAULT_TARGET_LANGUAGE,
212
  )
213
- btn = gr.Button("Translate")
214
  with gr.Column():
215
- output_text = gr.Textbox(label="Translated text")
216
 
217
  gr.Examples(
218
  examples=[
 
21
  TEXT_SOURCE_LANGUAGE_NAMES,
22
  )
23
 
24
+ from scipy.io import wavfile
25
+ from scipy.io.wavfile import write
26
+
27
+ from speechbrain.pretrained import SpectralMaskEnhancement
28
+
29
# Speech-enhancement model, created once at import time from the
# "speechbrain/metricgan-plus-voicebank" source. `savedir` is the local
# directory handed to SpeechBrain for the fetched files (presumably used as a
# download cache -- confirm against the SpeechBrain version in use).
enhance_model = SpectralMaskEnhancement.from_hparams(
    source="speechbrain/metricgan-plus-voicebank",
    savedir="pretrained_models/metricgan-plus-voicebank",
)

# kNN-VC voice-conversion model loaded through torch.hub onto the CPU.
# NOTE(review): trust_repo=True loads the hub repo's code without the
# interactive trust confirmation -- make sure 'bshall/knn-vc' is a repo you
# are willing to execute code from.
knn_vc = torch.hub.load('bshall/knn-vc', 'knn_vc', prematched=True, trust_repo=True, pretrained=True, device='cpu')
35
+
36
def _read_audio(audio, role):
    """Normalize a Gradio audio value to a ``(sample_rate, samples)`` pair.

    Args:
        audio: Either a path to a WAV file (what a ``type="filepath"`` audio
            component supplies) or a ``(sample_rate, samples)`` pair (what a
            ``type="numpy"`` audio component supplies).
        role: Short label ("input" / "reference") used in error messages.

    Returns:
        A ``(sample_rate, samples)`` pair suitable for ``scipy.io.wavfile.write``.

    Raises:
        ValueError: If ``audio`` is ``None`` or has an unsupported form.
    """
    if audio is None:
        raise ValueError(f"No {role} audio was provided.")
    if isinstance(audio, (str, os.PathLike)):
        return wavfile.read(audio)
    if isinstance(audio, (tuple, list)) and len(audio) == 2:
        sample_rate, samples = audio
        return sample_rate, samples
    raise ValueError(
        f"Unsupported {role} audio value of type {type(audio).__name__}; "
        "expected a file path or a (sample_rate, samples) pair."
    )


def voice_change(audio_in, audio_ref):
    """Convert ``audio_in`` to the voice of ``audio_ref`` and denoise the result.

    The query audio is matched against the reference audio with the
    module-level ``knn_vc`` model, the converted waveform is passed through the
    module-level ``enhance_model``, and the enhanced audio is written to
    ``enhanced.wav``.

    Args:
        audio_in: Audio whose content is kept. A WAV file path or a
            ``(sample_rate, samples)`` pair. The UI wires this to an audio
            component declared with ``type="numpy"``, so the pair form must
            be accepted.
        audio_ref: Audio whose voice is imitated, in the same accepted forms.

    Returns:
        The path ``'enhanced.wav'`` of the enhanced, converted audio.

    Raises:
        ValueError: If either argument is missing or of an unsupported form.
    """
    # Validate and normalize both inputs before touching the disk or the
    # models, so a missing clip fails fast with a clear message.
    samplerate1, data1 = _read_audio(audio_in, "input")
    samplerate2, data2 = _read_audio(audio_ref, "reference")

    # NOTE(review): the working files below use fixed names in the current
    # directory, so overlapping requests would overwrite each other's data.
    # Consider per-request temporary paths once the file handling of
    # knn_vc / enhance_model.load_audio has been checked.
    write("./audio_in.wav", samplerate1, data1)
    write("./audio_ref.wav", samplerate2, data2)

    query_seq = knn_vc.get_features("./audio_in.wav")
    matching_set = knn_vc.get_matching_set(["./audio_ref.wav"])
    out_wav = knn_vc.match(query_seq, matching_set, topk=4)
    # out_wav[None] adds a leading dimension before saving at 16 kHz.
    torchaudio.save('output.wav', out_wav[None], 16000)

    noisy = enhance_model.load_audio('output.wav').unsqueeze(0)
    # lengths=[1.] -- presumably the relative length of the single batch
    # item (i.e. "use the whole signal"); confirm against SpeechBrain docs.
    enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
    torchaudio.save('enhanced.wav', enhanced.cpu(), 16000)
    return 'enhanced.wav'
52
+
53
+
54
  CHECKPOINTS_PATH = pathlib.Path(os.getenv("CHECKPOINTS_PATH", "/home/user/app/models"))
55
  if not CHECKPOINTS_PATH.exists():
56
  snapshot_download(repo_id="facebook/seamless-m4t-v2-large", repo_type="model", local_dir=CHECKPOINTS_PATH)
 
182
  with gr.Row():
183
  with gr.Column():
184
  with gr.Group():
185
+ input_audio = gr.Audio(label="请上传一段语音", type="filepath")
186
  source_language = gr.Dropdown(
187
+ label="请选择上传语音对应的语言",
188
  choices=ASR_TARGET_LANGUAGE_NAMES,
189
+ value="Mandarin Chinese",
190
  )
191
  target_language = gr.Dropdown(
192
+ label="请选择翻译后的语言",
193
  choices=S2ST_TARGET_LANGUAGE_NAMES,
194
+ value="English",
195
  )
196
+ btn = gr.Button("开始AI同声传译之旅吧")
197
+ btn_vc = gr.Button("恢复原本的音色吧!")
198
+
199
  with gr.Column():
200
  with gr.Group():
201
  output_audio = gr.Audio(
202
+ label="同声传译后的语音",
203
+ autoplay=False,
204
+ streaming=False,
205
+ type="numpy",
206
+ )
207
+ output_text = gr.Textbox(label="翻译后的文本")
208
+ audio_vc = gr.Audio(
209
+ label="相同音色的AI专属语音",
210
  autoplay=False,
211
  streaming=False,
212
  type="numpy",
213
  )
 
214
 
215
  gr.Examples(
216
  examples=[
 
232
  outputs=[output_audio, output_text],
233
  api_name="s2st",
234
  )
235
+ btn_vc.click(voice_change, [output_audio, input_audio], [audio_vc])
236
+
237
 
238
  with gr.Blocks() as demo_s2tt:
239
+
240
  with gr.Row():
241
  with gr.Column():
242
  with gr.Group():
243
+ input_audio = gr.Audio(label="请上传一段语音", type="filepath")
244
  source_language = gr.Dropdown(
245
+ label="请选择上传语音对应的语言",
246
  choices=ASR_TARGET_LANGUAGE_NAMES,
247
+ value="Mandarin Chinese",
248
  )
249
  target_language = gr.Dropdown(
250
+ label="请选择翻译后的语言",
251
  choices=S2TT_TARGET_LANGUAGE_NAMES,
252
+ value="English",
253
  )
254
+ btn = gr.Button("开始AI翻译之旅吧!")
255
  with gr.Column():
256
+ output_text = gr.Textbox(label="翻译后的文本")
257
 
258
  gr.Examples(
259
  examples=[