noahabebe committed on
Commit
8dd00fc
·
verified ·
1 Parent(s): 72d4d84

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -0
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import os
4
+ from pathlib import Path
5
+ from synthesizer.inference import Synthesizer
6
+ from encoder import inference as encoder
7
+ from vocoder import inference as vocoder
8
+ from pydub import AudioSegment
9
+
10
# Load the pretrained encoder, synthesizer and vocoder models once, at import
# time, from the Real-Time-Voice-Cloning checkout directory.
project_name = "Real-Time-Voice-Cloning"
encoder.load_model(Path(project_name, "encoder/saved_models/pretrained.pt"))
synthesizer = Synthesizer(
    Path(project_name, "synthesizer/saved_models/pretrained/pretrained.pt")
)
vocoder.load_model(Path(project_name, "vocoder/saved_models/pretrained/pretrained.pt"))
15
+
16
def clone_voice(text, reference_audio):
    """Synthesize ``text`` in the voice of the speaker in ``reference_audio``.

    Args:
        text: The sentence to speak.
        reference_audio: Recording of the target speaker. Accepted forms are
            a filesystem path, a file-like object exposing ``.name`` (such as
            an uploaded temp file), or a pydub ``AudioSegment``.

    Returns:
        The path ("output.wav") of the WAV file holding the generated speech.

    Raises:
        ValueError: If no reference audio was supplied.
    """
    if reference_audio is None:
        raise ValueError("A reference audio sample is required.")

    if hasattr(reference_audio, "export"):
        # A pydub AudioSegment: write it to disk so the encoder can load it.
        audio_path = "reference_audio.wav"
        reference_audio.export(audio_path, format="wav")
    else:
        # UI uploads arrive as a path string or a temp-file wrapper, neither
        # of which has .export(); use the file that already exists on disk.
        audio_path = getattr(reference_audio, "name", reference_audio)

    # Derive the speaker embedding from the reference recording.
    audio = encoder.preprocess_wav(audio_path)
    embedding = encoder.embed_utterance(audio)

    # Text + embedding -> spectrogram -> waveform.
    specs = synthesizer.synthesize_spectrograms([text], [embedding])
    generated_wav = vocoder.infer_waveform(specs[0])

    # Append sample_rate zero samples (one second of silence) at the end.
    output_path = "output.wav"
    generated_wav = np.pad(generated_wav, (0, synthesizer.sample_rate), mode="constant")

    # AudioSegment with sample_width=2 expects raw 16-bit PCM bytes, not a
    # NumPy array. NOTE(review): assumes a floating-point waveform is scaled
    # to [-1.0, 1.0] -- confirm against the vocoder's output range.
    samples = np.asarray(generated_wav)
    if np.issubdtype(samples.dtype, np.floating):
        samples = np.clip(samples, -1.0, 1.0) * 32767
    pcm_bytes = samples.astype(np.int16).tobytes()

    AudioSegment(
        pcm_bytes,
        frame_rate=synthesizer.sample_rate,
        sample_width=2,
        channels=1,
    ).export(output_path, format="wav")
    return output_path
34
+
35
# Gradio UI: a text box and a reference recording go in, generated audio comes out.
iface = gr.Interface(
    fn=clone_voice,
    inputs=[
        gr.Textbox(label="Text"),
        # "filepath" passes the handler the path of the uploaded recording;
        # the legacy "file" value is rejected by current Gradio releases.
        gr.Audio(label="Reference Audio", type="filepath"),
    ],
    outputs=gr.Audio(label="Generated Audio"),
    title="Real-Time Voice Cloning",
    description="Generate new speech using a reference audio sample and provided text.",
)

if __name__ == "__main__":
    iface.launch()