Artificial-superintelligence committed on
Commit
7fcd4dd
·
verified ·
1 Parent(s): d535b69

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -0
app.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import librosa
4
+ import soundfile as sf
5
+ from TTS.api import TTS
6
+ import torch
7
+ import os
8
+ import tempfile
9
+
10
# Load the Coqui model once at import time so every request reuses it.
# `change_voice` drives this object through the voice-conversion API, which is
# only backed by checkpoints under "voice_conversion_models/...". A plain
# "tts_models/..." checkpoint (the previous YourTTS name) leaves the voice
# converter unloaded, so conversion would fail on the first request.
_device = "cuda" if torch.cuda.is_available() else "cpu"
tts = TTS(
    "voice_conversion_models/multilingual/vctk/freevc24",
    progress_bar=False,
).to(_device)
12
+
13
def load_audio(audio_path):
    """Read an audio file with librosa.

    Args:
        audio_path: Location of the audio file to read.

    Returns:
        The ``(samples, sample_rate)`` pair produced by ``librosa.load``.
    """
    # sr=None is forwarded to librosa so no target rate is imposed here.
    samples, sample_rate = librosa.load(audio_path, sr=None)
    return samples, sample_rate
16
+
17
def save_audio(audio, sr, path):
    """Write audio samples to disk with soundfile.

    Args:
        audio: Sample data to write.
        sr: Sampling rate stored alongside the samples.
        path: Destination file path.
    """
    sf.write(file=path, data=audio, samplerate=sr)
19
+
20
def pitch_shift(audio, sr, n_steps):
    """Shift the pitch of ``audio`` via ``librosa.effects.pitch_shift``.

    Args:
        audio: Input samples.
        sr: Sampling rate of ``audio``.
        n_steps: Size of the shift, passed to librosa as ``n_steps``.

    Returns:
        The pitch-shifted samples returned by librosa.
    """
    shifted = librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)
    return shifted
22
+
23
def change_voice(
    audio_path,
    pitch_shift_amount,
    formant_shift_amount,
    target_wav="path/to/female_target_voice.wav",
):
    """Pitch-shift a recording, convert it to the target voice, then shift again.

    Args:
        audio_path: Path of the source recording.
        pitch_shift_amount: Steps of pitch shift applied before conversion.
        formant_shift_amount: Steps of pitch shift applied after conversion
            (a simplified stand-in for real formant shifting).
        target_wav: Reference recording of the voice to convert towards.
            The default is a placeholder and must be replaced by (or
            overridden with) a real file.

    Returns:
        ``(sample_rate, samples)`` for the transformed audio, where
        ``sample_rate`` is the rate of the converted signal.

    Raises:
        FileNotFoundError: If ``target_wav`` does not exist.
    """
    # Fail early and clearly instead of deep inside the conversion model.
    if not os.path.isfile(target_wav):
        raise FileNotFoundError(f"Target voice file not found: {target_wav!r}")

    audio, sr = load_audio(audio_path)

    # Call librosa directly (as the formant step does) so this function does
    # not depend on the module-level name ``pitch_shift``, which is easy to
    # shadow with a same-named UI component.
    pitched_audio = librosa.effects.pitch_shift(
        audio, sr=sr, n_steps=pitch_shift_amount
    )

    # Reserve two scratch paths and close the handles immediately so the
    # files can be reopened by name on every platform.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as source_file:
        source_path = source_file.name
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as converted_file:
        converted_path = converted_file.name

    try:
        save_audio(pitched_audio, sr, source_path)
        # voice_conversion() returns raw samples and accepts no output
        # argument; the *_to_file variant writes the result to ``file_path``.
        tts.voice_conversion_to_file(
            source_wav=source_path,
            target_wav=target_wav,
            file_path=converted_path,
        )
        # The converter may emit a different rate than the input, so use the
        # rate actually stored in the converted file from here on.
        converted_audio, converted_sr = load_audio(converted_path)
    finally:
        # Always remove the scratch files, even when conversion fails.
        for scratch_path in (source_path, converted_path):
            if os.path.exists(scratch_path):
                os.unlink(scratch_path)

    # Apply formant shifting (simplified approach: a second pitch shift).
    formant_shifted_audio = librosa.effects.pitch_shift(
        converted_audio, sr=converted_sr, n_steps=formant_shift_amount
    )

    return (converted_sr, formant_shifted_audio)
50
+
51
def process_audio(audio_file, pitch_shift_amount, formant_shift_amount):
    """Gradio click handler: transform the uploaded audio and save the result.

    Args:
        audio_file: The uploaded audio. With ``gr.Audio(type="filepath")`` this
            is a path string; objects exposing their path as ``.name`` are
            accepted as well.
        pitch_shift_amount: Value of the "Pitch Shift" control.
        formant_shift_amount: Value of the "Formant Shift" control.

    Returns:
        Path of the WAV file containing the transformed voice.

    Raises:
        gr.Error: If no audio was provided.
    """
    if not audio_file:
        # Shown to the user in the UI instead of an opaque traceback.
        raise gr.Error("Please upload an audio file first.")

    # A filepath-type Audio input hands over a plain string, which has no
    # ``.name`` attribute; fall back to the value itself in that case.
    audio_path = getattr(audio_file, "name", audio_file)

    sr, audio = change_voice(audio_path, pitch_shift_amount, formant_shift_amount)

    output_path = "output_voice.wav"
    save_audio(audio, sr, output_path)

    return output_path
58
+
59
# Stylesheet passed to gr.Blocks(css=...) for the demo page: page background,
# a centered white card (.container), the heading and .description text, and
# the tinted panels used by the .input-section / .output-section columns.
custom_css = """
.gradio-container {
background-color: #f0f4f8;
}
.container {
max-width: 900px;
margin: auto;
padding: 20px;
border-radius: 10px;
background-color: white;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
h1 {
color: #2c3e50;
text-align: center;
font-size: 2.5em;
margin-bottom: 20px;
}
.description {
text-align: center;
color: #34495e;
margin-bottom: 30px;
}
.input-section, .output-section {
background-color: #ecf0f1;
padding: 20px;
border-radius: 8px;
margin-bottom: 20px;
}
.input-section h3, .output-section h3 {
color: #2980b9;
margin-bottom: 15px;
}
"""
94
+
95
# Gradio interface: a header banner, an input column (upload + two sliders +
# submit button), an output column (result player), and usage notes.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(
        """
<div style="text-align: center; max-width: 800px; margin: 0 auto;">
<div style="display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;">
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" fill="currentColor" viewBox="0 0 16 16" style="vertical-align: middle;">
<path d="M3.5 6.5A.5.5 0 0 1 4 7v1a4 4 0 0 0 8 0V7a.5.5 0 0 1 1 0v1a5 5 0 0 1-4.5 4.975V15h3a.5.5 0 0 1 0 1h-7a.5.5 0 0 1 0-1h3v-2.025A5 5 0 0 1 3 8V7a.5.5 0 0 1 .5-.5z"/>
<path d="M10 8a2 2 0 1 1-4 0V3a2 2 0 1 1 4 0v5zM8 0a3 3 0 0 0-3 3v5a3 3 0 0 0 6 0V3a3 3 0 0 0-3-3z"/>
</svg>
<h1 style="font-weight: 900; margin-bottom: 7px;">
AI Voice Changer
</h1>
</div>
<p class="description">Transform any voice into a realistic female voice using advanced AI technology</p>
</div>
"""
    )

    with gr.Row():
        with gr.Column(elem_classes="input-section"):
            gr.Markdown("### Input")
            audio_input = gr.Audio(type="filepath", label="Upload Voice")
            # These assignments create module-level names. They are called
            # *_slider so they do not replace the ``pitch_shift`` function
            # defined earlier in this file, which would make it uncallable.
            pitch_slider = gr.Slider(-12, 12, step=0.5, label="Pitch Shift", value=0)
            formant_slider = gr.Slider(-5, 5, step=0.1, label="Formant Shift", value=0)
            submit_btn = gr.Button("Transform Voice", variant="primary")

        with gr.Column(elem_classes="output-section"):
            gr.Markdown("### Output")
            audio_output = gr.Audio(label="Transformed Voice")

    # Wire the button to the handler; input order matches process_audio's
    # (audio_file, pitch_shift_amount, formant_shift_amount) parameters.
    submit_btn.click(
        fn=process_audio,
        inputs=[audio_input, pitch_slider, formant_slider],
        outputs=audio_output,
    )

    gr.Markdown(
        """
### How to use:
1. Upload an audio file containing the voice you want to transform.
2. Adjust the Pitch Shift and Formant Shift sliders to fine-tune the voice (optional).
3. Click the "Transform Voice" button to process the audio.
4. Listen to the transformed voice in the output section.
5. Download the transformed audio file if desired.

Note: This application uses AI to transform voices. The quality of the output may vary depending on the input audio quality and the chosen settings.
"""
    )

if __name__ == "__main__":
    demo.launch()