Artificial-superintelligence committed on
Commit
e86aa69
·
verified ·
1 Parent(s): fbf5ae6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +168 -76
app.py CHANGED
@@ -1,76 +1,168 @@
1
- runtime error
2
- Exit code: 1. Reason: esample:False
3
- | > num_mels:64
4
- | > log_func:np.log10
5
- | > min_level_db:-100
6
- | > frame_shift_ms:None
7
- | > frame_length_ms:None
8
- | > ref_level_db:20
9
- | > fft_size:512
10
- | > power:1.5
11
- | > preemphasis:0.97
12
- | > griffin_lim_iters:60
13
- | > signal_norm:False
14
- | > symmetric_norm:False
15
- | > mel_fmin:0
16
- | > mel_fmax:8000.0
17
- | > pitch_fmin:1.0
18
- | > pitch_fmax:640.0
19
- | > spec_gain:20.0
20
- | > stft_pad_mode:reflect
21
- | > max_norm:4.0
22
- | > clip_norm:False
23
- | > do_trim_silence:False
24
- | > trim_db:60
25
- | > do_sound_norm:False
26
- | > do_amp_to_db_linear:True
27
- | > do_amp_to_db_mel:True
28
- | > do_rms_norm:True
29
- | > db_level:-27.0
30
- | > stats_path:None
31
- | > base:10
32
- | > hop_length:160
33
- | > win_length:400
34
- > External Speaker Encoder Loaded !!
35
- > initialization of language-embedding layers.
36
- /usr/local/lib/python3.10/site-packages/torch/nn/utils/weight_norm.py:143: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`.
37
- WeightNorm.apply(module, name, dim)
38
- > Model fully restored.
39
- > Setting up Audio Processor...
40
- | > sample_rate:16000
41
- | > resample:False
42
- | > num_mels:64
43
- | > log_func:np.log10
44
- | > min_level_db:-100
45
- | > frame_shift_ms:None
46
- | > frame_length_ms:None
47
- | > ref_level_db:20
48
- | > fft_size:512
49
- | > power:1.5
50
- | > preemphasis:0.97
51
- | > griffin_lim_iters:60
52
- | > signal_norm:False
53
- | > symmetric_norm:False
54
- | > mel_fmin:0
55
- | > mel_fmax:8000.0
56
- | > pitch_fmin:1.0
57
- | > pitch_fmax:640.0
58
- | > spec_gain:20.0
59
- | > stft_pad_mode:reflect
60
- | > max_norm:4.0
61
- | > clip_norm:False
62
- | > do_trim_silence:False
63
- | > trim_db:60
64
- | > do_sound_norm:False
65
- | > do_amp_to_db_linear:True
66
- | > do_amp_to_db_mel:True
67
- | > do_rms_norm:True
68
- | > db_level:-27.0
69
- | > stats_path:None
70
- | > base:10
71
- | > hop_length:160
72
- | > win_length:400
73
- Traceback (most recent call last):
74
- File "/home/user/app/app.py", line 11, in <module>
75
- tts = TTS("tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to("cuda" if torch.cuda.is_available() else "cpu")
76
- AttributeError: 'TTS' object has no attribute 'to'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import librosa
4
+ import soundfile as sf
5
+ from TTS.api import TTS
6
+ import torch
7
+ import os
8
+ import tempfile
9
+
10
# Initialize the TTS model once at import time. A load failure is reported on
# stdout instead of crashing the app: `tts` is then None, and `change_voice`
# checks for that before doing any work.
# NOTE(review): `change_voice` depends on voice conversion -- confirm that this
# YourTTS checkpoint supports it in the installed Coqui TTS version (the
# library also publishes dedicated `voice_conversion_models/...` checkpoints).
try:
    tts = TTS("tts_models/multilingual/multi-dataset/your_tts", progress_bar=False)
except Exception as e:
    print(f"Error initializing TTS model: {e}")
    tts = None
16
+
17
def load_audio(audio_path):
    """Load an audio file with librosa at its native sample rate.

    Args:
        audio_path: Path of the audio file to read.

    Returns:
        A ``(audio, sr)`` tuple as returned by ``librosa.load``, or
        ``(None, None)`` if loading fails for any reason (the error is
        printed, not raised).
    """
    try:
        # sr=None keeps the file's own sample rate instead of resampling.
        audio, sr = librosa.load(audio_path, sr=None)
        return audio, sr
    except Exception as e:
        print(f"Error loading audio: {e}")
        return None, None
24
+
25
def save_audio(audio, sr, path):
    """Write audio samples to ``path`` using soundfile.

    This is best-effort: any failure is printed and swallowed, so callers
    cannot assume the file exists afterwards.

    Args:
        audio: Sample data to write.
        sr: Sample rate passed to ``sf.write``.
        path: Destination file path.
    """
    try:
        sf.write(path, audio, sr)
    except Exception as e:
        print(f"Error saving audio: {e}")
30
+
31
def pitch_shift(audio, sr, n_steps):
    """Pitch-shift ``audio`` by ``n_steps`` via ``librosa.effects.pitch_shift``.

    Args:
        audio: Sample data to shift.
        sr: Sample rate of ``audio``.
        n_steps: Shift amount forwarded to librosa as ``n_steps``.

    Returns:
        The shifted audio, or the unmodified input ``audio`` if shifting
        fails (the error is printed, not raised).
    """
    try:
        return librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)
    except Exception as e:
        print(f"Error in pitch shifting: {e}")
        return audio
37
+
38
def change_voice(audio_path, pitch_shift_amount, formant_shift_amount,
                 target_wav="path/to/female_target_voice.wav"):
    """Pitch-shift a recording, run voice conversion on it, then shift again.

    Args:
        audio_path: Path of the source recording.
        pitch_shift_amount: Steps applied to the source before conversion.
        formant_shift_amount: Steps applied to the converted audio. This is a
            second plain pitch shift; no separate formant processing is done.
        target_wav: Path of the reference recording whose voice is the
            conversion target. The default is a placeholder and must be
            replaced with a real file for conversion to succeed.

    Returns:
        ``(sample_rate, audio)`` for the converted result, or ``(None, None)``
        if the model is unavailable, the input cannot be loaded, or any
        conversion step fails (the error is printed, not raised).
    """
    if tts is None:
        return None, None

    audio, sr = load_audio(audio_path)
    if audio is None or sr is None:
        return None, None

    pitched_audio = pitch_shift(audio, sr, pitch_shift_amount)

    temp_paths = []
    try:
        # Reserve two scratch files (conversion input and output). The handles
        # are closed right away so other libraries can reopen the paths.
        for _ in range(2):
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
                temp_paths.append(handle.name)
        source_path, converted_path = temp_paths

        save_audio(pitched_audio, sr, source_path)
        tts.voice_conversion_to_file(
            source_wav=source_path,
            target_wav=target_wav,
            file_path=converted_path,
        )

        # The converter may emit a different sample rate than the input, so
        # use the rate read back from the converted file from here on.
        converted_audio, converted_sr = load_audio(converted_path)
        if converted_audio is None or converted_sr is None:
            return None, None

        formant_shifted_audio = librosa.effects.pitch_shift(
            converted_audio, sr=converted_sr, n_steps=formant_shift_amount
        )
        return converted_sr, formant_shifted_audio
    except Exception as e:
        print(f"Error in voice conversion: {e}")
        return None, None
    finally:
        # Remove scratch files on every exit path, including failures.
        for path in temp_paths:
            try:
                os.unlink(path)
            except OSError:
                # Already gone or never created; nothing left to clean up.
                pass
67
+
68
def process_audio(audio_file, pitch_shift_amount, formant_shift_amount):
    """Gradio callback: transform the uploaded voice and return a WAV path.

    Args:
        audio_file: The uploaded audio. The input component in this file is
            declared with ``type="filepath"``, so this is normally a path
            string; objects exposing the path as ``.name`` are also accepted.
        pitch_shift_amount: Value of the "Pitch Shift" slider.
        formant_shift_amount: Value of the "Formant Shift" slider.

    Returns:
        Path of the written output file, or ``None`` when there is no input
        or the transformation fails.
    """
    if audio_file is None:
        return None

    # A plain path must be used as-is: str has no `.name`, and on a Path
    # object `.name` would be only the final component.
    if isinstance(audio_file, (str, os.PathLike)):
        audio_path = os.fspath(audio_file)
    else:
        audio_path = audio_file.name

    sr, audio = change_voice(audio_path, pitch_shift_amount, formant_shift_amount)
    if sr is None or audio is None:
        return None

    output_path = "output_voice.wav"
    save_audio(audio, sr, output_path)

    return output_path
80
+
81
# Custom CSS for improved design; passed to gr.Blocks(css=...) below.
custom_css = """
.gradio-container {
    background-color: #f0f4f8;
}
.container {
    max-width: 900px;
    margin: auto;
    padding: 20px;
    border-radius: 10px;
    background-color: white;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
h1 {
    color: #2c3e50;
    text-align: center;
    font-size: 2.5em;
    margin-bottom: 20px;
}
.description {
    text-align: center;
    color: #34495e;
    margin-bottom: 30px;
}
.input-section, .output-section {
    background-color: #ecf0f1;
    padding: 20px;
    border-radius: 8px;
    margin-bottom: 20px;
}
.input-section h3, .output-section h3 {
    color: #2980b9;
    margin-bottom: 15px;
}
"""
116
+
117
# Gradio Interface with improved design
with gr.Blocks(css=custom_css) as demo:
    # Page header: inline microphone icon, title, and tagline.
    gr.HTML(
        """
        <div style="text-align: center; max-width: 800px; margin: 0 auto;">
            <div style="display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;">
                <svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" fill="currentColor" viewBox="0 0 16 16" style="vertical-align: middle;">
                    <path d="M3.5 6.5A.5.5 0 0 1 4 7v1a4 4 0 0 0 8 0V7a.5.5 0 0 1 1 0v1a5 5 0 0 1-4.5 4.975V15h3a.5.5 0 0 1 0 1h-7a.5.5 0 0 1 0-1h3v-2.025A5 5 0 0 1 3 8V7a.5.5 0 0 1 .5-.5z"/>
                    <path d="M10 8a2 2 0 1 1-4 0V3a2 2 0 1 1 4 0v5zM8 0a3 3 0 0 0-3 3v5a3 3 0 0 0 6 0V3a3 3 0 0 0-3-3z"/>
                </svg>
                <h1 style="font-weight: 900; margin-bottom: 7px;">
                    AI Voice Changer
                </h1>
            </div>
            <p class="description">Transform any voice into a realistic female voice using advanced AI technology</p>
        </div>
        """
    )

    with gr.Row():
        with gr.Column(elem_classes="input-section"):
            gr.Markdown("### Input")
            audio_input = gr.Audio(type="filepath", label="Upload Voice")
            # These names must not be `pitch_shift` / `formant_shift`: a `with`
            # block shares module scope, so `pitch_shift = gr.Slider(...)`
            # would rebind the `pitch_shift()` helper that change_voice calls.
            pitch_shift_slider = gr.Slider(-12, 12, step=0.5, label="Pitch Shift", value=0)
            formant_shift_slider = gr.Slider(-5, 5, step=0.1, label="Formant Shift", value=0)
            submit_btn = gr.Button("Transform Voice", variant="primary")

        with gr.Column(elem_classes="output-section"):
            gr.Markdown("### Output")
            audio_output = gr.Audio(label="Transformed Voice")

    # Wire the button to the processing callback; input order matches the
    # parameter order of process_audio.
    submit_btn.click(
        fn=process_audio,
        inputs=[audio_input, pitch_shift_slider, formant_shift_slider],
        outputs=audio_output,
    )

    # The Markdown text is kept flush-left so it can never be rendered as an
    # indented code block.
    gr.Markdown(
        """
### How to use:
1. Upload an audio file containing the voice you want to transform.
2. Adjust the Pitch Shift and Formant Shift sliders to fine-tune the voice (optional).
3. Click the "Transform Voice" button to process the audio.
4. Listen to the transformed voice in the output section.
5. Download the transformed audio file if desired.

Note: This application uses AI to transform voices. The quality of the output may vary depending on the input audio quality and the chosen settings.
"""
    )

if __name__ == "__main__":
    demo.launch()