Artificial-superintelligence committed on
Commit
fbf5ae6
·
verified ·
1 Parent(s): 7223dab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -146
app.py CHANGED
@@ -1,146 +1,76 @@
1
import gradio as gr
import numpy as np
import librosa
import soundfile as sf
from TTS.api import TTS
import torch
import os
import tempfile

# Initialize the voice-conversion TTS model once at import time.
# BUG FIX: the deployed TTS version does not implement `.to()` — the Space
# crashed at startup with "AttributeError: 'TTS' object has no attribute 'to'"
# (see the runtime log in this commit). Device selection must go through the
# constructor's `gpu` flag instead of the torch-style `.to(device)` call.
tts = TTS(
    "tts_models/multilingual/multi-dataset/your_tts",
    progress_bar=False,
    gpu=torch.cuda.is_available(),
)
12
-
13
def load_audio(audio_path):
    """Read an audio file from disk at its native sampling rate.

    Returns the ``(samples, sample_rate)`` tuple produced by librosa.
    """
    # librosa.load already returns (waveform, sr); sr=None keeps the file's
    # original sampling rate instead of resampling to librosa's default.
    return librosa.load(audio_path, sr=None)
16
-
17
def save_audio(audio, sr, path):
    """Write *audio* samples at sample rate *sr* to *path* using soundfile."""
    sf.write(file=path, data=audio, samplerate=sr)
19
-
20
def pitch_shift(audio, sr, n_steps):
    """Shift *audio* up or down by *n_steps* semitones (duration unchanged)."""
    shifted = librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)
    return shifted
22
-
23
def change_voice(audio_path, pitch_shift_amount, formant_shift_amount,
                 target_voice_path="path/to/female_target_voice.wav"):
    """Pitch-shift the input voice, run it through TTS voice conversion,
    then apply a second pitch shift as a crude formant adjustment.

    Parameters
    ----------
    audio_path : str
        Path to the source audio file.
    pitch_shift_amount : float
        Semitones to shift before conversion.
    formant_shift_amount : float
        Semitones to shift after conversion (simplified formant proxy).
    target_voice_path : str, optional
        Reference wav for the target voice. Defaults to the original
        placeholder path; callers must supply a real file for this to work.

    Returns
    -------
    tuple
        ``(sample_rate, samples)`` of the transformed audio.
    """
    audio, sr = load_audio(audio_path)
    pitched_audio = pitch_shift(audio, sr, pitch_shift_amount)

    temp_path = None
    converted_audio_path = None
    try:
        # delete=False so the file survives the `with`; we close the handle
        # before writing so the OS-level handle and sf.write don't conflict.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_path = temp_file.name
        save_audio(pitched_audio, sr, temp_path)

        # NOTE(review): TTS.voice_conversion usually returns a waveform rather
        # than a file path — confirm the installed TTS version returns a path
        # here; otherwise voice_conversion_to_file would be needed.
        converted_audio_path = tts.voice_conversion(
            source_wav=temp_path,
            target_wav=target_voice_path,
            output_wav=None,
        )

        converted_audio, _ = load_audio(converted_audio_path)

        # Apply formant shifting (simplified approach: a second pitch shift).
        formant_shifted_audio = librosa.effects.pitch_shift(
            converted_audio, sr=sr, n_steps=formant_shift_amount
        )
    finally:
        # delete=False means cleanup is our job — do it even when conversion
        # raises, so failed requests do not leak temp files.
        if temp_path and os.path.exists(temp_path):
            os.unlink(temp_path)
        if isinstance(converted_audio_path, str) and os.path.exists(converted_audio_path):
            os.unlink(converted_audio_path)

    return (sr, formant_shifted_audio)
50
-
51
def process_audio(audio_file, pitch_shift_amount, formant_shift_amount):
    """Gradio click handler: transform the uploaded voice and return a path.

    BUG FIX: ``gr.Audio(type="filepath")`` passes a plain string path, and
    ``str`` has no ``.name`` attribute — the original ``audio_file.name``
    raised AttributeError on every request. Accept both a path string and a
    file-like object for safety.
    """
    audio_path = audio_file if isinstance(audio_file, str) else audio_file.name
    sr, audio = change_voice(audio_path, pitch_shift_amount, formant_shift_amount)

    output_path = "output_voice.wav"
    save_audio(audio, sr, output_path)

    return output_path
58
-
59
# Custom CSS injected into gr.Blocks(css=...) below: card-style container,
# centered header, and tinted input/output panels.
custom_css = """
.gradio-container {
    background-color: #f0f4f8;
}
.container {
    max-width: 900px;
    margin: auto;
    padding: 20px;
    border-radius: 10px;
    background-color: white;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
h1 {
    color: #2c3e50;
    text-align: center;
    font-size: 2.5em;
    margin-bottom: 20px;
}
.description {
    text-align: center;
    color: #34495e;
    margin-bottom: 30px;
}
.input-section, .output-section {
    background-color: #ecf0f1;
    padding: 20px;
    border-radius: 8px;
    margin-bottom: 20px;
}
.input-section h3, .output-section h3 {
    color: #2980b9;
    margin-bottom: 15px;
}
"""
94
-
95
# Gradio Interface with improved design
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(
        """
        <div style="text-align: center; max-width: 800px; margin: 0 auto;">
            <div style="display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;">
                <svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" fill="currentColor" viewBox="0 0 16 16" style="vertical-align: middle;">
                    <path d="M3.5 6.5A.5.5 0 0 1 4 7v1a4 4 0 0 0 8 0V7a.5.5 0 0 1 1 0v1a5 5 0 0 1-4.5 4.975V15h3a.5.5 0 0 1 0 1h-7a.5.5 0 0 1 0-1h3v-2.025A5 5 0 0 1 3 8V7a.5.5 0 0 1 .5-.5z"/>
                    <path d="M10 8a2 2 0 1 1-4 0V3a2 2 0 1 1 4 0v5zM8 0a3 3 0 0 0-3 3v5a3 3 0 0 0 6 0V3a3 3 0 0 0-3-3z"/>
                </svg>
                <h1 style="font-weight: 900; margin-bottom: 7px;">
                    AI Voice Changer
                </h1>
            </div>
            <p class="description">Transform any voice into a realistic female voice using advanced AI technology</p>
        </div>
        """
    )

    with gr.Row():
        with gr.Column(elem_classes="input-section"):
            gr.Markdown("### Input")
            audio_input = gr.Audio(type="filepath", label="Upload Voice")
            # BUG FIX: these were named `pitch_shift` / `formant_shift`, which
            # shadowed the module-level pitch_shift() helper at import time —
            # change_voice() would then have called a Slider object and crashed
            # with TypeError. Renamed to avoid clobbering the function.
            pitch_shift_slider = gr.Slider(-12, 12, step=0.5, label="Pitch Shift", value=0)
            formant_shift_slider = gr.Slider(-5, 5, step=0.1, label="Formant Shift", value=0)
            submit_btn = gr.Button("Transform Voice", variant="primary")

        with gr.Column(elem_classes="output-section"):
            gr.Markdown("### Output")
            audio_output = gr.Audio(label="Transformed Voice")

    submit_btn.click(
        fn=process_audio,
        inputs=[audio_input, pitch_shift_slider, formant_shift_slider],
        outputs=audio_output,
    )

    gr.Markdown(
        """
        ### How to use:
        1. Upload an audio file containing the voice you want to transform.
        2. Adjust the Pitch Shift and Formant Shift sliders to fine-tune the voice (optional).
        3. Click the "Transform Voice" button to process the audio.
        4. Listen to the transformed voice in the output section.
        5. Download the transformed audio file if desired.

        Note: This application uses AI to transform voices. The quality of the output may vary depending on the input audio quality and the chosen settings.
        """
    )

if __name__ == "__main__":
    demo.launch()
 
1
+ runtime error
2
+ Exit code: 1. Reason: esample:False
3
+ | > num_mels:64
4
+ | > log_func:np.log10
5
+ | > min_level_db:-100
6
+ | > frame_shift_ms:None
7
+ | > frame_length_ms:None
8
+ | > ref_level_db:20
9
+ | > fft_size:512
10
+ | > power:1.5
11
+ | > preemphasis:0.97
12
+ | > griffin_lim_iters:60
13
+ | > signal_norm:False
14
+ | > symmetric_norm:False
15
+ | > mel_fmin:0
16
+ | > mel_fmax:8000.0
17
+ | > pitch_fmin:1.0
18
+ | > pitch_fmax:640.0
19
+ | > spec_gain:20.0
20
+ | > stft_pad_mode:reflect
21
+ | > max_norm:4.0
22
+ | > clip_norm:False
23
+ | > do_trim_silence:False
24
+ | > trim_db:60
25
+ | > do_sound_norm:False
26
+ | > do_amp_to_db_linear:True
27
+ | > do_amp_to_db_mel:True
28
+ | > do_rms_norm:True
29
+ | > db_level:-27.0
30
+ | > stats_path:None
31
+ | > base:10
32
+ | > hop_length:160
33
+ | > win_length:400
34
+ > External Speaker Encoder Loaded !!
35
+ > initialization of language-embedding layers.
36
+ /usr/local/lib/python3.10/site-packages/torch/nn/utils/weight_norm.py:143: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`.
37
+ WeightNorm.apply(module, name, dim)
38
+ > Model fully restored.
39
+ > Setting up Audio Processor...
40
+ | > sample_rate:16000
41
+ | > resample:False
42
+ | > num_mels:64
43
+ | > log_func:np.log10
44
+ | > min_level_db:-100
45
+ | > frame_shift_ms:None
46
+ | > frame_length_ms:None
47
+ | > ref_level_db:20
48
+ | > fft_size:512
49
+ | > power:1.5
50
+ | > preemphasis:0.97
51
+ | > griffin_lim_iters:60
52
+ | > signal_norm:False
53
+ | > symmetric_norm:False
54
+ | > mel_fmin:0
55
+ | > mel_fmax:8000.0
56
+ | > pitch_fmin:1.0
57
+ | > pitch_fmax:640.0
58
+ | > spec_gain:20.0
59
+ | > stft_pad_mode:reflect
60
+ | > max_norm:4.0
61
+ | > clip_norm:False
62
+ | > do_trim_silence:False
63
+ | > trim_db:60
64
+ | > do_sound_norm:False
65
+ | > do_amp_to_db_linear:True
66
+ | > do_amp_to_db_mel:True
67
+ | > do_rms_norm:True
68
+ | > db_level:-27.0
69
+ | > stats_path:None
70
+ | > base:10
71
+ | > hop_length:160
72
+ | > win_length:400
73
+ Traceback (most recent call last):
74
+ File "/home/user/app/app.py", line 11, in <module>
75
+ tts = TTS("tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to("cuda" if torch.cuda.is_available() else "cpu")
76
+ AttributeError: 'TTS' object has no attribute 'to'