Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,8 +7,9 @@ from pydub import AudioSegment
|
|
| 7 |
import tempfile
|
| 8 |
from scipy.io.wavfile import write, read
|
| 9 |
from TTS.api import TTS
|
|
|
|
| 10 |
|
| 11 |
-
#
|
| 12 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
| 13 |
|
| 14 |
# Глобальные переменные и настройки
|
|
@@ -59,49 +60,63 @@ def check_audio_length(audio_path, max_duration=120):
|
|
| 59 |
def synthesize_and_convert_voice(text, language_iso, voice_audio_path, speed):
|
| 60 |
tts_synthesis = TTS(model_name=f"tts_models/{language_iso}/fairseq/vits")
|
| 61 |
wav_data = tts_synthesis.tts(text, speed=speed)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
tts_conversion = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False)
|
| 63 |
|
| 64 |
-
#
|
| 65 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_tts_wav_file:
|
| 66 |
temp_tts_wav_path = temp_tts_wav_file.name
|
| 67 |
-
write(temp_tts_wav_path, 22050,
|
| 68 |
|
| 69 |
-
#
|
| 70 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_output_wav_file:
|
| 71 |
temp_output_wav_path = temp_output_wav_file.name
|
| 72 |
|
|
|
|
| 73 |
tts_conversion.voice_conversion_to_file(temp_tts_wav_path, target_wav=voice_audio_path,
|
| 74 |
file_path=temp_output_wav_path)
|
| 75 |
|
| 76 |
-
#
|
| 77 |
output_sample_rate, output_audio_data = read(temp_output_wav_path)
|
| 78 |
|
| 79 |
-
#
|
| 80 |
os.remove(temp_tts_wav_path)
|
| 81 |
os.remove(temp_output_wav_path)
|
| 82 |
|
| 83 |
return (output_sample_rate, output_audio_data)
|
| 84 |
|
| 85 |
def synthesize_speech(text, speaker_wav_path, language_iso, speed):
|
| 86 |
-
#
|
| 87 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_tts_output:
|
| 88 |
temp_tts_output_path = temp_tts_output.name
|
| 89 |
tts.tts_to_file(text=text, file_path=temp_tts_output_path, speed=speed,
|
| 90 |
-
|
| 91 |
|
| 92 |
tts_conversion = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False)
|
| 93 |
|
| 94 |
-
#
|
| 95 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_output_wav_file:
|
| 96 |
temp_output_wav_path = temp_output_wav_file.name
|
| 97 |
|
|
|
|
| 98 |
tts_conversion.voice_conversion_to_file(temp_tts_output_path, target_wav=speaker_wav_path,
|
| 99 |
-
|
| 100 |
|
| 101 |
-
#
|
| 102 |
output_sample_rate, output_audio_data = read(temp_output_wav_path)
|
| 103 |
|
| 104 |
-
#
|
| 105 |
os.remove(temp_tts_output_path)
|
| 106 |
os.remove(temp_output_wav_path)
|
| 107 |
|
|
@@ -126,7 +141,7 @@ def process_speech(text, speaker_wav_path, selected_language, speed):
|
|
| 126 |
error = gr.Error(error_message, duration=5)
|
| 127 |
raise error
|
| 128 |
|
| 129 |
-
#
|
| 130 |
audio = AudioSegment.from_file(speaker_wav_path)
|
| 131 |
duration = audio.duration_seconds
|
| 132 |
if duration > 120:
|
|
@@ -273,7 +288,7 @@ with gr.Blocks() as app:
|
|
| 273 |
|
| 274 |
def launch_gradio():
|
| 275 |
app.launch(
|
| 276 |
-
|
| 277 |
)
|
| 278 |
|
| 279 |
if __name__ == "__main__":
|
|
|
|
| 7 |
import tempfile
|
| 8 |
from scipy.io.wavfile import write, read
|
| 9 |
from TTS.api import TTS
|
| 10 |
+
import numpy as np # Добавлен импорт NumPy
|
| 11 |
|
| 12 |
+
# Установка переменных окружения для принятия лицензионных условий
|
| 13 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
| 14 |
|
| 15 |
# Глобальные переменные и настройки
|
|
|
|
| 60 |
def synthesize_and_convert_voice(text, language_iso, voice_audio_path, speed):
    """Synthesize ``text`` with a fairseq VITS model and convert it to a target voice.

    The text is rendered by ``tts_models/{language_iso}/fairseq/vits``, written
    to a temporary 16-bit PCM WAV file, passed through the FreeVC24 voice
    conversion model using ``voice_audio_path`` as the target voice, and the
    converted file is read back.

    Args:
        text: Text to synthesize.
        language_iso: Language code interpolated into the fairseq VITS model name.
        voice_audio_path: Path to the audio file used as the target voice.
        speed: Speed value forwarded unchanged to ``TTS.tts``.

    Returns:
        A ``(sample_rate, audio_data)`` tuple exactly as returned by
        ``scipy.io.wavfile.read`` for the converted file.

    Raises:
        ValueError: If the synthesis model returns no audio samples.
    """
    tts_synthesis = TTS(model_name=f"tts_models/{language_iso}/fairseq/vits")
    wav_data = tts_synthesis.tts(text, speed=speed)

    # The model returns a sequence of float samples; work on a float32 array.
    wav_data_np = np.asarray(wav_data, dtype=np.float32)
    if wav_data_np.size == 0:
        # np.max() on an empty array fails with an opaque reduction error;
        # fail early with a message that names the actual problem.
        raise ValueError("TTS model returned no audio samples")

    # Rescale only when the signal exceeds full scale, so it cannot overflow
    # when converted to int16.
    peak = np.max(np.abs(wav_data_np))
    if peak > 1.0:
        wav_data_np = wav_data_np / peak
    wav_data_int16 = (wav_data_np * 32767).astype(np.int16)

    # Write the WAV header with the rate the model actually produced. A wrong
    # rate changes pitch and speed before voice conversion.
    # NOTE(review): Coqui's Synthesizer is expected to expose
    # ``output_sample_rate``; the previously hard-coded 22050 is kept as the
    # fallback if that attribute is missing -- confirm against the installed
    # TTS version.
    synthesizer = getattr(tts_synthesis, "synthesizer", None)
    sample_rate = int(getattr(synthesizer, "output_sample_rate", None) or 22050)

    tts_conversion = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False)

    temp_tts_wav_path = None
    temp_output_wav_path = None
    try:
        # Reserve a path for the synthesized audio; the handle is closed before
        # scipy reopens the path for writing.
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_tts_wav_file:
            temp_tts_wav_path = temp_tts_wav_file.name
        write(temp_tts_wav_path, sample_rate, wav_data_int16)

        # Reserve a path for the voice-converted output.
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_output_wav_file:
            temp_output_wav_path = temp_output_wav_file.name

        tts_conversion.voice_conversion_to_file(temp_tts_wav_path, target_wav=voice_audio_path,
                                                file_path=temp_output_wav_path)

        output_sample_rate, output_audio_data = read(temp_output_wav_path)
        return (output_sample_rate, output_audio_data)
    finally:
        # The files were created with delete=False, so remove them here even
        # when synthesis, conversion or reading raised.
        for temp_path in (temp_tts_wav_path, temp_output_wav_path):
            if temp_path is not None and os.path.exists(temp_path):
                os.remove(temp_path)
|
| 98 |
|
| 99 |
def synthesize_speech(text, speaker_wav_path, language_iso, speed):
|
| 100 |
+
# Генерация речи с помощью tts и сохранение во временный файл
|
| 101 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_tts_output:
|
| 102 |
temp_tts_output_path = temp_tts_output.name
|
| 103 |
tts.tts_to_file(text=text, file_path=temp_tts_output_path, speed=speed,
|
| 104 |
+
speaker_wav=speaker_wav_path, language=language_iso)
|
| 105 |
|
| 106 |
tts_conversion = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False)
|
| 107 |
|
| 108 |
+
# Подготовка временного выходного файла
|
| 109 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_output_wav_file:
|
| 110 |
temp_output_wav_path = temp_output_wav_file.name
|
| 111 |
|
| 112 |
+
# Преобразование голоса
|
| 113 |
tts_conversion.voice_conversion_to_file(temp_tts_output_path, target_wav=speaker_wav_path,
|
| 114 |
+
file_path=temp_output_wav_path)
|
| 115 |
|
| 116 |
+
# Чтение преобразованного аудио из temp_output_wav_path
|
| 117 |
output_sample_rate, output_audio_data = read(temp_output_wav_path)
|
| 118 |
|
| 119 |
+
# Удаление временных файлов
|
| 120 |
os.remove(temp_tts_output_path)
|
| 121 |
os.remove(temp_output_wav_path)
|
| 122 |
|
|
|
|
| 141 |
error = gr.Error(error_message, duration=5)
|
| 142 |
raise error
|
| 143 |
|
| 144 |
+
# Проверка длины аудио
|
| 145 |
audio = AudioSegment.from_file(speaker_wav_path)
|
| 146 |
duration = audio.duration_seconds
|
| 147 |
if duration > 120:
|
|
|
|
| 288 |
|
| 289 |
def launch_gradio():
    """Start the Gradio interface held in the module-level ``app`` object."""
    # Launch parameters can be passed to app.launch() here if needed.
    app.launch()
|
| 293 |
|
| 294 |
if __name__ == "__main__":
|