{ "cells": [ { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "import torch\n", "from transformers import VitsModel, AutoTokenizer\n", "import torchaudio\n", "import numpy as np\n", "import os\n", "from gtts import gTTS" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# Đọc và load danh sách từ các file .txt trong thư mục ../data/text\n", "text_folder = \"../data/text\"\n", "text_files = sorted([f for f in os.listdir(text_folder) if f.endswith('.txt')]) # Lọc các file .txt trong thư mục" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "✅ Audio saved as hello.mp3 (gTTS - Female)\n" ] } ], "source": [ "def text_to_speech(text, filename=\"output.mp3\", gender=\"female\", speed=\"normal\"):\n", " \"\"\"\n", " Convert text to speech and save it as an audio file.\n", " \n", " Parameters:\n", " text (str): The text to convert.\n", " filename (str): The output file name.\n", " gender (str): \"male\" (use MMS-TTS) or \"female\" (use gTTS).\n", " speed (str): \"slow\", \"normal\", or \"fast\" (only for gTTS).\n", " \"\"\"\n", " lang = \"vi\"\n", " \n", " if gender.lower() == \"female\":\n", " # gTTS chỉ có giọng nữ\n", " speed_mapping = {\"slow\": True, \"normal\": False, \"fast\": False}\n", " slow = speed_mapping.get(speed.lower(), False)\n", " \n", " tts = gTTS(text=text, lang=lang, slow=slow)\n", " tts.save(filename)\n", " print(f\"✅ Audio saved as {filename}\")\n", " \n", " elif gender.lower() == \"male\":\n", " # MMS-TTS cho giọng nam\n", " model = VitsModel.from_pretrained(\"facebook/mms-tts-vie\")\n", " tokenizer = AutoTokenizer.from_pretrained(\"facebook/mms-tts-vie\")\n", " \n", " inputs = tokenizer(text, return_tensors=\"pt\")\n", " with torch.no_grad():\n", " output = model(**inputs).waveform\n", " \n", " # Lưu file âm thanh\n", " torchaudio.save(filename, output, 24000)\n", " print(f\"✅ Audio saved as {filename}\")\n", " \n", " else:\n", " print(\"⚠️ Giọng không hợp lệ! Chỉ hỗ trợ 'male' hoặc 'female'.\")" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "✅ Audio saved as 1_1.wav (gTTS - Female)\n", "Đã lưu 1_1.wav\n", "✅ Audio saved as 1_10.wav (gTTS - Female)\n", "Đã lưu 1_10.wav\n", "✅ Audio saved as 1_11.wav (gTTS - Female)\n", "Đã lưu 1_11.wav\n", "✅ Audio saved as 1_12.wav (gTTS - Female)\n", "Đã lưu 1_12.wav\n", "✅ Audio saved as 1_13.wav (gTTS - Female)\n", "Đã lưu 1_13.wav\n", "✅ Audio saved as 1_14.wav (gTTS - Female)\n", "Đã lưu 1_14.wav\n", "✅ Audio saved as 1_15.wav (gTTS - Female)\n", "Đã lưu 1_15.wav\n", "✅ Audio saved as 1_16.wav (gTTS - Female)\n", "Đã lưu 1_16.wav\n", "✅ Audio saved as 1_17.wav (gTTS - Female)\n", "Đã lưu 1_17.wav\n", "✅ Audio saved as 1_2.wav (gTTS - Female)\n", "Đã lưu 1_2.wav\n", "✅ Audio saved as 1_3.wav (gTTS - Female)\n", "Đã lưu 1_3.wav\n", "✅ Audio saved as 1_4.wav (gTTS - Female)\n", "Đã lưu 1_4.wav\n", "✅ Audio saved as 1_5.wav (gTTS - Female)\n", "Đã lưu 1_5.wav\n", "✅ Audio saved as 1_6.wav (gTTS - Female)\n", "Đã lưu 1_6.wav\n", "✅ Audio saved as 1_7.wav (gTTS - Female)\n", "Đã lưu 1_7.wav\n", "✅ Audio saved as 1_8.wav (gTTS - Female)\n", "Đã lưu 1_8.wav\n", "✅ Audio saved as 1_9.wav (gTTS - Female)\n", "Đã lưu 1_9.wav\n", "✅ Audio saved as 2_1.wav (gTTS - Female)\n", "Đã lưu 2_1.wav\n", "✅ Audio saved as 2_10.wav (gTTS - Female)\n", "Đã lưu 2_10.wav\n", "✅ Audio saved as 2_11.wav (gTTS - Female)\n", "Đã lưu 2_11.wav\n", "✅ Audio saved as 2_12.wav (gTTS - Female)\n", "Đã lưu 2_12.wav\n", "✅ Audio saved as 2_13.wav (gTTS - Female)\n", "Đã lưu 2_13.wav\n", "✅ Audio saved as 2_14.wav (gTTS - Female)\n", "Đã lưu 2_14.wav\n", "✅ Audio saved as 2_15.wav (gTTS - Female)\n", "Đã lưu 2_15.wav\n", "✅ Audio saved as 2_16.wav (gTTS - Female)\n", "Đã lưu 2_16.wav\n", "✅ Audio saved as 2_17.wav (gTTS - Female)\n", "Đã lưu 2_17.wav\n", "✅ Audio saved as 2_18.wav (gTTS - Female)\n", "Đã lưu 2_18.wav\n", "✅ Audio saved as 2_2.wav (gTTS - Female)\n", "Đã lưu 2_2.wav\n", "✅ Audio saved as 2_3.wav (gTTS - Female)\n", "Đã lưu 2_3.wav\n", "✅ Audio saved as 2_4.wav (gTTS - Female)\n", "Đã lưu 2_4.wav\n", "✅ Audio saved as 2_5.wav (gTTS - Female)\n", "Đã lưu 2_5.wav\n", "✅ Audio saved as 2_6.wav (gTTS - Female)\n", "Đã lưu 2_6.wav\n", "✅ Audio saved as 2_7.wav (gTTS - Female)\n", "Đã lưu 2_7.wav\n", "✅ Audio saved as 2_8.wav (gTTS - Female)\n", "Đã lưu 2_8.wav\n", "✅ Audio saved as 2_9.wav (gTTS - Female)\n", "Đã lưu 2_9.wav\n", "✅ Audio saved as 3_1.wav (gTTS - Female)\n", "Đã lưu 3_1.wav\n", "✅ Audio saved as 3_10.wav (gTTS - Female)\n", "Đã lưu 3_10.wav\n", "✅ Audio saved as 3_11.wav (gTTS - Female)\n", "Đã lưu 3_11.wav\n", "✅ Audio saved as 3_12.wav (gTTS - Female)\n", "Đã lưu 3_12.wav\n", "✅ Audio saved as 3_14.wav (gTTS - Female)\n", "Đã lưu 3_14.wav\n", "✅ Audio saved as 3_17.wav (gTTS - Female)\n", "Đã lưu 3_17.wav\n", "✅ Audio saved as 3_18.wav (gTTS - Female)\n", "Đã lưu 3_18.wav\n", "✅ Audio saved as 3_19.wav (gTTS - Female)\n", "Đã lưu 3_19.wav\n", "✅ Audio saved as 3_2.wav (gTTS - Female)\n", "Đã lưu 3_2.wav\n", "✅ Audio saved as 3_3.wav (gTTS - Female)\n", "Đã lưu 3_3.wav\n", "✅ Audio saved as 3_4.wav (gTTS - Female)\n", "Đã lưu 3_4.wav\n", "✅ Audio saved as 3_5.wav (gTTS - Female)\n", "Đã lưu 3_5.wav\n", "✅ Audio saved as 3_6.wav (gTTS - Female)\n", "Đã lưu 3_6.wav\n", "✅ Audio saved as 3_7.wav (gTTS - Female)\n", "Đã lưu 3_7.wav\n", "✅ Audio saved as 3_8.wav (gTTS - Female)\n", "Đã lưu 3_8.wav\n", "✅ Audio saved as 3_9.wav (gTTS - Female)\n", "Đã lưu 3_9.wav\n" ] } ], "source": [ "for text_file in text_files:\n", " with open(f\"../data/text/{text_file}\", \"r\", encoding=\"utf-8\") as file:\n", " content = file.read()\n", " audio_file = text_file.replace(\"txt\",\"wav\")\n", " text_to_speech(content, audio_file)" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 2 }