File size: 7,844 Bytes
e62cec6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from transformers import VitsModel, AutoTokenizer\n",
    "import torchaudio\n",
    "import numpy as np\n",
    "import os\n",
    "from gtts import gTTS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gather the names of the .txt files found in ../data/text.\n",
    "# The sort is lexicographic, so e.g. \"1_10.txt\" comes before \"1_2.txt\".\n",
    "text_folder = \"../data/text\"\n",
    "text_files = [name for name in os.listdir(text_folder) if name.endswith(\".txt\")]\n",
    "text_files.sort()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✅ Audio saved as hello.mp3 (gTTS - Female)\n"
     ]
    }
   ],
   "source": [
    "def text_to_speech(text, filename=\"output.mp3\", gender=\"female\", speed=\"normal\"):\n",
    "    \"\"\"\n",
    "    Convert Vietnamese text to speech and save it as an audio file.\n",
    "\n",
    "    Parameters:\n",
    "        text (str): The text to convert.\n",
    "        filename (str): Path of the audio file to write.\n",
    "        gender (str): \"female\" (gTTS) or \"male\" (facebook/mms-tts-vie),\n",
    "            case-insensitive. Any other value prints a warning and writes\n",
    "            no file.\n",
    "        speed (str): \"slow\" or \"normal\"; only used by the gTTS voice.\n",
    "            \"fast\" is accepted but behaves like \"normal\", as do unknown values.\n",
    "\n",
    "    Note:\n",
    "        gTTS produces MP3 data, so the female voice writes MP3 audio even\n",
    "        when filename ends in another extension such as .wav.\n",
    "    \"\"\"\n",
    "    lang = \"vi\"\n",
    "    voice = gender.lower()\n",
    "\n",
    "    if voice == \"female\":\n",
    "        # gTTS only offers a female voice, and its single speed control is the\n",
    "        # boolean `slow` flag, so every speed other than \"slow\" means normal.\n",
    "        slow = speed.lower() == \"slow\"\n",
    "\n",
    "        tts = gTTS(text=text, lang=lang, slow=slow)\n",
    "        tts.save(filename)\n",
    "        print(f\"✅ Audio saved as {filename}\")\n",
    "\n",
    "    elif voice == \"male\":\n",
    "        # MMS-TTS supplies the male voice.\n",
    "        model = VitsModel.from_pretrained(\"facebook/mms-tts-vie\")\n",
    "        tokenizer = AutoTokenizer.from_pretrained(\"facebook/mms-tts-vie\")\n",
    "\n",
    "        inputs = tokenizer(text, return_tensors=\"pt\")\n",
    "        with torch.no_grad():\n",
    "            output = model(**inputs).waveform\n",
    "\n",
    "        # Save with the sampling rate declared in the model config. A\n",
    "        # hard-coded rate that differs from it makes the file play back at\n",
    "        # the wrong speed and pitch.\n",
    "        torchaudio.save(filename, output, model.config.sampling_rate)\n",
    "        print(f\"✅ Audio saved as {filename}\")\n",
    "\n",
    "    else:\n",
    "        print(\"⚠️ Giọng không hợp lệ! Chỉ hỗ trợ 'male' hoặc 'female'.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✅ Audio saved as 1_1.wav (gTTS - Female)\n",
      "Đã lưu 1_1.wav\n",
      "✅ Audio saved as 1_10.wav (gTTS - Female)\n",
      "Đã lưu 1_10.wav\n",
      "✅ Audio saved as 1_11.wav (gTTS - Female)\n",
      "Đã lưu 1_11.wav\n",
      "✅ Audio saved as 1_12.wav (gTTS - Female)\n",
      "Đã lưu 1_12.wav\n",
      "✅ Audio saved as 1_13.wav (gTTS - Female)\n",
      "Đã lưu 1_13.wav\n",
      "✅ Audio saved as 1_14.wav (gTTS - Female)\n",
      "Đã lưu 1_14.wav\n",
      "✅ Audio saved as 1_15.wav (gTTS - Female)\n",
      "Đã lưu 1_15.wav\n",
      "✅ Audio saved as 1_16.wav (gTTS - Female)\n",
      "Đã lưu 1_16.wav\n",
      "✅ Audio saved as 1_17.wav (gTTS - Female)\n",
      "Đã lưu 1_17.wav\n",
      "✅ Audio saved as 1_2.wav (gTTS - Female)\n",
      "Đã lưu 1_2.wav\n",
      "✅ Audio saved as 1_3.wav (gTTS - Female)\n",
      "Đã lưu 1_3.wav\n",
      "✅ Audio saved as 1_4.wav (gTTS - Female)\n",
      "Đã lưu 1_4.wav\n",
      "✅ Audio saved as 1_5.wav (gTTS - Female)\n",
      "Đã lưu 1_5.wav\n",
      "✅ Audio saved as 1_6.wav (gTTS - Female)\n",
      "Đã lưu 1_6.wav\n",
      "✅ Audio saved as 1_7.wav (gTTS - Female)\n",
      "Đã lưu 1_7.wav\n",
      "✅ Audio saved as 1_8.wav (gTTS - Female)\n",
      "Đã lưu 1_8.wav\n",
      "✅ Audio saved as 1_9.wav (gTTS - Female)\n",
      "Đã lưu 1_9.wav\n",
      "✅ Audio saved as 2_1.wav (gTTS - Female)\n",
      "Đã lưu 2_1.wav\n",
      "✅ Audio saved as 2_10.wav (gTTS - Female)\n",
      "Đã lưu 2_10.wav\n",
      "✅ Audio saved as 2_11.wav (gTTS - Female)\n",
      "Đã lưu 2_11.wav\n",
      "✅ Audio saved as 2_12.wav (gTTS - Female)\n",
      "Đã lưu 2_12.wav\n",
      "✅ Audio saved as 2_13.wav (gTTS - Female)\n",
      "Đã lưu 2_13.wav\n",
      "✅ Audio saved as 2_14.wav (gTTS - Female)\n",
      "Đã lưu 2_14.wav\n",
      "✅ Audio saved as 2_15.wav (gTTS - Female)\n",
      "Đã lưu 2_15.wav\n",
      "✅ Audio saved as 2_16.wav (gTTS - Female)\n",
      "Đã lưu 2_16.wav\n",
      "✅ Audio saved as 2_17.wav (gTTS - Female)\n",
      "Đã lưu 2_17.wav\n",
      "✅ Audio saved as 2_18.wav (gTTS - Female)\n",
      "Đã lưu 2_18.wav\n",
      "✅ Audio saved as 2_2.wav (gTTS - Female)\n",
      "Đã lưu 2_2.wav\n",
      "✅ Audio saved as 2_3.wav (gTTS - Female)\n",
      "Đã lưu 2_3.wav\n",
      "✅ Audio saved as 2_4.wav (gTTS - Female)\n",
      "Đã lưu 2_4.wav\n",
      "✅ Audio saved as 2_5.wav (gTTS - Female)\n",
      "Đã lưu 2_5.wav\n",
      "✅ Audio saved as 2_6.wav (gTTS - Female)\n",
      "Đã lưu 2_6.wav\n",
      "✅ Audio saved as 2_7.wav (gTTS - Female)\n",
      "Đã lưu 2_7.wav\n",
      "✅ Audio saved as 2_8.wav (gTTS - Female)\n",
      "Đã lưu 2_8.wav\n",
      "✅ Audio saved as 2_9.wav (gTTS - Female)\n",
      "Đã lưu 2_9.wav\n",
      "✅ Audio saved as 3_1.wav (gTTS - Female)\n",
      "Đã lưu 3_1.wav\n",
      "✅ Audio saved as 3_10.wav (gTTS - Female)\n",
      "Đã lưu 3_10.wav\n",
      "✅ Audio saved as 3_11.wav (gTTS - Female)\n",
      "Đã lưu 3_11.wav\n",
      "✅ Audio saved as 3_12.wav (gTTS - Female)\n",
      "Đã lưu 3_12.wav\n",
      "✅ Audio saved as 3_14.wav (gTTS - Female)\n",
      "Đã lưu 3_14.wav\n",
      "✅ Audio saved as 3_17.wav (gTTS - Female)\n",
      "Đã lưu 3_17.wav\n",
      "✅ Audio saved as 3_18.wav (gTTS - Female)\n",
      "Đã lưu 3_18.wav\n",
      "✅ Audio saved as 3_19.wav (gTTS - Female)\n",
      "Đã lưu 3_19.wav\n",
      "✅ Audio saved as 3_2.wav (gTTS - Female)\n",
      "Đã lưu 3_2.wav\n",
      "✅ Audio saved as 3_3.wav (gTTS - Female)\n",
      "Đã lưu 3_3.wav\n",
      "✅ Audio saved as 3_4.wav (gTTS - Female)\n",
      "Đã lưu 3_4.wav\n",
      "✅ Audio saved as 3_5.wav (gTTS - Female)\n",
      "Đã lưu 3_5.wav\n",
      "✅ Audio saved as 3_6.wav (gTTS - Female)\n",
      "Đã lưu 3_6.wav\n",
      "✅ Audio saved as 3_7.wav (gTTS - Female)\n",
      "Đã lưu 3_7.wav\n",
      "✅ Audio saved as 3_8.wav (gTTS - Female)\n",
      "Đã lưu 3_8.wav\n",
      "✅ Audio saved as 3_9.wav (gTTS - Female)\n",
      "Đã lưu 3_9.wav\n"
     ]
    }
   ],
   "source": [
    "# Synthesize one audio file per transcript, written to the current\n",
    "# working directory and named after the transcript.\n",
    "for text_file in text_files:\n",
    "    # Read from text_folder so the listing and the reads always use the same directory.\n",
    "    with open(os.path.join(text_folder, text_file), \"r\", encoding=\"utf-8\") as file:\n",
    "        content = file.read()\n",
    "    # Swap only the extension; str.replace(\"txt\", \"wav\") would also rewrite\n",
    "    # a \"txt\" that happens to appear inside the file stem.\n",
    "    audio_file = os.path.splitext(text_file)[0] + \".wav\"\n",
    "    # NOTE(review): the default voice goes through gTTS, which emits MP3 data,\n",
    "    # so these .wav files contain MP3 audio - confirm downstream readers cope.\n",
    "    text_to_speech(content, audio_file)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}