Create tts_script.py
tts_script.py (ADDED) +362 -0
@@ -0,0 +1,362 @@
"""
Text processing utilities for TTS providers.
"""
import re
import time
import pathlib
from io import BytesIO
from typing import List, Dict, Tuple, Set, Pattern
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
from playsound import playsound

from webscout import exceptions
from webscout.AIbase import TTSProvider
from webscout.litagent import LitAgent


class SentenceTokenizer:
    """Advanced sentence tokenizer with support for complex cases and proper formatting."""

    def __init__(self) -> None:
        # Common abbreviations by category
        self.TITLES: Set[str] = {
            'mr', 'mrs', 'ms', 'dr', 'prof', 'rev', 'sr', 'jr', 'esq',
            'hon', 'pres', 'gov', 'atty', 'supt', 'det', 'col', 'maj',
            'gen', 'capt', 'cmdr', 'lt', 'sgt', 'cpl', 'pvt'
        }

        self.ACADEMIC: Set[str] = {
            'ph.d', 'phd', 'm.d', 'md', 'b.a', 'ba', 'm.a', 'ma', 'd.d.s', 'dds',
            'm.b.a', 'mba', 'b.sc', 'bsc', 'm.sc', 'msc', 'llb', 'll.b', 'bl'
        }

        self.ORGANIZATIONS: Set[str] = {
            'inc', 'ltd', 'co', 'corp', 'llc', 'llp', 'assn', 'bros', 'plc', 'cos',
            'intl', 'dept', 'est', 'dist', 'mfg', 'div'
        }

        self.MONTHS: Set[str] = {
            'jan', 'feb', 'mar', 'apr', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'
        }

        self.UNITS: Set[str] = {
            'oz', 'pt', 'qt', 'gal', 'ml', 'cc', 'km', 'cm', 'mm', 'ft', 'in',
            'kg', 'lb', 'lbs', 'hz', 'khz', 'mhz', 'ghz', 'kb', 'mb', 'gb', 'tb'
        }

        self.TECHNOLOGY: Set[str] = {
            'v', 'ver', 'app', 'sys', 'dir', 'exe', 'lib', 'api', 'sdk', 'url',
            'cpu', 'gpu', 'ram', 'rom', 'hdd', 'ssd', 'lan', 'wan', 'sql', 'html'
        }

        self.MISC: Set[str] = {
            'vs', 'etc', 'ie', 'eg', 'no', 'al', 'ca', 'cf', 'pp', 'est', 'st',
            'approx', 'appt', 'apt', 'dept', 'depts', 'min', 'max', 'avg'
        }

        # Combine all abbreviations
        self.all_abbreviations: Set[str] = (
            self.TITLES | self.ACADEMIC | self.ORGANIZATIONS |
            self.MONTHS | self.UNITS | self.TECHNOLOGY | self.MISC
        )

        # Special patterns
        self.ELLIPSIS: str = r'\.{2,}|…'
        self.URL_PATTERN: str = (
            r'(?:https?:\/\/|www\.)[\w\-\.]+\.[a-zA-Z]{2,}(?:\/[^\s]*)?'
        )
        self.EMAIL_PATTERN: str = r'[\w\.-]+@[\w\.-]+\.\w+'
        self.NUMBER_PATTERN: str = (
            r'\d+(?:\.\d+)?(?:%|°|km|cm|mm|m|kg|g|lb|ft|in|mph|kmh|hz|mhz|ghz)?'
        )

        # Quote pairs (opening -> closing); straight quotes close themselves
        self.QUOTE_PAIRS: Dict[str, str] = {
            '"': '"', "'": "'", '“': '”', '‘': '’', '「': '」',
            '『': '』', '«': '»', '‹': '›', '‚': '’'
        }

        self.BRACKETS: Dict[str, str] = {
            '(': ')', '[': ']', '{': '}', '⟨': '⟩', '「': '」',
            '『': '』', '【': '】', '〖': '〗'
        }

        # Compile regex patterns
        self._compile_patterns()

    def _compile_patterns(self) -> None:
        """Compile regex patterns for better performance."""
        # Pattern for finding potential sentence boundaries
        self.SENTENCE_END: Pattern = re.compile(
            r'''
            # Group for sentence endings
            (?:
                # Standard endings with optional closing quotes/brackets
                (?<=[.!?])[\"\'\)\]\}»›」』\s]*
                # Ellipsis
                |(?:\.{2,}|…)
                # Asian-style endings
                |(?<=[。！？」』】])
            )
            # Must be followed by whitespace and a capital letter or digit
            (?=\s+(?:[A-Z0-9]|["'({\[「『《‹〈][A-Z]))
            ''',
            re.VERBOSE
        )

        # Pattern for abbreviations; longest alternatives first so a shorter
        # abbreviation never shadows a longer one (e.g. 'dept' vs 'depts')
        abbrev_pattern = '|'.join(
            re.escape(abbr)
            for abbr in sorted(self.all_abbreviations, key=len, reverse=True)
        )
        self.ABBREV_PATTERN: Pattern = re.compile(
            fr'\b(?:{abbrev_pattern})\.?',
            re.IGNORECASE
        )

    def _protect_special_cases(self, text: str) -> Tuple[str, Dict[str, str]]:
        """Protect URLs, emails, and other special cases from being split."""
        protected = text
        placeholders: Dict[str, str] = {}
        counter = 0

        # Protect URLs and emails
        for pattern in [self.URL_PATTERN, self.EMAIL_PATTERN]:
            for match in re.finditer(pattern, protected):
                placeholder = f'__PROTECTED_{counter}__'
                placeholders[placeholder] = match.group()
                protected = protected.replace(match.group(), placeholder)
                counter += 1

        # Protect quoted content. The closing check runs before the opening
        # check so that symmetric quotes such as '"' can close their own span.
        stack = []
        protected_chars = list(protected)
        i = 0
        while i < len(protected_chars):
            char = protected_chars[i]
            if stack and char == self.QUOTE_PAIRS[stack[-1][0]]:
                _, start_idx = stack.pop()
                content = ''.join(protected_chars[start_idx:i + 1])
                placeholder = f'__PROTECTED_{counter}__'
                placeholders[placeholder] = content
                protected_chars[start_idx:i + 1] = list(placeholder)
                counter += 1
                # The replacement changed the list length, so resume scanning
                # just past the inserted placeholder rather than at the stale index
                i = start_idx + len(placeholder) - 1
            elif char in self.QUOTE_PAIRS:
                stack.append((char, i))
            i += 1

        return ''.join(protected_chars), placeholders

    def _restore_special_cases(self, text: str, placeholders: Dict[str, str]) -> str:
        """Restore protected content."""
        restored = text
        for placeholder, original in placeholders.items():
            restored = restored.replace(placeholder, original)
        return restored

    def _handle_abbreviations(self, text: str) -> str:
        """Handle abbreviations to prevent incorrect sentence splitting."""
        def replace_abbrev(match: re.Match) -> str:
            abbr = match.group().lower().rstrip('.')
            if abbr in self.all_abbreviations:
                return match.group().replace('.', '__DOT__')
            return match.group()

        return self.ABBREV_PATTERN.sub(replace_abbrev, text)

    def _normalize_whitespace(self, text: str) -> str:
        """Normalize whitespace while preserving paragraph breaks."""
        # Replace multiple newlines with a paragraph marker
        text = re.sub(r'\n\s*\n', ' __PARA__ ', text)
        # Normalize remaining whitespace
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

    def _restore_formatting(self, sentences: List[str]) -> List[str]:
        """Restore original formatting and clean up sentences."""
        restored = []
        for sentence in sentences:
            # Restore dots in abbreviations
            sentence = sentence.replace('__DOT__', '.')

            # Restore paragraph breaks
            sentence = sentence.replace('__PARA__', '\n\n')

            # Clean up whitespace
            sentence = re.sub(r'\s+', ' ', sentence).strip()

            # Capitalize the first letter unless the sentence starts with an abbreviation
            words = sentence.split()
            if words and words[0].lower() not in self.all_abbreviations:
                sentence = sentence[0].upper() + sentence[1:]

            if sentence:
                restored.append(sentence)

        return restored

    def tokenize(self, text: str) -> List[str]:
        """
        Split text into sentences while handling complex cases.

        Args:
            text (str): Input text to split into sentences.

        Returns:
            List[str]: List of properly formatted sentences.
        """
        if not text or not text.strip():
            return []

        # Step 1: Protect special cases
        protected_text, placeholders = self._protect_special_cases(text)

        # Step 2: Normalize whitespace
        protected_text = self._normalize_whitespace(protected_text)

        # Step 3: Handle abbreviations
        protected_text = self._handle_abbreviations(protected_text)

        # Step 4: Split into potential sentences
        potential_sentences = self.SENTENCE_END.split(protected_text)

        # Step 5: Process and restore formatting
        sentences = self._restore_formatting(potential_sentences)

        # Step 6: Restore special cases
        sentences = [self._restore_special_cases(s, placeholders) for s in sentences]

        # Step 7: Post-process sentences
        final_sentences = []
        current_sentence = []

        for sentence in sentences:
            # Skip empty sentences
            if not sentence.strip():
                continue

            # Check if sentence might be a continuation of the previous one
            if current_sentence and sentence[0].islower():
                current_sentence.append(sentence)
            else:
                if current_sentence:
                    final_sentences.append(' '.join(current_sentence))
                current_sentence = [sentence]

        # Add the last sentence if present
        if current_sentence:
            final_sentences.append(' '.join(current_sentence))

        return final_sentences


def split_sentences(text: str) -> List[str]:
    """
    Convenience function to split text into sentences using SentenceTokenizer.

    Args:
        text (str): Input text to split into sentences.

    Returns:
        List[str]: List of properly formatted sentences.
    """
    tokenizer = SentenceTokenizer()
    return tokenizer.tokenize(text)

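
# Illustrative sketch of the tokenizer's behavior; the expected output below is
# hand-derived from the heuristics above, not captured from a live run:
#
#     split_sentences("Dr. Smith emailed me at https://example.com. He agreed.")
#     # -> ['Dr. Smith emailed me at https://example.com.', 'He agreed.']
#
# "Dr." survives because TITLES marks it as an abbreviation (its dot is masked
# as __DOT__ during splitting), and the URL survives because it is swapped for
# a placeholder before boundary detection runs.
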
class ElevenlabsTTS(TTSProvider):
    """
    Text-to-speech provider using the ElevenlabsTTS API.
    """
    # Request headers
    headers: dict[str, str] = {
        "User-Agent": LitAgent().random()
    }
    cache_dir = pathlib.Path("./audio_cache")
    all_voices: dict[str, str] = {
        "Brian": "nPczCjzI2devNBz1zQrb", "Alice": "Xb7hH8MSUJpSbSDYk0k2",
        "Bill": "pqHfZKP75CvOlQylNhV4", "Callum": "N2lVS1w4EtoT3dr4eOWO",
        "Charlie": "IKne3meq5aSn9XLyUdCD", "Charlotte": "XB0fDUnXU5powFXDhCwa",
        "Chris": "iP95p4xoKVk53GoZ742B", "Daniel": "onwK4e9ZLuTAKqWW03F9",
        "Eric": "cjVigY5qzO86Huf0OWal", "George": "JBFqnCBsd6RMkjVDRZzb",
        "Jessica": "cgSgspJ2msm6clMCkdW9", "Laura": "FGY2WhTYpPnrIDTdsKH5",
        "Liam": "TX3LPaxmHKxFdv7VOQHJ", "Lily": "pFZP5JQG7iQjIQuC4Bku",
        "Matilda": "XrExE9yKIg1WjnnlVkGX", "Sarah": "EXAVITQu4vr4xnSDxMaL",
        "Will": "bIHbv24MWmeRgasZH58o", "Neal": "Zp1aWhL05Pi5BkhizFC3",
    }

    def __init__(self, timeout: int = 20, proxies: dict = None):
        """Initializes the ElevenlabsTTS TTS client."""
        self.session = requests.Session()
        self.session.headers.update(self.headers)
        if proxies:
            self.session.proxies.update(proxies)
        self.timeout = timeout
        self.params = {'allow_unauthenticated': '1'}

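    # Illustrative construction; the timeout and proxy values are example
    # settings, not requirements of the API:
    #
    #     client = ElevenlabsTTS(timeout=30, proxies={"https": "http://127.0.0.1:8080"})
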
    def tts(self, text: str, voice: str = "Brian", verbose: bool = True) -> str:
        """
        Converts text to speech using the ElevenlabsTTS API and saves it to a file.
        """
        assert (
            voice in self.all_voices
        ), f"Voice '{voice}' not one of [{', '.join(self.all_voices.keys())}]"

        filename = self.cache_dir / f"{int(time.time())}.mp3"

        # Split text into sentences
        sentences = split_sentences(text)

        # Create the audio_cache directory if it doesn't exist
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # Request audio for one chunk, retrying a bounded number of times
        # instead of looping forever on network errors
        def generate_audio_for_chunk(part_text: str, part_number: int, max_retries: int = 3):
            last_error = None
            for _ in range(max_retries):
                try:
                    json_data = {'text': part_text, 'model_id': 'eleven_multilingual_v2'}
                    response = self.session.post(
                        f'https://api.elevenlabs.io/v1/text-to-speech/{self.all_voices[voice]}',
                        params=self.params,
                        json=json_data,
                        timeout=self.timeout,
                    )
                    response.raise_for_status()
                    return part_number, response.content
                except requests.RequestException as e:
                    last_error = e
                    time.sleep(1)
            raise exceptions.FailedToGenerateResponseError(
                f"Failed to generate audio for chunk {part_number}: {last_error}"
            )

        try:
            # Using ThreadPoolExecutor to handle requests concurrently
            with ThreadPoolExecutor() as executor:
                futures = {
                    executor.submit(generate_audio_for_chunk, sentence.strip(), chunk_num): chunk_num
                    for chunk_num, sentence in enumerate(sentences, start=1)
                }

                # Dictionary to store results with order preserved
                audio_chunks = {}

                for future in as_completed(futures):
                    chunk_num = futures[future]
                    try:
                        part_number, audio_data = future.result()
                        audio_chunks[part_number] = audio_data
                        if verbose:
                            print(f"Generated chunk {part_number}/{len(sentences)}")
                    except Exception as e:
                        raise exceptions.FailedToGenerateResponseError(
                            f"Failed to generate audio for chunk {chunk_num}: {e}"
                        )

            # Combine audio chunks in the correct sequence
            combined_audio = BytesIO()
            for part_number in sorted(audio_chunks.keys()):
                combined_audio.write(audio_chunks[part_number])

            # Save the combined audio data to a single file
            with open(filename, 'wb') as f:
                f.write(combined_audio.getvalue())
            return filename.as_posix()

        except requests.exceptions.RequestException as e:
            raise exceptions.FailedToGenerateResponseError(
                f"Failed to perform the operation: {e}"
            )

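# Design note: as_completed() yields futures in completion order rather than
# submission order, so each chunk is keyed by its part_number and the results
# are reassembled with sorted() to preserve the original sentence order.
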
# Example usage
if __name__ == "__main__":
    elevenlabs = ElevenlabsTTS()
    text = "This is a test of the ElevenlabsTTS text-to-speech API. It supports multiple sentences and advanced logging."

    audio_file = elevenlabs.tts(text, voice="Brian")
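    # playsound is imported at the top of the module but otherwise unused;
    # presumably the demo is meant to play the generated file back:
    print(f"Audio saved to: {audio_file}")
    playsound(audio_file)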