Niansuh committed on
Commit
33c2569
·
verified ·
1 Parent(s): 92d5846

Update tts_script.py

Browse files
Files changed (1) hide show
  1. tts_script.py +81 -346
tts_script.py CHANGED
@@ -2,361 +2,96 @@ import time
2
  import requests
3
  import pathlib
4
  from io import BytesIO
5
- from playsound import playsound
6
- from webscout import exceptions
7
- from webscout.AIbase import TTSProvider
8
- from webscout.litagent import LitAgent
9
  from concurrent.futures import ThreadPoolExecutor, as_completed
10
- """
11
- Text processing utilities for TTS providers.
12
- """
13
- from typing import List, Dict, Tuple, Set, Optional, Pattern
14
- import re
15
 
 
 
16
 
17
- class SentenceTokenizer:
18
- """Advanced sentence tokenizer with support for complex cases and proper formatting."""
19
-
20
- def __init__(self) -> None:
21
- # Common abbreviations by category
22
- self.TITLES: Set[str] = {
23
- 'mr', 'mrs', 'ms', 'dr', 'prof', 'rev', 'sr', 'jr', 'esq',
24
- 'hon', 'pres', 'gov', 'atty', 'supt', 'det', 'rev', 'col','maj', 'gen', 'capt', 'cmdr',
25
- 'lt', 'sgt', 'cpl', 'pvt'
26
- }
27
-
28
- self.ACADEMIC: Set[str] = {
29
- 'ph.d', 'phd', 'm.d', 'md', 'b.a', 'ba', 'm.a', 'ma', 'd.d.s', 'dds',
30
- 'm.b.a', 'mba', 'b.sc', 'bsc', 'm.sc', 'msc', 'llb', 'll.b', 'bl'
31
- }
32
-
33
- self.ORGANIZATIONS: Set[str] = {
34
- 'inc', 'ltd', 'co', 'corp', 'llc', 'llp', 'assn', 'bros', 'plc', 'cos',
35
- 'intl', 'dept', 'est', 'dist', 'mfg', 'div'
36
- }
37
-
38
- self.MONTHS: Set[str] = {
39
- 'jan', 'feb', 'mar', 'apr', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'
40
- }
41
-
42
- self.UNITS: Set[str] = {
43
- 'oz', 'pt', 'qt', 'gal', 'ml', 'cc', 'km', 'cm', 'mm', 'ft', 'in',
44
- 'kg', 'lb', 'lbs', 'hz', 'khz', 'mhz', 'ghz', 'kb', 'mb', 'gb', 'tb'
45
- }
46
-
47
- self.TECHNOLOGY: Set[str] = {
48
- 'v', 'ver', 'app', 'sys', 'dir', 'exe', 'lib', 'api', 'sdk', 'url',
49
- 'cpu', 'gpu', 'ram', 'rom', 'hdd', 'ssd', 'lan', 'wan', 'sql', 'html'
50
- }
51
-
52
- self.MISC: Set[str] = {
53
- 'vs', 'etc', 'ie', 'eg', 'no', 'al', 'ca', 'cf', 'pp', 'est', 'st',
54
- 'approx', 'appt', 'apt', 'dept', 'depts', 'min', 'max', 'avg'
55
- }
56
 
57
- # Combine all abbreviations
58
- self.all_abbreviations: Set[str] = (
59
- self.TITLES | self.ACADEMIC | self.ORGANIZATIONS |
60
- self.MONTHS | self.UNITS | self.TECHNOLOGY | self.MISC
61
- )
 
62
 
63
- # Special patterns
64
- self.ELLIPSIS: str = r'\.{2,}|…'
65
- self.URL_PATTERN: str = (
66
- r'(?:https?:\/\/|www\.)[\w\-\.]+\.[a-zA-Z]{2,}(?:\/[^\s]*)?'
67
- )
68
- self.EMAIL_PATTERN: str = r'[\w\.-]+@[\w\.-]+\.\w+'
69
- self.NUMBER_PATTERN: str = (
70
- r'\d+(?:\.\d+)?(?:%|Β°|km|cm|mm|m|kg|g|lb|ft|in|mph|kmh|hz|mhz|ghz)?'
71
- )
72
-
73
- # Quote and bracket pairs
74
- self.QUOTE_PAIRS: Dict[str, str] = {
75
- '"': '"', "'": "'", '"': '"', "γ€Œ": "」", "γ€Ž": "』",
76
- "Β«": "Β»", "β€Ή": "β€Ί", "'": "'", "β€š": "'"
77
- }
78
-
79
- self.BRACKETS: Dict[str, str] = {
80
- '(': ')', '[': ']', '{': '}', '⟨': '⟩', 'γ€Œ': '」',
81
- 'γ€Ž': '』', '【': '】', 'γ€–': 'γ€—', 'ο½’': 'ο½£'
82
- }
83
 
84
- # Compile regex patterns
85
- self._compile_patterns()
 
 
86
 
87
- def _compile_patterns(self) -> None:
88
- """Compile regex patterns for better performance."""
89
- # Pattern for finding potential sentence boundaries
90
- self.SENTENCE_END: Pattern = re.compile(
91
- r'''
92
- # Group for sentence endings
93
- (?:
94
- # Standard endings with optional quotes/brackets
95
- (?<=[.!?])[\"\'\)\]\}»›」』\s]*
96
-
97
- # Ellipsis
98
- |(?:\.{2,}|…)
99
-
100
- # Asian-style endings
101
- |(?<=[γ€‚οΌοΌŸγ€γ€γ€‘\s])
102
- )
103
-
104
- # Must be followed by whitespace and capital letter or number
105
- (?=\s+(?:[A-Z0-9]|["'({[\[γ€Œγ€Žγ€Šβ€Ήγ€ˆ][A-Z]))
106
- ''',
107
- re.VERBOSE
108
- )
109
-
110
- # Pattern for abbreviations
111
- abbrev_pattern = '|'.join(re.escape(abbr) for abbr in self.all_abbreviations)
112
- self.ABBREV_PATTERN: Pattern = re.compile(
113
- fr'\b(?:{abbrev_pattern})\.?',
114
- re.IGNORECASE
115
- )
116
-
117
- def _protect_special_cases(self, text: str) -> Tuple[str, Dict[str, str]]:
118
- """Protect URLs, emails, and other special cases from being split."""
119
- protected = text
120
- placeholders: Dict[str, str] = {}
121
- counter = 0
122
-
123
- # Protect URLs and emails
124
- for pattern in [self.URL_PATTERN, self.EMAIL_PATTERN]:
125
- for match in re.finditer(pattern, protected):
126
- placeholder = f'__PROTECTED_{counter}__'
127
- placeholders[placeholder] = match.group()
128
- protected = protected.replace(match.group(), placeholder)
129
- counter += 1
130
-
131
- # Protect quoted content
132
- stack = []
133
- protected_chars = list(protected)
134
- i = 0
135
- while i < len(protected_chars):
136
- char = protected_chars[i]
137
- if char in self.QUOTE_PAIRS:
138
- stack.append((char, i))
139
- elif stack and char == self.QUOTE_PAIRS[stack[-1][0]]:
140
- start_quote, start_idx = stack.pop()
141
- content = ''.join(protected_chars[start_idx:i + 1])
142
- placeholder = f'__PROTECTED_{counter}__'
143
- placeholders[placeholder] = content
144
- protected_chars[start_idx:i + 1] = list(placeholder)
145
- counter += 1
146
- i += 1
147
-
148
- return ''.join(protected_chars), placeholders
149
-
150
- def _restore_special_cases(self, text: str, placeholders: Dict[str, str]) -> str:
151
- """Restore protected content."""
152
- restored = text
153
- for placeholder, original in placeholders.items():
154
- restored = restored.replace(placeholder, original)
155
- return restored
156
-
157
- def _handle_abbreviations(self, text: str) -> str:
158
- """Handle abbreviations to prevent incorrect sentence splitting."""
159
- def replace_abbrev(match: re.Match) -> str:
160
- abbr = match.group().lower().rstrip('.')
161
- if abbr in self.all_abbreviations:
162
- return match.group().replace('.', '__DOT__')
163
- return match.group()
164
-
165
- return self.ABBREV_PATTERN.sub(replace_abbrev, text)
166
-
167
- def _normalize_whitespace(self, text: str) -> str:
168
- """Normalize whitespace while preserving paragraph breaks."""
169
- # Replace multiple newlines with special marker
170
- text = re.sub(r'\n\s*\n', ' __PARA__ ', text)
171
- # Normalize remaining whitespace
172
- text = re.sub(r'\s+', ' ', text)
173
- return text.strip()
174
-
175
- def _restore_formatting(self, sentences: List[str]) -> List[str]:
176
- """Restore original formatting and clean up sentences."""
177
- restored = []
178
- for sentence in sentences:
179
- # Restore dots in abbreviations
180
- sentence = sentence.replace('__DOT__', '.')
181
-
182
- # Restore paragraph breaks
183
- sentence = sentence.replace('__PARA__', '\n\n')
184
-
185
- # Clean up whitespace
186
- sentence = re.sub(r'\s+', ' ', sentence).strip()
187
-
188
- # Capitalize first letter if it's lowercase and not an abbreviation
189
- words = sentence.split()
190
- if words and words[0].lower() not in self.all_abbreviations:
191
- sentence = sentence[0].upper() + sentence[1:]
192
-
193
- if sentence:
194
- restored.append(sentence)
195
-
196
- return restored
197
-
198
- def tokenize(self, text: str) -> List[str]:
199
- """
200
- Split text into sentences while handling complex cases.
201
-
202
- Args:
203
- text (str): Input text to split into sentences.
204
-
205
- Returns:
206
- List[str]: List of properly formatted sentences.
207
- """
208
- if not text or not text.strip():
209
- return []
210
-
211
- # Step 1: Protect special cases
212
- protected_text, placeholders = self._protect_special_cases(text)
213
-
214
- # Step 2: Normalize whitespace
215
- protected_text = self._normalize_whitespace(protected_text)
216
-
217
- # Step 3: Handle abbreviations
218
- protected_text = self._handle_abbreviations(protected_text)
219
-
220
- # Step 4: Split into potential sentences
221
- potential_sentences = self.SENTENCE_END.split(protected_text)
222
-
223
- # Step 5: Process and restore formatting
224
- sentences = self._restore_formatting(potential_sentences)
225
-
226
- # Step 6: Restore special cases
227
- sentences = [self._restore_special_cases(s, placeholders) for s in sentences]
228
-
229
- # Step 7: Post-process sentences
230
- final_sentences = []
231
- current_sentence = []
232
-
233
- for sentence in sentences:
234
- # Skip empty sentences
235
- if not sentence.strip():
236
- continue
237
-
238
- # Check if sentence might be continuation of previous
239
- if current_sentence and sentence[0].islower():
240
- current_sentence.append(sentence)
241
- else:
242
- if current_sentence:
243
- final_sentences.append(' '.join(current_sentence))
244
- current_sentence = [sentence]
245
-
246
- # Add last sentence if exists
247
- if current_sentence:
248
- final_sentences.append(' '.join(current_sentence))
249
-
250
- return final_sentences
251
-
252
-
253
- def split_sentences(text: str) -> List[str]:
254
- """
255
- Convenience function to split text into sentences using SentenceTokenizer.
256
-
257
- Args:
258
- text (str): Input text to split into sentences.
259
-
260
- Returns:
261
- List[str]: List of properly formatted sentences.
262
- """
263
- tokenizer = SentenceTokenizer()
264
- return tokenizer.tokenize(text)
265
-
266
-
267
- class ElevenlabsTTS(TTSProvider):
268
- """
269
- Text-to-speech provider using the ElevenlabsTTS API.
270
- """
271
- # Request headers
272
- headers: dict[str, str] = {
273
- "User-Agent": LitAgent().random()
274
- }
275
- cache_dir = pathlib.Path("./audio_cache")
276
- all_voices: dict[str, str] = {"Brian": "nPczCjzI2devNBz1zQrb", "Alice":"Xb7hH8MSUJpSbSDYk0k2", "Bill":"pqHfZKP75CvOlQylNhV4", "Callum":"N2lVS1w4EtoT3dr4eOWO", "Charlie":"IKne3meq5aSn9XLyUdCD", "Charlotte":"XB0fDUnXU5powFXDhCwa", "Chris":"iP95p4xoKVk53GoZ742B", "Daniel":"onwK4e9ZLuTAKqWW03F9", "Eric":"cjVigY5qzO86Huf0OWal", "George":"JBFqnCBsd6RMkjVDRZzb", "Jessica":"cgSgspJ2msm6clMCkdW9", "Laura":"FGY2WhTYpPnrIDTdsKH5", "Liam":"TX3LPaxmHKxFdv7VOQHJ", "Lily":"pFZP5JQG7iQjIQuC4Bku", "Matilda":"XrExE9yKIg1WjnnlVkGX", "Sarah":"EXAVITQu4vr4xnSDxMaL", "Will":"bIHbv24MWmeRgasZH58o", "Neal":"Zp1aWhL05Pi5BkhizFC3"}
277
-
278
- def __init__(self, timeout: int = 20, proxies: dict = None):
279
- """Initializes the ElevenlabsTTS TTS client."""
280
- self.session = requests.Session()
281
- self.session.headers.update(self.headers)
282
- if proxies:
283
- self.session.proxies.update(proxies)
284
- self.timeout = timeout
285
- self.params = {'allow_unauthenticated': '1'}
286
-
287
- def tts(self, text: str, voice: str = "Brian", verbose:bool = True) -> str:
288
- """
289
- Converts text to speech using the ElevenlabsTTS API and saves it to a file.
290
- """
291
- assert (
292
- voice in self.all_voices
293
- ), f"Voice '{voice}' not one of [{', '.join(self.all_voices.keys())}]"
294
-
295
- filename = self.cache_dir / f"{int(time.time())}.mp3"
296
-
297
- # Split text into sentences
298
- sentences = split_sentences(text)
299
-
300
- # Function to request audio for each chunk
301
- def generate_audio_for_chunk(part_text: str, part_number: int):
302
- while True:
303
- try:
304
- json_data = {'text': part_text, 'model_id': 'eleven_multilingual_v2'}
305
- response = self.session.post(f'https://api.elevenlabs.io/v1/text-to-speech/{self.all_voices[voice]}',params=self.params, headers=self.headers, json=json_data, timeout=self.timeout)
306
- response.raise_for_status()
307
-
308
- # Create the audio_cache directory if it doesn't exist
309
- self.cache_dir.mkdir(parents=True, exist_ok=True)
310
-
311
- # Check if the request was successful
312
- if response.ok and response.status_code == 200:
313
- return part_number, response.content
314
- else:
315
- raise exceptions.FailedToGenerateResponseError(
316
- f"Failed to generate audio for chunk {part_number}: {response.status_code}"
317
- )
318
- except requests.RequestException as e:
319
- time.sleep(1)
320
- continue
321
 
 
322
  try:
323
- # Using ThreadPoolExecutor to handle requests concurrently
324
- with ThreadPoolExecutor() as executor:
325
- futures = {executor.submit(generate_audio_for_chunk, sentence.strip(), chunk_num): chunk_num
326
- for chunk_num, sentence in enumerate(sentences, start=1)}
327
-
328
- # Dictionary to store results with order preserved
329
- audio_chunks = {}
330
-
331
- for future in as_completed(futures):
332
- chunk_num = futures[future]
333
- try:
334
- part_number, audio_data = future.result()
335
- audio_chunks[part_number] = audio_data
336
- except Exception as e:
337
- raise exceptions.FailedToGenerateResponseError(
338
- f"Failed to generate audio for chunk {chunk_num}: {e}"
339
- )
340
-
341
- # Combine audio chunks in the correct sequence
342
- combined_audio = BytesIO()
343
- for part_number in sorted(audio_chunks.keys()):
344
- combined_audio.write(audio_chunks[part_number])
345
-
346
- # Save the combined audio data to a single file
347
- with open(filename, 'wb') as f:
348
- f.write(combined_audio.getvalue())
349
- return filename.as_posix()
350
-
351
- except requests.exceptions.RequestException as e:
352
- raise exceptions.FailedToGenerateResponseError(
353
- f"Failed to perform the operation: {e}"
354
  )
355
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
 
357
- # Example usage
358
  if __name__ == "__main__":
359
- elevenlabs = ElevenlabsTTS()
360
- text = "This is a test of the ElevenlabsTTS text-to-speech API. It supports multiple sentences and advanced logging."
361
-
362
- audio_file = elevenlabs.tts(text, voice="Brian")
 
2
  import requests
3
  import pathlib
4
  from io import BytesIO
5
+ from flask import Flask, request, jsonify, send_file
 
 
 
6
  from concurrent.futures import ThreadPoolExecutor, as_completed
 
 
 
 
 
7
 
8
+ # Flask App Setup
9
+ app = Flask(__name__)
10
 
11
+ # ElevenLabs API Configuration
12
+ ELEVENLABS_API_URL = "https://api.elevenlabs.io/v1/text-to-speech"
13
+ HEADERS = {"User-Agent": "TTSApp"}
14
+ CACHE_DIR = pathlib.Path("./audio_cache")
15
+ CACHE_DIR.mkdir(exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ # Available Voices
18
+ ALL_VOICES = {
19
+ "Brian": "nPczCjzI2devNBz1zQrb",
20
+ "Alice": "Xb7hH8MSUJpSbSDYk0k2",
21
+ "Will": "bIHbv24MWmeRgasZH58o",
22
+ }
23
 
24
+ # Split text into sentences (Basic)
25
+ def split_sentences(text):
26
+ return text.split(". ")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
+ # Generate TTS
29
+ def generate_audio(text, voice):
30
+ if voice not in ALL_VOICES:
31
+ return {"error": f"Invalid voice '{voice}'"}
32
 
33
+ filename = CACHE_DIR / f"{int(time.time())}.mp3"
34
+ sentences = split_sentences(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
+ def fetch_audio(sentence, part_number):
37
  try:
38
+ response = requests.post(
39
+ f"{ELEVENLABS_API_URL}/{ALL_VOICES[voice]}",
40
+ headers=HEADERS,
41
+ json={"text": sentence, "model_id": "eleven_multilingual_v2"},
42
+ timeout=20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  )
44
+ response.raise_for_status()
45
+ return part_number, response.content
46
+ except requests.RequestException:
47
+ return part_number, None
48
+
49
+ audio_chunks = {}
50
+ with ThreadPoolExecutor() as executor:
51
+ futures = {executor.submit(fetch_audio, sentence.strip(), i): i for i, sentence in enumerate(sentences)}
52
+
53
+ for future in as_completed(futures):
54
+ part_number, audio_data = future.result()
55
+ if audio_data:
56
+ audio_chunks[part_number] = audio_data
57
+
58
+ combined_audio = BytesIO()
59
+ for part_number in sorted(audio_chunks.keys()):
60
+ combined_audio.write(audio_chunks[part_number])
61
+
62
+ with open(filename, "wb") as f:
63
+ f.write(combined_audio.getvalue())
64
+
65
+ return filename.as_posix()
66
+
67
+ # Flask Routes
68
+ @app.route("/")
69
+ def home():
70
+ return '''
71
+ <h1>Text-to-Speech API</h1>
72
+ <form action="/tts" method="post">
73
+ <label>Text:</label>
74
+ <input type="text" name="text" required>
75
+ <label>Voice:</label>
76
+ <select name="voice">
77
+ <option value="Brian">Brian</option>
78
+ <option value="Alice">Alice</option>
79
+ <option value="Will">Will</option>
80
+ </select>
81
+ <button type="submit">Generate</button>
82
+ </form>
83
+ '''
84
+
85
+ @app.route("/tts", methods=["POST"])
86
+ def tts():
87
+ text = request.form.get("text")
88
+ voice = request.form.get("voice", "Brian")
89
+
90
+ if not text:
91
+ return jsonify({"error": "Text is required!"})
92
+
93
+ audio_file = generate_audio(text, voice)
94
+ return send_file(audio_file, as_attachment=True)
95
 
 
96
  if __name__ == "__main__":
97
+ app.run(debug=True, host="0.0.0.0", port=5000)