|
import time |
|
import requests |
|
from io import BytesIO |
|
from pathlib import Path |
|
from typing import List |
|
import re |
|
import tempfile |
|
from flask import Flask, request, render_template, send_file |
|
|
|
# Flask application object; the route handlers in this module register on it.
app = Flask(__name__)
|
|
|
class SentenceTokenizer:
    """Regex-based sentence splitter.

    A sentence boundary is a run of whitespace that either

    * follows ``.``, ``!`` or ``?`` and is followed by an ASCII capital
      letter, or
    * follows ``。``, ``!`` or ``?`` (no constraint on what comes next).

    A ``.`` followed by a lowercase word (e.g. ``"Dr. smith"``) is therefore
    not treated as a boundary.
    """

    # Compiled once at class creation instead of on every instantiation;
    # instances still reach it as ``self.SENTENCE_END``.  The pattern holds
    # no literal whitespace or ``#``, so ``re.VERBOSE`` had no effect on it
    # and is omitted to avoid silently swallowing a future literal space.
    SENTENCE_END = re.compile(r'(?<=[.!?])\s+(?=[A-Z])|(?<=[。!?])\s+')

    def tokenize(self, text: str) -> List[str]:
        """Split *text* into stripped, non-empty sentences.

        Args:
            text: Input text. ``None``, empty and whitespace-only input are
                all accepted.

        Returns:
            The sentences in their original order, or ``[]`` when there is
            no non-whitespace content.
        """
        if not text or not text.strip():
            return []

        pieces = self.SENTENCE_END.split(text.strip())
        # Strip each piece and drop any that end up empty.
        return [piece.strip() for piece in pieces if piece.strip()]
|
|
|
def split_sentences(text: str) -> List[str]:
    """Return the sentences of *text* as produced by ``SentenceTokenizer.tokenize``."""
    return SentenceTokenizer().tokenize(text)
|
|
|
class ElevenlabsTTS:
    """Text-to-speech provider using the ElevenLabs HTTP API.

    The input text is split into sentences, each sentence is synthesized with
    its own POST request, and the returned audio bytes are concatenated into
    a single ``.mp3`` file in the system temporary directory.
    """

    def __init__(self):
        # Shared session so the User-Agent header is sent with every request.
        self.session = requests.Session()
        self.session.headers.update({"User-Agent": "Mozilla/5.0"})
        # Generated audio files are written here.
        self.cache_dir = Path(tempfile.gettempdir())
        # Voice display name -> voice id placed in the request URL.
        self.all_voices = {
            "Brian": "nPczCjzI2devNBz1zQrb", "Alice": "Xb7hH8MSUJpSbSDYk0k2",
            "Bill": "pqHfZKP75CvOlQylNhV4", "Callum": "N2lVS1w4EtoT3dr4eOWO",
            "Charlie": "IKne3meq5aSn9XLyUdCD", "Charlotte": "XB0fDUnXU5powFXDhCwa",
            "Chris": "iP95p4xoKVk53GoZ742B", "Daniel": "onwK4e9ZLuTAKqWW03F9",
            "Eric": "cjVigY5qzO86Huf0OWal", "George": "JBFqnCBsd6RMkjVDRZzb",
            "Jessica": "cgSgspJ2msm6clMCkdW9", "Laura": "FGY2WhTYpPnrIDTdsKH5",
            "Liam": "TX3LPaxmHKxFdv7VOQHJ", "Lily": "pFZP5JQG7iQjIQuC4Bku",
            "Matilda": "XrExE9yKIg1WjnnlVkGX", "Sarah": "EXAVITQu4vr4xnSDxMaL",
            "Will": "bIHbv24MWmeRgasZH58o", "Neal": "Zp1aWhL05Pi5BkhizFC3"
        }
        # Query parameters attached to every synthesis request.
        self.params = {'allow_unauthenticated': '1'}

    def tts(self, text: str, voice: str = "Brian") -> str:
        """Synthesize *text* with *voice* and return the path of the MP3 file.

        Args:
            text: Text to speak; it is split into sentences and each sentence
                is sent as a separate request.
            voice: A key of ``self.all_voices``.

        Returns:
            POSIX-style path of the generated file inside ``self.cache_dir``.

        Raises:
            ValueError: If *voice* is unknown, or *text* is ``None``, empty
                or whitespace-only.
            requests.HTTPError: If any synthesis request returns an error
                status (via ``raise_for_status``).
        """
        if voice not in self.all_voices:
            raise ValueError(f"Voice '{voice}' not available")
        # Reject blank input up front instead of writing a zero-byte file.
        if not text or not text.strip():
            raise ValueError("Text must not be empty")

        sentences = split_sentences(text)
        # The endpoint depends only on the voice, so build it once.
        url = f'https://api.elevenlabs.io/v1/text-to-speech/{self.all_voices[voice]}'

        audio_chunks = []
        for sentence in sentences:
            response = self.session.post(
                url,
                params=self.params,
                json={'text': sentence, 'model_id': 'eleven_multilingual_v2'},
                timeout=20,
            )
            response.raise_for_status()
            audio_chunks.append(response.content)

        # A timestamp-only name collides when two requests arrive within the
        # same second; NamedTemporaryFile appends a unique suffix.  The file
        # is created only after every request succeeded, and delete=False
        # keeps it on disk after the handle is closed.
        with tempfile.NamedTemporaryFile(
            mode='wb',
            prefix=f"tts_{int(time.time())}_",
            suffix=".mp3",
            dir=self.cache_dir,
            delete=False,
        ) as out:
            out.write(b"".join(audio_chunks))
        return Path(out.name).as_posix()
|
|
|
|
|
# Single provider instance created at import time and used by the route handlers.
tts_provider = ElevenlabsTTS()
|
|
|
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the main page and, on POST, synthesize the submitted text.

    GET renders ``index.html`` with the list of voice names.  POST reads the
    ``text`` and ``voice`` form fields (``voice`` defaults to ``'Brian'``),
    runs the TTS provider and re-renders the page with either the generated
    ``audio_file`` or an ``error`` message.

    NOTE(review): ``audio_file`` is the full path returned by
    ``tts_provider.tts`` while the ``/audio/<filename>`` route takes a bare
    file name -- confirm the template derives the name from the path.
    """
    voices = tts_provider.all_voices.keys()

    if request.method != 'POST':
        return render_template('index.html', voices=voices)

    text = request.form.get('text')
    voice = request.form.get('voice', 'Brian')

    # Only the TTS call is guarded: template errors should surface as real
    # server errors rather than being re-rendered as a user-facing message.
    try:
        audio_file = tts_provider.tts(text, voice)
    except Exception as e:
        # Request-boundary catch-all: keep the traceback in the server log,
        # and echo the submitted values back so the form keeps the input.
        app.logger.exception("TTS generation failed")
        return render_template('index.html',
                               error=str(e),
                               voices=voices,
                               text=text,
                               voice=voice)

    return render_template('index.html',
                           audio_file=audio_file,
                           voices=voices,
                           text=text,
                           voice=voice)
|
|
|
@app.route('/audio/<filename>')
def serve_audio(filename):
    """Serve a generated MP3 from the system temporary directory.

    Args:
        filename: File name taken from the URL (untrusted input).

    Returns:
        The file as an ``audio/mpeg`` response.  Responds with 404 when the
        name is not of the form ``tts_<suffix>.mp3`` or the file is missing.
    """
    # Function-scope import: the module-level flask import does not include
    # these two helpers.
    from flask import abort, send_from_directory

    # The temp directory is shared with other programs, so only names using
    # the ``tts_`` prefix and ``.mp3`` suffix written by ElevenlabsTTS.tts
    # are served.
    if not re.fullmatch(r'tts_[A-Za-z0-9_]+\.mp3', filename):
        abort(404)

    # send_from_directory joins the path safely and answers 404 (instead of
    # an unhandled FileNotFoundError) when the file does not exist.
    return send_from_directory(tempfile.gettempdir(), filename, mimetype='audio/mpeg')
|
|
|
if __name__ == "__main__":
    # Script entry point: start Flask's built-in server on port 5000, bound
    # to all network interfaces.
    app.run(port=5000, host='0.0.0.0')