|
import time |
|
import requests |
|
from io import BytesIO |
|
from pathlib import Path |
|
from typing import List |
|
import re |
|
import tempfile |
|
from flask import Flask, request, render_template, send_file |
|
|
|
# Flask application object; the @app.route decorator further down registers
# the view function on it, and the __main__ guard runs it.
app = Flask(__name__)
|
|
|
class SentenceTokenizer:
    """Regex-driven splitter that breaks a text into sentences.

    A sentence boundary is a run of whitespace matched by ``SENTENCE_END``:
    either whitespace that follows ``.``, ``!`` or ``?`` and is followed by
    an ASCII capital letter, or whitespace that follows one of the marks in
    the pattern's second character class.
    """

    def __init__(self):
        # re.VERBOSE changes nothing for this pattern: it contains no
        # literal whitespace and no '#', which are all VERBOSE would drop.
        boundary_pattern = r'(?<=[.!?])\s+(?=[A-Z])|(?<=[。!?])\s+'
        self.SENTENCE_END = re.compile(boundary_pattern, re.VERBOSE)

    def tokenize(self, text: str) -> List[str]:
        """Return the stripped, non-empty sentences contained in *text*.

        Falsy input (``None``, ``""``) and whitespace-only input produce an
        empty list.
        """
        trimmed = text.strip() if text else ""
        if not trimmed:
            return []

        # Strip each fragment first, then discard the ones left empty.
        fragments = (part.strip() for part in self.SENTENCE_END.split(trimmed))
        return [fragment for fragment in fragments if fragment]
|
|
|
def split_sentences(text: str) -> List[str]:
    """Split *text* into sentences with a freshly built ``SentenceTokenizer``.

    Convenience wrapper: returns exactly what ``SentenceTokenizer.tokenize``
    returns for the same input.
    """
    return SentenceTokenizer().tokenize(text)
|
|
|
class ElevenlabsTTS:
    """Text-to-speech provider using the Elevenlabs HTTP API.

    The input text is split into sentences, one request is sent per
    sentence, and the response bodies are concatenated in order into a
    single ``.mp3`` file inside ``cache_dir``.
    """

    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({"User-Agent": "Mozilla/5.0"})

        # Generated audio files are written to the system temp directory.
        self.cache_dir = Path(tempfile.gettempdir())
        # Voice display name -> voice id used in the request URL.
        self.all_voices = {
            "Brian": "nPczCjzI2devNBz1zQrb",
            "Alice": "Xb7hH8MSUJpSbSDYk0k2",
        }
        # Query-string parameters sent with every synthesis request.
        self.params = {'allow_unauthenticated': '1'}

    def tts(self, text: str, voice: str = "Brian") -> str:
        """Synthesise *text* with *voice* and return the audio file path.

        Args:
            text: Text to convert; it is split into sentences and each
                sentence is sent as its own request.
            voice: Key of ``all_voices`` selecting the voice to use.

        Returns:
            POSIX-style path (as ``str``) of the newly written ``.mp3``
            file. The file is not deleted by this method; removing it is
            left to the caller.

        Raises:
            ValueError: If *voice* is not in ``all_voices`` or *text* is
                empty / whitespace-only.
            requests.RequestException: If a request fails or the API
                answers with an error status (via ``raise_for_status``).
        """
        if voice not in self.all_voices:
            raise ValueError(f"Voice '{voice}' not available")

        # Fail fast instead of producing a zero-byte audio file.
        if not text or not text.strip():
            raise ValueError("Text must not be empty")

        sentences = split_sentences(text)
        url = f'https://api.elevenlabs.io/v1/text-to-speech/{self.all_voices[voice]}'

        # All requests complete before any file is created, so a failed
        # request leaves nothing behind on disk.
        audio_chunks = []
        for sentence in sentences:
            response = self.session.post(
                url,
                params=self.params,
                json={'text': sentence, 'model_id': 'eleven_multilingual_v2'},
                timeout=20,
            )
            response.raise_for_status()
            audio_chunks.append(response.content)

        # A second-resolution timestamp made two calls within the same
        # second share (and overwrite) one file; NamedTemporaryFile picks a
        # unique name. delete=False keeps the file after the handle closes.
        with tempfile.NamedTemporaryFile(
            mode='wb',
            prefix='tts_',
            suffix='.mp3',
            dir=self.cache_dir,
            delete=False,
        ) as out_file:
            out_file.write(b"".join(audio_chunks))
            filename = Path(out_file.name)

        return filename.as_posix()
|
|
|
|
|
# Module-level provider instance, created once at import time and used by
# the index view for every request.
tts_provider = ElevenlabsTTS()
|
|
|
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the form on GET; on POST return the synthesised audio.

    A POST reads the ``text`` and ``voice`` form fields (``voice`` defaults
    to ``'Brian'``), asks ``tts_provider`` for an audio file and sends it as
    an ``audio/mpeg`` attachment. Any exception raised while synthesising or
    sending re-renders the form with the exception text as ``error``.
    """
    # Guard clause: anything other than POST just shows the form.
    if request.method != 'POST':
        return render_template('index.html', voices=tts_provider.all_voices.keys())

    form = request.form
    text = form.get('text')
    voice = form.get('voice', 'Brian')

    try:
        audio_file = tts_provider.tts(text, voice)
        return send_file(audio_file, mimetype='audio/mpeg', as_attachment=True)
    except Exception as e:
        # Request boundary: report the failure on the page, not as a 500.
        return render_template(
            'index.html',
            error=str(e),
            voices=tts_provider.all_voices.keys(),
        )
|
|
|
# When executed as a script, start Flask's built-in server on port 5000,
# bound to 0.0.0.0 so it accepts connections on every network interface.
if __name__ == "__main__":
    app.run(host='0.0.0.0', port=5000)