"""Flask front end that turns submitted text into speech via the Elevenlabs API."""

import re
import tempfile
import time
from io import BytesIO
from pathlib import Path
from typing import List

import requests
from flask import Flask, abort, render_template, request, send_file

app = Flask(__name__)


class SentenceTokenizer:
    """Split text into sentences at whitespace that follows end punctuation."""

    # Two alternatives:
    #   * whitespace after '.', '!' or '?' when the next character is an
    #     ASCII capital letter;
    #   * any whitespace after '。', '!' or '?'.
    # Net effect: a '.' only ends a sentence before a capital letter, while
    # '!', '?' and '。' always do.
    SENTENCE_END = re.compile(r'(?<=[.!?])\s+(?=[A-Z])|(?<=[。!?])\s+')

    def tokenize(self, text: str) -> List[str]:
        """Return the non-empty, stripped sentences found in *text*.

        Args:
            text: Input text. ``None``, empty and whitespace-only values
                are accepted.

        Returns:
            The sentences in their original order; an empty list when
            there is nothing to split.
        """
        if not text or not text.strip():
            return []

        pieces = self.SENTENCE_END.split(text.strip())
        return [piece.strip() for piece in pieces if piece.strip()]


def split_sentences(text: str) -> List[str]:
    """Split *text* into sentences using :class:`SentenceTokenizer`."""
    return SentenceTokenizer().tokenize(text)


class ElevenlabsTTS:
    """Text-to-speech provider using Elevenlabs API."""

    API_URL = 'https://api.elevenlabs.io/v1/text-to-speech/{voice_id}'
    MODEL_ID = 'eleven_multilingual_v2'
    REQUEST_TIMEOUT = 20  # seconds, applied to every synthesis request

    def __init__(self):
        # One session is shared by all requests made through this provider.
        self.session = requests.Session()
        self.session.headers.update({"User-Agent": "Mozilla/5.0"})
        # Generated audio is written to the system temporary directory.
        self.cache_dir = Path(tempfile.gettempdir())
        # Display name -> Elevenlabs voice id.
        self.all_voices = {
            "Brian": "nPczCjzI2devNBz1zQrb",
            "Alice": "Xb7hH8MSUJpSbSDYk0k2",
            "Bill": "pqHfZKP75CvOlQylNhV4",
            "Callum": "N2lVS1w4EtoT3dr4eOWO",
            "Charlie": "IKne3meq5aSn9XLyUdCD",
            "Charlotte": "XB0fDUnXU5powFXDhCwa",
            "Chris": "iP95p4xoKVk53GoZ742B",
            "Daniel": "onwK4e9ZLuTAKqWW03F9",
            "Eric": "cjVigY5qzO86Huf0OWal",
            "George": "JBFqnCBsd6RMkjVDRZzb",
            "Jessica": "cgSgspJ2msm6clMCkdW9",
            "Laura": "FGY2WhTYpPnrIDTdsKH5",
            "Liam": "TX3LPaxmHKxFdv7VOQHJ",
            "Lily": "pFZP5JQG7iQjIQuC4Bku",
            "Matilda": "XrExE9yKIg1WjnnlVkGX",
            "Sarah": "EXAVITQu4vr4xnSDxMaL",
            "Will": "bIHbv24MWmeRgasZH58o",
            "Neal": "Zp1aWhL05Pi5BkhizFC3",
        }
        # Query-string parameters sent with every synthesis request.
        self.params = {'allow_unauthenticated': '1'}

    def tts(self, text: str, voice: str = "Brian") -> str:
        """Synthesize *text* sentence by sentence and save it as one mp3 file.

        Args:
            text: Text to speak. It is split with :func:`split_sentences`
                and each sentence is sent as a separate request.
            voice: Key of ``self.all_voices`` selecting the voice.

        Returns:
            POSIX-style path of the generated mp3 file inside
            ``self.cache_dir``.

        Raises:
            ValueError: If *voice* is unknown or *text* contains no
                sentences.
            requests.RequestException: If a request fails, times out or
                returns an error status.
        """
        if voice not in self.all_voices:
            raise ValueError(f"Voice '{voice}' not available")

        sentences = split_sentences(text)
        if not sentences:
            # Without this guard an empty mp3 file would be written.
            raise ValueError("Text must contain at least one sentence")

        url = self.API_URL.format(voice_id=self.all_voices[voice])

        # Buffer everything in memory first so a failed request never
        # leaves a partially written file behind.
        combined_audio = BytesIO()
        for sentence in sentences:
            response = self.session.post(
                url,
                params=self.params,
                json={'text': sentence, 'model_id': self.MODEL_ID},
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            combined_audio.write(response.content)

        # A timestamp alone collides when two requests land in the same
        # second; NamedTemporaryFile appends a unique suffix.
        with tempfile.NamedTemporaryFile(
            mode='wb',
            prefix=f"tts_{int(time.time())}_",
            suffix='.mp3',
            dir=self.cache_dir,
            delete=False,
        ) as audio_file:
            audio_file.write(combined_audio.getvalue())
            filename = Path(audio_file.name)

        return filename.as_posix()


# Web Interface
tts_provider = ElevenlabsTTS()

# Only files produced by ElevenlabsTTS.tts may be served.
_AUDIO_NAME = re.compile(r'tts_\w+\.mp3')


@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the form; on POST, synthesize the submitted text first."""
    voices = tts_provider.all_voices.keys()

    if request.method != 'POST':
        return render_template('index.html', voices=voices)

    text = request.form.get('text')
    voice = request.form.get('voice', 'Brian')

    try:
        audio_file = tts_provider.tts(text, voice)
    except Exception as e:  # request boundary: show the failure to the user
        app.logger.exception("TTS generation failed")
        return render_template('index.html', error=str(e), voices=voices)

    return render_template(
        'index.html',
        audio_file=audio_file,
        voices=voices,
        text=text,
        voice=voice,
    )


@app.route('/audio/<path:filename>')
def serve_audio(filename):
    """Serve a previously generated mp3 file from the temporary directory.

    Responds with 404 when the name does not look like a generated file or
    the file does not exist.
    """
    # NOTE(review): index() hands the template the full path returned by
    # tts(), and the template is not visible here. The route therefore
    # accepts either a bare file name or a path and keeps only the final
    # component -- confirm how index.html builds this URL.
    name = Path(filename).name
    if not _AUDIO_NAME.fullmatch(name):
        abort(404)

    audio_path = Path(tempfile.gettempdir()) / name
    if not audio_path.is_file():
        abort(404)

    return send_file(audio_path, mimetype='audio/mpeg')


if __name__ == "__main__":
    app.run(host='0.0.0.0', port=5000)