File size: 3,952 Bytes
dc0538c c1d9f4f e65b5bc dc0538c ed55f2d dc0538c ed55f2d c1d9f4f dc0538c ed55f2d 8662041 dc0538c ed55f2d dc0538c ed55f2d dc0538c ed55f2d dc0538c b2d49c3 e65b5bc dc0538c 9a058f1 dc0538c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import time
import requests
from io import BytesIO
from pathlib import Path
from typing import List
import re
import tempfile
from flask import Flask, request, render_template, send_file
# Flask application object; the route handlers defined in this file are
# registered on it via @app.route.
app = Flask(__name__)
class SentenceTokenizer:
    """Regex-based sentence splitter.

    Sentences are separated at runs of whitespace that directly follow
    sentence-ending punctuation. The pattern has two alternatives:

    * whitespace preceded by ``.``, ``!`` or ``?`` and followed by an ASCII
      capital letter;
    * whitespace preceded by a character from the second class in the
      pattern (which includes the ideographic full stop), with no constraint
      on what follows.
    """

    def __init__(self):
        # The pattern contains no literal whitespace or '#', so the VERBOSE
        # flag does not alter how it is interpreted.
        boundary_pattern = r'(?<=[.!?])\s+(?=[A-Z])|(?<=[。!?])\s+'
        self.SENTENCE_END = re.compile(boundary_pattern, re.VERBOSE)

    def tokenize(self, text: str) -> List[str]:
        """Return the sentences found in *text*.

        Each returned sentence has surrounding whitespace removed, and empty
        pieces are dropped. A falsy or whitespace-only *text* yields ``[]``.
        """
        stripped = text.strip() if text else ""
        if not stripped:
            return []

        result = []
        for piece in self.SENTENCE_END.split(stripped):
            cleaned = piece.strip()
            if cleaned:
                result.append(cleaned)
        return result
def split_sentences(text: str) -> List[str]:
    """Split *text* into sentences.

    Convenience wrapper that builds a new ``SentenceTokenizer`` and returns
    the result of its ``tokenize`` method.
    """
    return SentenceTokenizer().tokenize(text)
class ElevenlabsTTS:
    """Text-to-speech provider using Elevenlabs API.

    Holds a ``requests`` session, a mapping of voice names to Elevenlabs
    voice IDs (``all_voices``), the query parameters sent with every request
    (``params``) and the directory generated audio is written to
    (``cache_dir``, the system temporary directory).
    """

    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({"User-Agent": "Mozilla/5.0"})
        self.cache_dir = Path(tempfile.gettempdir())
        # Voice display name -> Elevenlabs voice ID used in the request URL.
        self.all_voices = {
            "Brian": "nPczCjzI2devNBz1zQrb", "Alice": "Xb7hH8MSUJpSbSDYk0k2",
            "Bill": "pqHfZKP75CvOlQylNhV4", "Callum": "N2lVS1w4EtoT3dr4eOWO",
            "Charlie": "IKne3meq5aSn9XLyUdCD", "Charlotte": "XB0fDUnXU5powFXDhCwa",
            "Chris": "iP95p4xoKVk53GoZ742B", "Daniel": "onwK4e9ZLuTAKqWW03F9",
            "Eric": "cjVigY5qzO86Huf0OWal", "George": "JBFqnCBsd6RMkjVDRZzb",
            "Jessica": "cgSgspJ2msm6clMCkdW9", "Laura": "FGY2WhTYpPnrIDTdsKH5",
            "Liam": "TX3LPaxmHKxFdv7VOQHJ", "Lily": "pFZP5JQG7iQjIQuC4Bku",
            "Matilda": "XrExE9yKIg1WjnnlVkGX", "Sarah": "EXAVITQu4vr4xnSDxMaL",
            "Will": "bIHbv24MWmeRgasZH58o", "Neal": "Zp1aWhL05Pi5BkhizFC3",
        }
        self.params = {'allow_unauthenticated': '1'}

    def tts(self, text: str, voice: str = "Brian") -> str:
        """Synthesize *text* with *voice* and return the path of the MP3 file.

        The text is split into sentences, each sentence is posted to the
        text-to-speech endpoint separately, and the response bodies are
        concatenated in sentence order into a single file inside
        ``self.cache_dir``.

        Args:
            text: The text to speak. Must contain non-whitespace characters.
            voice: A key of ``self.all_voices``.

        Returns:
            The POSIX-style path of the written ``tts_*.mp3`` file.

        Raises:
            ValueError: If *voice* is unknown or *text* is empty/blank.
            Exception: Whatever ``session.post`` or ``raise_for_status``
                raises for a failed request is propagated unchanged.
        """
        if voice not in self.all_voices:
            raise ValueError(f"Voice '{voice}' not available")
        # Without this guard a blank input produces a zero-byte .mp3 file.
        if not text or not text.strip():
            raise ValueError("Text must not be empty")

        # The endpoint depends only on the voice, so build it once.
        url = f'https://api.elevenlabs.io/v1/text-to-speech/{self.all_voices[voice]}'
        audio_chunks = []
        for sentence in split_sentences(text):
            response = self.session.post(
                url,
                params=self.params,
                json={'text': sentence, 'model_id': 'eleven_multilingual_v2'},
                timeout=20,
            )
            response.raise_for_status()
            audio_chunks.append(response.content)

        # Create the file only after every request succeeded, and let
        # tempfile pick a unique name: a name based on the current second
        # lets two calls within the same second overwrite each other.
        with tempfile.NamedTemporaryFile(
            mode='wb',
            prefix='tts_',
            suffix='.mp3',
            dir=self.cache_dir,
            delete=False,
        ) as audio_file:
            audio_file.write(b"".join(audio_chunks))
        return Path(audio_file.name).as_posix()
# Web Interface
# Module-level TTS provider instance used by the index route below.
tts_provider = ElevenlabsTTS()
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the main page and handle synthesis requests.

    On any method other than POST the page is rendered with just the list of
    voice names. On POST the ``text`` and ``voice`` form fields (voice
    defaults to ``'Brian'``) are passed to ``tts_provider.tts``; the page is
    then rendered with the resulting audio path, or with the error message
    if anything raised.
    """
    voice_names = tts_provider.all_voices.keys()

    if request.method != 'POST':
        return render_template('index.html', voices=voice_names)

    text = request.form.get('text')
    voice = request.form.get('voice', 'Brian')
    try:
        context = {
            'audio_file': tts_provider.tts(text, voice),
            'voices': voice_names,
            'text': text,
            'voice': voice,
        }
        return render_template('index.html', **context)
    except Exception as e:
        # Top-level request boundary: show the failure on the page.
        return render_template('index.html', error=str(e), voices=voice_names)
@app.route('/audio/<filename>')
def serve_audio(filename):
    """Serve a generated MP3 from the system temporary directory.

    Args:
        filename: The last URL path segment; supplied by the client and
            therefore untrusted.

    Returns:
        The file as an ``audio/mpeg`` response, or a ``404`` response when
        the name is not a ``tts_*.mp3`` name or no such file exists.
    """
    # Only names shaped like this app's own output are allowed; otherwise any
    # file that happens to live in the shared temp directory could be fetched.
    if not re.fullmatch(r'tts_[A-Za-z0-9_]+\.mp3', filename):
        return "Audio file not found", 404

    audio_path = Path(tempfile.gettempdir()) / filename
    # Answer with 404 instead of letting a missing file surface as an error.
    if not audio_path.is_file():
        return "Audio file not found", 404
    return send_file(audio_path, mimetype='audio/mpeg')
if __name__ == "__main__":
    # Start the Flask server on all interfaces, port 5000, when this file is
    # run as a script.
    app.run(host='0.0.0.0', port=5000)