elve / app.py
Niansuh's picture
Update app.py
b032d22 verified
raw
history blame
4.09 kB
import time
import requests
from io import BytesIO
from pathlib import Path
from typing import List
import re
import tempfile
from flask import Flask, request, render_template, send_file, url_for
# Flask application object; the route handlers defined in this file are
# registered on it through the @app.route decorator.
app = Flask(__name__)
class SentenceTokenizer:
    """Regex-based splitter that breaks a text into sentences.

    A boundary is a run of whitespace that either follows ``.``, ``!`` or
    ``?`` and precedes an ASCII capital letter, or follows one of the
    characters in the second look-behind class (which includes the CJK
    full stop) regardless of what comes next.
    """

    def __init__(self):
        boundary = r'(?<=[.!?])\s+(?=[A-Z])|(?<=[。!?])\s+'
        # Public attribute name kept as-is; callers may reach for it.
        self.SENTENCE_END = re.compile(boundary, re.VERBOSE)

    def tokenize(self, text: str) -> List[str]:
        """Return the non-empty, whitespace-trimmed sentences of *text*.

        Falsy or whitespace-only input yields an empty list.
        """
        trimmed = text.strip() if text else ""
        if not trimmed:
            return []

        sentences = []
        for fragment in self.SENTENCE_END.split(trimmed):
            fragment = fragment.strip()
            if fragment:
                sentences.append(fragment)
        return sentences
def split_sentences(text: str) -> List[str]:
    """Split *text* into sentences with a freshly built SentenceTokenizer."""
    return SentenceTokenizer().tokenize(text)
class ElevenlabsTTS:
    """Text-to-speech provider using Elevenlabs API.

    The input text is split into sentences, every sentence is sent to the
    API in its own POST request, and the returned audio bytes are
    concatenated into one ``.mp3`` file inside the system temp directory.
    """

    def __init__(self):
        # Shared HTTP session carrying a browser-like User-Agent header.
        self.session = requests.Session()
        self.session.headers.update({"User-Agent": "Mozilla/5.0"})
        # Directory the generated audio files are written to.
        self.cache_dir = Path(tempfile.gettempdir())
        # Display name -> voice id that is placed in the request URL.
        self.all_voices = {
            "Brian": "nPczCjzI2devNBz1zQrb", "Alice": "Xb7hH8MSUJpSbSDYk0k2",
            "Bill": "pqHfZKP75CvOlQylNhV4", "Callum": "N2lVS1w4EtoT3dr4eOWO",
            "Charlie": "IKne3meq5aSn9XLyUdCD", "Charlotte": "XB0fDUnXU5powFXDhCwa",
            "Chris": "iP95p4xoKVk53GoZ742B", "Daniel": "onwK4e9ZLuTAKqWW03F9",
            "Eric": "cjVigY5qzO86Huf0OWal", "George": "JBFqnCBsd6RMkjVDRZzb",
            "Jessica": "cgSgspJ2msm6clMCkdW9", "Laura": "FGY2WhTYpPnrIDTdsKH5",
            "Liam": "TX3LPaxmHKxFdv7VOQHJ", "Lily": "pFZP5JQG7iQjIQuC4Bku",
            "Matilda": "XrExE9yKIg1WjnnlVkGX", "Sarah": "EXAVITQu4vr4xnSDxMaL",
            "Will": "bIHbv24MWmeRgasZH58o", "Neal": "Zp1aWhL05Pi5BkhizFC3",
        }
        # Query parameters sent with every synthesis request.
        self.params = {'allow_unauthenticated': '1'}

    def tts(self, text: str, voice: str = "Brian") -> str:
        """Synthesise *text* with *voice* and return the path of the mp3 file.

        Args:
            text: Text to speak; it is split into sentences before sending.
            voice: A key of ``self.all_voices``.

        Returns:
            POSIX-style path of the newly written audio file.

        Raises:
            ValueError: If *voice* is unknown or *text* is empty/blank.
            requests.HTTPError: If the API answers with an error status.
        """
        if voice not in self.all_voices:
            raise ValueError(f"Voice '{voice}' not available")
        # Without this guard no request would be made and a zero-byte
        # "audio" file would be returned to the caller.
        if not text or not text.strip():
            raise ValueError("Text must not be empty")

        endpoint = (
            'https://api.elevenlabs.io/v1/text-to-speech/'
            f'{self.all_voices[voice]}'
        )
        audio_chunks = []
        for sentence in split_sentences(text):
            response = self.session.post(
                endpoint,
                params=self.params,
                json={'text': sentence, 'model_id': 'eleven_multilingual_v2'},
                timeout=20,
            )
            response.raise_for_status()
            audio_chunks.append(response.content)

        # A second-resolution timestamp is not unique: two requests in the
        # same second would overwrite each other's file. Let tempfile pick
        # a unique name instead; delete=False keeps the file for serving.
        with tempfile.NamedTemporaryFile(
            mode='wb',
            prefix='tts_',
            suffix='.mp3',
            dir=self.cache_dir,
            delete=False,
        ) as audio_file:
            audio_file.write(b"".join(audio_chunks))
        return Path(audio_file.name).as_posix()
# Serve generated audio files.
# They are read from the system temp directory, the same directory
# ElevenlabsTTS uses as its cache_dir.
app.config['AUDIO_FOLDER'] = tempfile.gettempdir()


@app.route('/audio/<filename>')
def serve_audio(filename):
    """Send a previously generated mp3 file back to the browser.

    Args:
        filename: Bare file name taken from the URL.

    Returns:
        The file as an ``audio/mpeg`` response, or a 404 response when the
        name is not a generated audio file or the file does not exist.
    """
    # The temp directory is shared with other programs, so only names that
    # follow the generator's "tts_<token>.mp3" scheme may be served.
    if not re.fullmatch(r'tts_[A-Za-z0-9_]+\.mp3', filename):
        return "File not found", 404

    audio_path = Path(app.config['AUDIO_FOLDER']) / filename
    # Answer with 404 instead of letting send_file raise on a missing file.
    if not audio_path.is_file():
        return "File not found", 404
    return send_file(audio_path, mimetype='audio/mpeg')
# Web Interface
# Single provider instance shared by all requests.
tts_provider = ElevenlabsTTS()


@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the main page and, on POST, convert the submitted text.

    Form fields read on POST:
        text: Text to synthesise (required, must not be blank).
        voice: Voice name; defaults to ``'Brian'``.

    Returns:
        The rendered ``index.html`` template with the list of voices and,
        depending on the outcome, an audio URL plus download file name or
        an error message.
    """
    audio_url = None
    download_filename = None
    error = None

    if request.method == 'POST':
        # A missing field comes back as None; treat it like blank input.
        text = (request.form.get('text') or '').strip()
        voice = request.form.get('voice', 'Brian')

        if not text:
            error = "Please enter some text to convert."
        else:
            try:
                audio_file = tts_provider.tts(text, voice)
            except ValueError as e:
                # Input problems (e.g. unknown voice) are shown as-is.
                error = str(e)
            except Exception as e:
                # Boundary handler: keep the page usable, but record the
                # traceback so the failure is not lost.
                app.logger.exception("Text-to-speech request failed")
                error = str(e)
            else:
                audio_url = url_for(
                    'serve_audio', filename=Path(audio_file).name
                )
                download_filename = f"{voice}_output.mp3"

    return render_template(
        'index.html',
        voices=list(tts_provider.all_voices),
        audio_url=audio_url,
        download_filename=download_filename,
        error=error,
    )
if __name__ == "__main__":
    # Started as a script: serve on every network interface, port 5000.
    app.run(port=5000, host='0.0.0.0')