File size: 4,091 Bytes
dc0538c
 
 
 
 
 
c1d9f4f
b032d22
dc0538c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed55f2d
dc0538c
 
 
 
 
 
 
 
 
 
 
 
ed55f2d
c1d9f4f
dc0538c
ed55f2d
 
 
 
 
 
 
 
8662041
dc0538c
ed55f2d
dc0538c
 
 
 
 
ed55f2d
dc0538c
 
 
 
 
 
 
ed55f2d
dc0538c
 
 
 
 
 
 
 
 
 
 
 
 
 
b032d22
 
 
 
 
 
 
dc0538c
 
 
 
 
b032d22
 
 
 
dc0538c
b032d22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc0538c
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import re
import tempfile
import time
from io import BytesIO
from pathlib import Path
from typing import List

import requests
from flask import (
    Flask,
    abort,
    render_template,
    request,
    send_file,
    send_from_directory,
    url_for,
)

# Flask application object; the route handlers in this file are registered on it.
app = Flask(__name__)

class SentenceTokenizer:
    """Regex-based sentence splitter for Latin and CJK punctuation.

    A sentence boundary is recognised:

    * after ``.`` followed by whitespace and an ASCII capital letter;
    * after ``!`` or ``?`` followed by whitespace, whatever comes next;
    * after the last of a run of full-width terminators (``。``, ``！``,
      ``？``), with or without whitespace, because CJK text normally puts
      no space between sentences.

    There is no abbreviation handling: ``"Dr. Smith"`` is split after
    ``"Dr."``.
    """

    # Compiled once for the class; instances still reach it as
    # ``self.SENTENCE_END``.  The last alternative can match an empty string,
    # which ``re.split`` honours on Python 3.7+.
    SENTENCE_END = re.compile(
        r'(?<=\.)\s+(?=[A-Z])'
        r'|(?<=[!?])\s+'
        r'|(?<=[。！？])(?![。！？])\s*'
    )

    def tokenize(self, text: str) -> List[str]:
        """Return the sentences found in *text*.

        Args:
            text: Text to split. ``None``, empty and whitespace-only values
                are accepted.

        Returns:
            The sentences in their original order, each stripped of
            surrounding whitespace and with its terminating punctuation
            kept. Empty when *text* holds nothing but whitespace.
        """
        if not text or not text.strip():
            return []
        pieces = self.SENTENCE_END.split(text.strip())
        # A boundary at the very end of the text leaves an empty tail; drop it.
        return [piece.strip() for piece in pieces if piece.strip()]

def split_sentences(text: str) -> List[str]:
    """Split *text* into sentences using a freshly built ``SentenceTokenizer``."""
    return SentenceTokenizer().tokenize(text)

class ElevenlabsTTS:
    """Text-to-speech provider using Elevenlabs API.

    The input text is split into sentences, each sentence is posted to the
    ElevenLabs ``text-to-speech`` endpoint in its own request, and the
    returned audio bytes are concatenated into a single ``.mp3`` file in the
    system temporary directory.
    """

    def __init__(self):
        # One shared session so every request carries the same headers.
        self.session = requests.Session()
        self.session.headers.update({"User-Agent": "Mozilla/5.0"})
        # Generated files are written here; nothing in this class deletes them.
        self.cache_dir = Path(tempfile.gettempdir())
        # Voice display name -> ElevenLabs voice id used in the request URL.
        self.all_voices = {
            "Brian": "nPczCjzI2devNBz1zQrb", "Alice": "Xb7hH8MSUJpSbSDYk0k2",
            "Bill": "pqHfZKP75CvOlQylNhV4", "Callum": "N2lVS1w4EtoT3dr4eOWO",
            "Charlie": "IKne3meq5aSn9XLyUdCD", "Charlotte": "XB0fDUnXU5powFXDhCwa",
            "Chris": "iP95p4xoKVk53GoZ742B", "Daniel": "onwK4e9ZLuTAKqWW03F9",
            "Eric": "cjVigY5qzO86Huf0OWal", "George": "JBFqnCBsd6RMkjVDRZzb",
            "Jessica": "cgSgspJ2msm6clMCkdW9", "Laura": "FGY2WhTYpPnrIDTdsKH5",
            "Liam": "TX3LPaxmHKxFdv7VOQHJ", "Lily": "pFZP5JQG7iQjIQuC4Bku",
            "Matilda": "XrExE9yKIg1WjnnlVkGX", "Sarah": "EXAVITQu4vr4xnSDxMaL",
            "Will": "bIHbv24MWmeRgasZH58o", "Neal": "Zp1aWhL05Pi5BkhizFC3"
        }
        # Query parameters sent with every synthesis request.
        self.params = {'allow_unauthenticated': '1'}

    def tts(self, text: str, voice: str = "Brian") -> str:
        """Synthesize *text* with *voice* and save the audio as an mp3 file.

        Args:
            text: Text to speak; it is split into sentences before sending.
            voice: A key of ``self.all_voices``.

        Returns:
            POSIX-style path of the written file, named ``tts_<unique>.mp3``
            inside ``self.cache_dir``.

        Raises:
            ValueError: If *voice* is unknown, or *text* is ``None``, empty
                or whitespace-only.
            requests.HTTPError: If a synthesis request returns an error
                status (raised by ``raise_for_status``).
        """
        if voice not in self.all_voices:
            raise ValueError(f"Voice '{voice}' not available")
        # Without this check an empty request would write a zero-byte mp3.
        if not text or not text.strip():
            raise ValueError("Text must not be empty")

        endpoint = f'https://api.elevenlabs.io/v1/text-to-speech/{self.all_voices[voice]}'
        audio_chunks = []
        for sentence in split_sentences(text):
            response = self.session.post(
                endpoint,
                params=self.params,
                json={'text': sentence, 'model_id': 'eleven_multilingual_v2'},
                timeout=20
            )
            response.raise_for_status()
            audio_chunks.append(response.content)

        # The file is created only after every request succeeded, so a failed
        # synthesis leaves nothing behind.  NamedTemporaryFile picks a unique
        # name, so calls landing in the same second cannot overwrite each
        # other the way a timestamp-based name could.
        with tempfile.NamedTemporaryFile(
            prefix="tts_", suffix=".mp3", dir=str(self.cache_dir), delete=False
        ) as audio_file:
            audio_file.write(b"".join(audio_chunks))
        return Path(audio_file.name).as_posix()

# Serve static audio files
app.config['AUDIO_FOLDER'] = tempfile.gettempdir()

@app.route('/audio/<filename>')
def serve_audio(filename):
    """Serve one generated mp3 from the configured audio folder.

    Args:
        filename: File name taken from the request URL (untrusted).

    Returns:
        The file as an ``audio/mpeg`` response. The request is aborted with
        404 when the name is not shaped like a generated file or the file
        does not exist.
    """
    # AUDIO_FOLDER is the system temp directory, which also holds unrelated
    # files, so only names shaped like this app's output (``tts_*.mp3``) are
    # served.
    if not re.fullmatch(r'tts_[A-Za-z0-9_]+\.mp3', filename):
        abort(404)
    # send_from_directory refuses paths that escape the directory and answers
    # 404 for a missing file.
    return send_from_directory(app.config['AUDIO_FOLDER'], filename, mimetype='audio/mpeg')

# Web Interface
tts_provider = ElevenlabsTTS()

@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the main page and, on POST, synthesize the submitted text.

    The form fields ``text`` and ``voice`` (default ``'Brian'``) are passed to
    ``tts_provider.tts``. On success the template receives the URL of the
    generated audio and a suggested download name; on any exception it
    receives the exception message as ``error`` instead.
    """
    context = {'audio_url': None, 'download_filename': None, 'error': None}

    if request.method == 'POST':
        form = request.form
        text = form.get('text')
        voice = form.get('voice', 'Brian')
        try:
            generated_path = tts_provider.tts(text, voice)
            # Only the bare file name goes into the URL, not the full path.
            audio_name = Path(generated_path).name
            context['audio_url'] = url_for('serve_audio', filename=audio_name)
            context['download_filename'] = f"{voice}_output.mp3"
        except Exception as exc:
            context['error'] = str(exc)

    return render_template(
        'index.html',
        voices=tts_provider.all_voices.keys(),
        **context,
    )

# When this file is executed directly, start the Flask server listening on
# all network interfaces (0.0.0.0), port 5000.
if __name__ == "__main__":
    app.run(host='0.0.0.0', port=5000)