File size: 3,780 Bytes
dc0538c
 
 
 
 
 
c1d9f4f
e65b5bc
dc0538c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed55f2d
dc0538c
 
 
 
 
 
 
 
 
 
 
 
ed55f2d
c1d9f4f
dc0538c
ed55f2d
 
 
 
 
 
 
 
8662041
dc0538c
ed55f2d
dc0538c
 
 
 
 
ed55f2d
dc0538c
 
 
 
 
 
 
ed55f2d
dc0538c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2d49c3
 
 
 
 
 
 
 
 
 
 
e65b5bc
dc0538c
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import time
import requests
from io import BytesIO
from pathlib import Path
from typing import List
import re
import tempfile
from flask import Flask, request, render_template, send_file

# Flask application object; the route handlers in this module are registered on it.
app = Flask(__name__)

class SentenceTokenizer:
    """Regex-based splitter that cuts text at sentence-ending punctuation.

    A boundary is a run of whitespace that either

    * follows ``.``, ``!`` or ``?`` and precedes an ASCII uppercase letter, or
    * follows ``。``, ``!`` or ``?`` (whatever character comes next).

    The punctuation stays attached to the sentence it terminates; only the
    whitespace between sentences is consumed.
    """

    def __init__(self):
        # re.VERBOSE is kept as in the original; the pattern has no bare
        # whitespace or '#' outside a character class, so the flag does not
        # change what is matched.
        boundary_pattern = r'(?<=[.!?])\s+(?=[A-Z])|(?<=[。!?])\s+'
        self.SENTENCE_END = re.compile(boundary_pattern, re.VERBOSE)

    def tokenize(self, text: str) -> List[str]:
        """Return the non-empty, whitespace-trimmed sentences found in *text*.

        Falsy or whitespace-only input yields an empty list.
        """
        trimmed = text.strip() if text else ""
        if not trimmed:
            return []

        result = []
        for fragment in self.SENTENCE_END.split(trimmed):
            fragment = fragment.strip()
            if fragment:
                result.append(fragment)
        return result

def split_sentences(text: str) -> List[str]:
    """Split *text* into sentences with a freshly built SentenceTokenizer."""
    return SentenceTokenizer().tokenize(text)

class ElevenlabsTTS:
    """Text-to-speech provider using Elevenlabs API.

    Text is split into sentences, each sentence is posted to the
    ``/v1/text-to-speech/<voice_id>`` endpoint, and the returned audio
    payloads are concatenated into a single ``.mp3`` file in the system
    temporary directory.
    """

    def __init__(self):
        # One shared HTTP session so connections and headers are reused
        # across the per-sentence requests.
        self.session = requests.Session()
        self.session.headers.update({"User-Agent": "Mozilla/5.0"})
        # Generated audio files are written here; nothing in this class
        # removes them afterwards.
        self.cache_dir = Path(tempfile.gettempdir())
        # Voice display name -> voice id used in the request URL.
        self.all_voices = {
            "Brian": "nPczCjzI2devNBz1zQrb", "Alice": "Xb7hH8MSUJpSbSDYk0k2", 
            "Bill": "pqHfZKP75CvOlQylNhV4", "Callum": "N2lVS1w4EtoT3dr4eOWO", 
            "Charlie": "IKne3meq5aSn9XLyUdCD", "Charlotte": "XB0fDUnXU5powFXDhCwa", 
            "Chris": "iP95p4xoKVk53GoZ742B", "Daniel": "onwK4e9ZLuTAKqWW03F9", 
            "Eric": "cjVigY5qzO86Huf0OWal", "George": "JBFqnCBsd6RMkjVDRZzb", 
            "Jessica": "cgSgspJ2msm6clMCkdW9", "Laura": "FGY2WhTYpPnrIDTdsKH5", 
            "Liam": "TX3LPaxmHKxFdv7VOQHJ", "Lily": "pFZP5JQG7iQjIQuC4Bku", 
            "Matilda": "XrExE9yKIg1WjnnlVkGX", "Sarah": "EXAVITQu4vr4xnSDxMaL", 
            "Will": "bIHbv24MWmeRgasZH58o", "Neal": "Zp1aWhL05Pi5BkhizFC3"
        }
        # Query string sent with every request.
        # NOTE(review): presumably lets the call through without an API key - confirm.
        self.params = {'allow_unauthenticated': '1'}

    def tts(self, text: str, voice: str = "Brian") -> str:
        """Synthesize *text* with *voice* and return the path of the mp3 file.

        Args:
            text: Text to speak. It is split into sentences and each one is
                sent as a separate request.
            voice: A key of ``self.all_voices``.

        Returns:
            POSIX-style path of the newly written audio file.

        Raises:
            ValueError: If *voice* is unknown, or *text* is empty or
                whitespace-only.
            requests.HTTPError: If any request returns an error status.
        """
        if voice not in self.all_voices:
            raise ValueError(f"Voice '{voice}' not available")
        # Reject empty input up front instead of writing a zero-byte mp3.
        if not text or not text.strip():
            raise ValueError("Text must not be empty")

        endpoint = f'https://api.elevenlabs.io/v1/text-to-speech/{self.all_voices[voice]}'

        # Requests are sequential, so appending keeps the sentence order.
        audio_chunks = []
        for sentence in split_sentences(text):
            json_data = {'text': sentence, 'model_id': 'eleven_multilingual_v2'}
            response = self.session.post(
                endpoint,
                params=self.params,
                json=json_data,
                timeout=20
            )
            response.raise_for_status()
            audio_chunks.append(response.content)

        # A bare timestamp name collides when two calls land in the same
        # second; let tempfile add a unique component. delete=False keeps the
        # file on disk after the handle is closed so the caller can use it.
        with tempfile.NamedTemporaryFile(
            prefix=f"tts_{int(time.time())}_",
            suffix=".mp3",
            dir=self.cache_dir,
            delete=False,
        ) as f:
            f.write(b"".join(audio_chunks))
            filename = Path(f.name)
        return filename.as_posix()

# Web Interface
# Single module-level provider instance, created at import time and used by
# the route handler below.
tts_provider = ElevenlabsTTS()

@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the TTS form and, on POST, synthesize the submitted text.

    GET renders ``index.html`` with the list of voice names. POST reads the
    ``text`` and ``voice`` form fields, runs them through ``tts_provider``
    and re-renders the page with either the resulting ``audio_file`` path or
    an ``error`` message. The submitted text and voice are passed back to the
    template in both cases so the form can be refilled.
    """
    voices = list(tts_provider.all_voices)
    if request.method != 'POST':
        return render_template('index.html', voices=voices)

    # A missing field would otherwise arrive as None.
    text = request.form.get('text', '')
    voice = request.form.get('voice', 'Brian')

    try:
        if not text.strip():
            raise ValueError("Please enter some text to convert")
        # NOTE(review): audio_file is a server-side filesystem path; no route
        # serving it is visible here - confirm how the template loads it.
        audio_file = tts_provider.tts(text, voice)
    except ValueError as e:
        # Bad user input (empty text, unknown voice): show the message only.
        error = str(e)
    except Exception as e:
        # Top-level boundary: keep the page working, but record the failure.
        app.logger.exception("Text-to-speech request failed")
        error = str(e)
    else:
        return render_template('index.html',
                               audio_file=audio_file,
                               voices=voices,
                               text=text,
                               voice=voice)

    return render_template('index.html',
                           error=error,
                           voices=voices,
                           text=text,
                           voice=voice)

# When executed as a script, start Flask's built-in server listening on all
# network interfaces (0.0.0.0), port 5000.
if __name__ == "__main__":
    app.run(host='0.0.0.0', port=5000)