File size: 4,541 Bytes
dc0538c
 
 
 
 
 
c1d9f4f
791fe32
dc0538c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
791fe32
 
 
 
c1d9f4f
dc0538c
8662041
 
 
 
 
 
 
 
 
dc0538c
8662041
791fe32
 
 
dc0538c
 
 
 
 
8662041
dc0538c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8662041
 
 
 
 
 
dc0538c
 
 
 
 
 
8662041
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc0538c
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import time
import requests
from io import BytesIO
from pathlib import Path
from typing import List
import re
import tempfile
import os
from flask import Flask, request, render_template, send_file

# Flask application object; the route decorators further down register their
# handlers on it.
app = Flask(__name__)

class SentenceTokenizer:
    """Split text into sentences with a single regular expression.

    A sentence boundary is a run of whitespace that either

    * follows ``.``, ``!`` or ``?`` and is followed by an ASCII capital
      letter, or
    * follows ``。``, ``!`` or ``?`` (regardless of what comes next).

    No abbreviation handling is attempted, so ``"Dr. Smith"`` is split after
    ``"Dr."``.
    """

    # Compiled once for the class instead of on every instantiation; it is
    # still reachable as ``self.SENTENCE_END``. The pattern contains no literal
    # whitespace or ``#``, so the ``re.VERBOSE`` flag it used to carry had no
    # effect and is dropped.
    # NOTE(review): the second alternative lists ASCII ``!``/``?`` next to the
    # ideographic full stop. If full-width punctuation was intended the
    # pattern needs correcting; it is kept unchanged here to preserve
    # behaviour.
    SENTENCE_END = re.compile(r'(?<=[.!?])\s+(?=[A-Z])|(?<=[。!?])\s+')

    def tokenize(self, text: str) -> List[str]:
        """Return the non-empty, stripped sentences found in *text*.

        Args:
            text: Input text. ``None``, empty and whitespace-only input all
                yield an empty list.

        Returns:
            The sentences in their original order, each stripped of
            surrounding whitespace.
        """
        if not text or not text.strip():
            return []
        pieces = (piece.strip() for piece in self.SENTENCE_END.split(text.strip()))
        return [piece for piece in pieces if piece]

def split_sentences(text: str) -> List[str]:
    """Tokenize *text* into sentences using a freshly built SentenceTokenizer."""
    return SentenceTokenizer().tokenize(text)

class ElevenlabsTTS:
    """Text-to-speech provider using the Elevenlabs API.

    Text is split into sentences, each sentence is POSTed to the
    ``/v1/text-to-speech/<voice id>`` endpoint, and the response bodies are
    concatenated into one ``.mp3`` file inside the system temp directory.
    """

    def __init__(self):
        """Prepare the HTTP session, cache directory and voice table.

        Raises:
            ValueError: If the ``ELEVENLABS_API_KEY`` environment variable is
                unset or empty.
        """
        # Read the key once and fail before any other state is built, so the
        # session never carries a ``None`` header value.
        api_key = os.getenv("ELEVENLABS_API_KEY")
        if not api_key:
            raise ValueError("ELEVENLABS_API_KEY environment variable is not set")

        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0",
            "xi-api-key": api_key,
        })
        self.cache_dir = Path(tempfile.gettempdir())
        # Display name -> Elevenlabs voice id (used in the request URL).
        self.all_voices = {
            "Brian": "nPczCjzI2devNBz1zQrb", "Alice": "Xb7hH8MSUJpSbSDYk0k2",
            "Bill": "pqHfZKP75CvOlQylNhV4", "Callum": "N2lVS1w4EtoT3dr4eOWO",
            "Charlie": "IKne3meq5aSn9XLyUdCD", "Charlotte": "XB0fDUnXU5powFXDhCwa",
            "Chris": "iP95p4xoKVk53GoZ742B", "Daniel": "onwK4e9ZLuTAKqWW03F9",
            "Eric": "cjVigY5qzO86Huf0OWal", "George": "JBFqnCBsd6RMkjVDRZzb",
            "Jessica": "cgSgspJ2msm6clMCkdW9", "Laura": "FGY2WhTYpPnrIDTdsKH5",
            "Liam": "TX3LPaxmHKxFdv7VOQHJ", "Lily": "pFZP5JQG7iQjIQuC4Bku",
            "Matilda": "XrExE9yKIg1WjnnlVkGX", "Sarah": "EXAVITQu4vr4xnSDxMaL",
            "Will": "bIHbv24MWmeRgasZH58o", "Neal": "Zp1aWhL05Pi5BkhizFC3"
        }
        self.preview_text = "Hello, this is a sample of my voice."

    def _synthesize(self, text: str, voice: str) -> bytes:
        """Return the concatenated API response bodies for each sentence of *text*.

        Raises:
            ValueError: If *voice* is not in ``all_voices`` or *text* is
                empty/blank.
            requests.HTTPError: If the API answers with an error status
                (raised by ``raise_for_status``).
        """
        if voice not in self.all_voices:
            raise ValueError(f"Voice '{voice}' not available")
        # Without this check an empty request would silently produce a
        # zero-byte audio file.
        if not text or not text.strip():
            raise ValueError("Text must not be empty")

        sentences = split_sentences(text)
        if not sentences:
            raise ValueError("Text must not be empty")

        url = f'https://api.elevenlabs.io/v1/text-to-speech/{self.all_voices[voice]}'
        chunks = []
        for sentence in sentences:
            response = self.session.post(
                url,
                json={'text': sentence, 'model_id': 'eleven_multilingual_v2'},
                timeout=20,
            )
            response.raise_for_status()
            chunks.append(response.content)
        # Requests are issued sequentially, so list order is sentence order.
        return b"".join(chunks)

    def tts(self, text: str, voice: str = "Brian") -> str:
        """Synthesize *text* with *voice* and return the path of the MP3 file.

        Args:
            text: Text to speak; must contain at least one non-blank character.
            voice: A key of ``all_voices``.

        Returns:
            POSIX-style path of a newly created ``tts_<voice>_*.mp3`` file in
            ``cache_dir``.

        Raises:
            ValueError: Unknown voice or empty text.
            requests.HTTPError: The API returned an error status.
        """
        audio = self._synthesize(text, voice)
        # A unique name per call: a second-resolution timestamp let two
        # requests for the same voice in the same second overwrite each other.
        # NOTE(review): files are created with delete=False and nothing in this
        # class removes them afterwards.
        with tempfile.NamedTemporaryFile(
            mode='wb',
            prefix=f"tts_{voice}_",
            suffix=".mp3",
            dir=self.cache_dir,
            delete=False,
        ) as out:
            out.write(audio)
        return Path(out.name).as_posix()

    def generate_preview(self, voice: str) -> str:
        """Return the path of the cached preview clip for *voice*, creating it if needed.

        Raises:
            ValueError: If *voice* is not in ``all_voices``.
            requests.HTTPError: The API returned an error status while the
                clip was being generated.
        """
        # Validate before the name is used to build a file path.
        if voice not in self.all_voices:
            raise ValueError(f"Voice '{voice}' not available")
        preview_file = self.cache_dir / f"preview_{voice}.mp3"
        if not preview_file.exists():
            # Move the fresh clip onto the cache name so later calls find it;
            # previously the clip was never stored under this name and every
            # call went back to the API.
            os.replace(self.tts(self.preview_text, voice), preview_file)
        return preview_file.as_posix()

# Web Interface
# Single provider instance shared by every route. It is built at import time,
# so importing this module raises ValueError (from ElevenlabsTTS.__init__) when
# ELEVENLABS_API_KEY is not set.
tts_provider = ElevenlabsTTS()

@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the main page, or return synthesized audio for a "generate" POST.

    A POST whose form contains ``generate`` reads ``text`` and ``voice``
    (default ``"Brian"``), asks the provider for an MP3 and sends it as the
    attachment ``<voice>_output.mp3``. Missing/blank text, or any exception
    raised while generating or sending, re-renders ``index.html`` with an
    ``error`` message instead.

    Every other request renders ``index.html`` with ``voices`` and a
    ``previews`` mapping of voice name to the path returned by
    ``generate_preview``.
    """
    voices = tts_provider.all_voices.keys()

    if request.method == 'POST' and 'generate' in request.form:
        text = request.form.get('text')
        voice = request.form.get('voice', 'Brian')
        # Reject missing or blank text up front instead of asking the provider
        # to synthesize nothing.
        if not text or not text.strip():
            return render_template('index.html', error="Text must not be empty", voices=voices)
        try:
            audio_file = tts_provider.tts(text, voice)
            return send_file(audio_file, mimetype='audio/mpeg', as_attachment=True, download_name=f"{voice}_output.mp3")
        except Exception as e:
            # Request boundary: surface the failure on the page rather than a bare 500.
            return render_template('index.html', error=str(e), voices=voices)

    # NOTE(review): a preview is requested for every voice on each page render;
    # confirm this is acceptable for page-load latency.
    previews = {voice: tts_provider.generate_preview(voice) for voice in tts_provider.all_voices}
    return render_template('index.html', voices=voices, previews=previews)

@app.route('/preview/<voice>')
def preview(voice):
    """Serve the preview clip for *voice* as ``audio/mpeg``.

    Returns:
        The audio file on success; ``("Voice not available", 404)`` when
        *voice* is not one of the provider's voices; ``(error text, 500)``
        when generating or sending the clip raises.
    """
    # ``voice`` comes straight from the URL. Check it against the known voices
    # before it is used to build a file name, and answer with a fixed message:
    # echoing the raw value back in the response body would reflect untrusted
    # input, and an unknown name is a client error rather than a server error.
    if voice not in tts_provider.all_voices:
        return "Voice not available", 404
    try:
        audio_file = tts_provider.generate_preview(voice)
        return send_file(audio_file, mimetype='audio/mpeg')
    except Exception as e:
        return str(e), 500

# Start Flask's built-in server only when this file is executed directly, not
# when it is imported. host='0.0.0.0' listens on all network interfaces.
if __name__ == "__main__":
    app.run(host='0.0.0.0', port=5000)