Niansuh committed on
Commit
dc0538c
·
verified ·
1 Parent(s): 213867b

Rename tts_script.py to app.py

Browse files
Files changed (2) hide show
  1. app.py +89 -0
  2. tts_script.py +0 -81
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import requests
3
+ from io import BytesIO
4
+ from pathlib import Path
5
+ from typing import List
6
+ import re
7
+ from flask import Flask, request, render_template, send_file
8
+
9
+ app = Flask(__name__)
10
+
11
class SentenceTokenizer:
    """Regex-based sentence splitter.

    A sentence boundary is a run of whitespace that is either

    * preceded by ``.``, ``!`` or ``?`` and followed by an ASCII capital
      letter, or
    * preceded by ``。``, ``!`` or ``?`` (no look-ahead required).

    The terminating punctuation stays attached to the sentence it ends.
    """

    # Compiled once for the whole class instead of once per instance.
    # NOTE(review): the second character class repeats ASCII '!' and '?', so
    # the capital-letter look-ahead only ever constrains '.'. Possibly the
    # full-width forms were intended -- confirm before changing the pattern.
    _SENTENCE_END = re.compile(r'(?<=[.!?])\s+(?=[A-Z])|(?<=[。!?])\s+')

    def __init__(self):
        # Kept as an instance attribute so existing code that reads
        # ``tokenizer.SENTENCE_END`` keeps working.
        self.SENTENCE_END = self._SENTENCE_END

    def tokenize(self, text: str) -> List[str]:
        """Return the non-empty, stripped sentences found in *text*.

        Args:
            text: The text to split. ``None``, empty and whitespace-only
                values yield an empty list.

        Returns:
            The sentences in their original order.
        """
        if not text or not text.strip():
            return []
        pieces = self.SENTENCE_END.split(text.strip())
        return [piece.strip() for piece in pieces if piece.strip()]
26
+
27
def split_sentences(text: str) -> List[str]:
    """Split *text* into sentences using a freshly built ``SentenceTokenizer``."""
    return SentenceTokenizer().tokenize(text)
30
+
31
class ElevenlabsTTS:
    """Text-to-speech provider using the Elevenlabs API.

    Text is split into sentences, each sentence is posted to the
    ``/v1/text-to-speech/<voice id>`` endpoint, and the returned bodies are
    joined into a single ``.mp3`` file inside ``cache_dir``.
    """

    def __init__(self):
        # One shared session so the User-Agent header is sent on every call.
        self.session = requests.Session()
        self.session.headers.update({"User-Agent": "Mozilla/5.0"})
        # Relative to the process working directory.
        self.cache_dir = Path("./audio_cache")
        # Display name -> Elevenlabs voice id.
        self.all_voices = {
            "Brian": "nPczCjzI2devNBz1zQrb",
            "Alice": "Xb7hH8MSUJpSbSDYk0k2",
            # Add other voices as needed
        }
        # Query-string parameters sent with every synthesis request.
        self.params = {'allow_unauthenticated': '1'}

    def tts(self, text: str, voice: str = "Brian") -> str:
        """Synthesize *text* with *voice* and return the path of the audio file.

        Args:
            text: The text to speak. Must contain at least one non-blank
                character.
            voice: A key of ``self.all_voices``.

        Returns:
            POSIX-style path of the written ``.mp3`` file.

        Raises:
            ValueError: If *voice* is unknown or *text* is empty/blank.
            requests.HTTPError: If any synthesis request returns an error
                status (raised by ``raise_for_status``).
        """
        import uuid

        if voice not in self.all_voices:
            raise ValueError(f"Voice '{voice}' not available")
        # Reject blank input up front; otherwise no request is made and an
        # empty .mp3 file would be written and served.
        if not text or not text.strip():
            raise ValueError("No text provided")

        endpoint = f'https://api.elevenlabs.io/v1/text-to-speech/{self.all_voices[voice]}'

        # Requests run sequentially, so a plain list already keeps the
        # chunks in sentence order.
        audio_chunks = []
        for sentence in split_sentences(text):
            response = self.session.post(
                endpoint,
                params=self.params,
                json={'text': sentence, 'model_id': 'eleven_multilingual_v2'},
                timeout=20,
            )
            response.raise_for_status()
            audio_chunks.append(response.content)

        self.cache_dir.mkdir(parents=True, exist_ok=True)
        # A timestamp alone collides when two requests arrive within the same
        # second; the random suffix keeps every output file distinct.
        filename = self.cache_dir / f"{int(time.time())}_{uuid.uuid4().hex}.mp3"
        # The response bodies are concatenated byte-for-byte, without
        # re-encoding.
        filename.write_bytes(b"".join(audio_chunks))
        return filename.as_posix()
72
+
73
+ # Web Interface
74
+ tts_provider = ElevenlabsTTS()
75
+
76
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the TTS form (GET) or synthesize and download audio (POST).

    POST reads the ``text`` and ``voice`` form fields (``voice`` defaults to
    ``'Brian'``). On success the generated file is returned as an
    ``audio/mpeg`` attachment; on any failure the form is re-rendered with
    the error message.
    """
    voices = list(tts_provider.all_voices)
    if request.method != 'POST':
        return render_template('index.html', voices=voices)

    # The field may be absent or blank; normalise before calling the provider.
    text = (request.form.get('text') or '').strip()
    voice = request.form.get('voice', 'Brian')
    if not text:
        return render_template('index.html', error='Please enter some text.', voices=voices)

    try:
        audio_file = tts_provider.tts(text, voice)
        # The provider returns a path relative to the working directory.
        # Make it absolute so send_file cannot resolve it against a different
        # base directory (e.g. the application root).
        absolute_path = str(Path(audio_file).resolve())
        return send_file(absolute_path, mimetype='audio/mpeg', as_attachment=True)
    except Exception as e:
        # Top-level request boundary: show the failure on the page instead of
        # returning a bare 500.
        return render_template('index.html', error=str(e), voices=voices)
87
+
88
if __name__ == "__main__":
    # Script entry point: serve on all interfaces, port 5000.
    app.run(port=5000, host='0.0.0.0')
tts_script.py DELETED
@@ -1,81 +0,0 @@
1
- import time
2
- import requests
3
- import pathlib
4
- from io import BytesIO
5
- from flask import Flask, request, render_template, send_file
6
- from pydub import AudioSegment
7
-
8
- # Flask App Setup
9
- app = Flask(__name__)
10
-
11
- # ElevenLabs API Configuration
12
- ELEVENLABS_API_URL = "https://api.elevenlabs.io/v1/text-to-speech"
13
- HEADERS = {"User-Agent": "TTSApp"}
14
- CACHE_DIR = pathlib.Path("/tmp/audio_cache")
15
- CACHE_DIR.mkdir(parents=True, exist_ok=True)
16
-
17
- # Available Voices
18
- ALL_VOICES = {
19
- "Brian": "nPczCjzI2devNBz1zQrb",
20
- "Alice": "Xb7hH8MSUJpSbSDYk0k2",
21
- "Will": "bIHbv24MWmeRgasZH58o",
22
- }
23
-
24
- # Split text into sentences (Basic)
25
def split_sentences(text):
    """Split *text* on the literal ``". "`` separator.

    The separator itself is dropped from each piece. Pieces are stripped of
    surrounding whitespace and blank pieces are discarded, so empty or
    ``None`` input yields an empty list rather than ``[""]``.
    """
    if not text:
        return []
    return [piece.strip() for piece in text.split(". ") if piece.strip()]
27
-
28
- # Generate TTS
29
def generate_audio(text, voice):
    """Synthesize *text* with *voice* and export the result as one MP3 file.

    Args:
        text: The text to speak; it is split with ``split_sentences``.
        voice: A key of ``ALL_VOICES``.

    Returns:
        The POSIX-style path of the exported file, or a dict of the form
        ``{"error": "..."}`` when *voice* is not a known voice.
    """
    import uuid

    if voice not in ALL_VOICES:
        return {"error": f"Invalid voice '{voice}'"}

    # A timestamp alone collides when two requests arrive within the same
    # second; the random suffix keeps every output file distinct.
    filename = CACHE_DIR / f"{int(time.time())}_{uuid.uuid4().hex}.mp3"

    combined_audio = AudioSegment.empty()
    for sentence in split_sentences(text):
        # Do not spend an API call on a blank fragment.
        if not sentence.strip():
            continue
        response = requests.post(
            f"{ELEVENLABS_API_URL}/{ALL_VOICES[voice]}",
            headers=HEADERS,
            json={"text": sentence, "model_id": "eleven_multilingual_v2"},
            timeout=20,
        )
        # Best effort: a failed sentence is left out of the result rather
        # than aborting the whole request.
        if not response.ok:
            continue
        combined_audio += AudioSegment.from_file(BytesIO(response.content), format="mp3")

    combined_audio.export(filename, format="mp3")
    return filename.as_posix()
55
-
56
- # Flask Routes
57
@app.route("/", methods=["GET", "POST"])
def home():
    """Serve the TTS form (GET) or generate and download audio (POST).

    POST reads the ``text`` and ``voice`` form fields (``voice`` defaults to
    ``"Brian"``). If ``generate_audio`` reports an error, the message is
    returned with status 400 instead of being handed to ``send_file``.
    """
    if request.method == "POST":
        import html

        text = request.form["text"]
        voice = request.form.get("voice", "Brian")
        audio_file = generate_audio(text, voice)
        # generate_audio returns {"error": ...} for an unknown voice; passing
        # that dict to send_file would crash.
        if isinstance(audio_file, dict):
            message = audio_file.get("error", "Audio generation failed")
            # The message echoes the submitted voice name, so escape it
            # before returning it as an HTML response body.
            return html.escape(message), 400
        return send_file(audio_file, as_attachment=True)

    return """
    <h1>Text-to-Speech Generator</h1>
    <form method="post">
    <label>Text:</label><br>
    <textarea name="text" rows="4" cols="50" required></textarea><br>
    <label>Voice:</label>
    <select name="voice">
    <option value="Brian">Brian</option>
    <option value="Alice">Alice</option>
    <option value="Will">Will</option>
    </select><br><br>
    <button type="submit">Generate & Download</button>
    </form>
    """
79
-
80
if __name__ == "__main__":
    import os

    # The interactive debugger allows arbitrary code execution, so it must
    # not be enabled unconditionally on a server bound to every interface.
    # Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes"}
    app.run(debug=debug_enabled, host="0.0.0.0", port=5000)