|
import os |
|
import torch |
|
import torchaudio |
|
import soundfile as sf |
|
from flask import Flask, request, jsonify, send_file |
|
from flask_cors import CORS |
|
from transformers import VitsModel, AutoTokenizer |
|
|
|
|
|
# Redirect every model/cache directory into /tmp so the app can run on
# read-only container filesystems (e.g. Hugging Face Spaces).
# NOTE(review): these are assigned after `transformers` is imported above;
# TRANSFORMERS_CACHE is typically read at import time, so the explicit
# cache_dir= arguments passed to from_pretrained below are what actually
# take effect — confirm before relying on the env vars alone.
_CACHE_ENV = {
    "HF_HOME": "/tmp/hf_home",
    "TRANSFORMERS_CACHE": "/tmp/transformers_cache",
    "HUGGINGFACE_HUB_CACHE": "/tmp/huggingface_cache",
    "TORCH_HOME": "/tmp/torch_home",
}
for _name, _dir in _CACHE_ENV.items():
    os.environ[_name] = _dir

# Flask application with CORS enabled for all origins.
app = Flask(__name__)
CORS(app)
|
|
|
|
|
# Supported language name -> Hugging Face MMS-TTS checkpoint id.
MODELS = dict(
    kapampangan="facebook/mms-tts-pam",
    tagalog="facebook/mms-tts-tgl",
    english="facebook/mms-tts-eng",
)
|
|
|
# Per-language model/tokenizer registries. A failed load leaves None so
# /tts can return a per-language 500 instead of crashing the whole app.
loaded_models = {}
loaded_processors = {}

# Eagerly load every model at startup so the first request is fast.
for lang, path in MODELS.items():
    try:
        print(f"Loading {lang} model: {path}...")
        loaded_models[lang] = VitsModel.from_pretrained(path, cache_dir="/tmp/huggingface_cache")
        loaded_processors[lang] = AutoTokenizer.from_pretrained(path, cache_dir="/tmp/huggingface_cache")
        # BUGFIX: original success message was a garbled f-string split
        # across two lines (a SyntaxError); rewritten as a single line.
        print(f"{lang.capitalize()} model loaded successfully!")
    except Exception as e:
        # Best-effort startup: keep serving the languages that did load.
        print(f"Error loading {lang} model: {e}")
        loaded_models[lang] = None
        loaded_processors[lang] = None

# Generated WAV files are written here and served back by /download.
OUTPUT_DIR = "/tmp/"
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
|
|
@app.route("/", methods=["GET"])
def home():
    """Health-check endpoint confirming the API is up."""
    payload = {"message": "TTS API is running. Use /tts to generate speech."}
    return jsonify(payload)
|
|
|
@app.route("/tts", methods=["POST"])
def generate_tts():
    """Generate speech from a JSON body {"text": ..., "language": ...}.

    Returns JSON with a /download URL for the rendered WAV on success,
    or a 4xx/5xx JSON error payload.
    """
    try:
        # silent=True: a missing/invalid JSON body yields None instead of
        # an exception (which the broad handler below would turn into a
        # 500); we then report a clean 400 via the empty-text check.
        data = request.get_json(silent=True) or {}
        text_input = data.get("text", "").strip()
        language = data.get("language", "kapampangan").lower()

        if language not in MODELS:
            return jsonify({"error": "Invalid language. Choose 'kapampangan', 'tagalog', or 'english'."}), 400
        if not text_input:
            return jsonify({"error": "No text provided"}), 400
        if loaded_models[language] is None:
            return jsonify({"error": f"Model for {language} failed to load"}), 500

        print(f"Generating speech for '{text_input}' in {language}...")

        processor = loaded_processors[language]
        model = loaded_models[language]
        inputs = processor(text_input, return_tensors="pt")

        # Inference only — no gradient tracking needed.
        with torch.no_grad():
            output = model(**inputs).waveform
        waveform = output.squeeze().cpu().numpy()

        # One file per language; repeated requests overwrite it.
        output_filename = os.path.join(OUTPUT_DIR, f"{language}_output.wav")
        sampling_rate = model.config.sampling_rate
        sf.write(output_filename, waveform, sampling_rate)
        # BUGFIX: original success message was a garbled f-string split
        # across two lines (a SyntaxError); rewritten as a single line.
        print(f"Speech generated! File saved: {output_filename}")

        return jsonify({
            "message": "TTS audio generated",
            "file_url": f"/download/{language}_output.wav"
        })
    except Exception as e:
        print(f"Error generating TTS: {e}")
        return jsonify({"error": f"Internal server error: {str(e)}"}), 500
|
|
|
@app.route("/download/<filename>", methods=["GET"])
def download_audio(filename):
    """Serve a previously generated WAV file from OUTPUT_DIR.

    `filename` comes from the URL and is untrusted: strip any directory
    components so encoded `../` sequences cannot escape OUTPUT_DIR
    (defense in depth on top of the route converter's own filtering).
    """
    safe_name = os.path.basename(filename)
    file_path = os.path.join(OUTPUT_DIR, safe_name)
    # isfile (not exists): a directory here would make send_file blow up.
    if os.path.isfile(file_path):
        return send_file(file_path, mimetype="audio/wav", as_attachment=True)
    return jsonify({"error": "File not found"}), 404
|
|
|
if __name__ == "__main__":
    # Bind on all interfaces, port 7860 (the Hugging Face Spaces default).
    # NOTE(review): debug=True enables the Werkzeug interactive debugger
    # and reloader — confirm this is never exposed publicly with debug on.
    app.run(host="0.0.0.0", port=7860, debug=True)
|
|
|
|