# Kapamtalk / app.py
# Header captured from the Hugging Face file view (not part of the program):
# uploaded by Coco-18, commit "Create app.py" (661887e, verified), 2.47 kB.
from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
import torch
import torchaudio
import soundfile as sf
import os
from transformers import VitsModel, AutoTokenizer
# Flask application object; CORS is enabled so the Android client can call it.
app = Flask(__name__)
CORS(app)

# Hugging Face Hub checkpoint used for each supported language.
MODELS = {
    "kapampangan": "facebook/mms-tts-pam",
    "tagalog": "facebook/mms-tts-tgl",
    "english": "facebook/mms-tts-eng",
}

# Audio output settings.
SAMPLE_RATE = 16000  # Default sample rate
OUTPUT_DIR = "/tmp/"  # Use /tmp for Hugging Face Spaces (limited storage)

# Every model and its tokenizer is loaded once at import time and kept in
# these per-language lookups, so request handlers only do dictionary access.
loaded_models = {}
loaded_processors = {}

for lang in MODELS:
    path = MODELS[lang]
    print(f"Loading {lang} model from Hugging Face: {path}...")
    loaded_models[lang] = VitsModel.from_pretrained(path)
    loaded_processors[lang] = AutoTokenizer.from_pretrained(path)
    print(f"{lang.capitalize()} model loaded successfully!")
@app.route("/tts", methods=["POST"])
def generate_tts():
    """Synthesize speech for the posted text and save it as a WAV file.

    Expects a JSON object body with:
        text: The text to synthesize (required, non-blank string).
        language: One of the keys of ``MODELS`` (case-insensitive);
            defaults to "kapampangan" when omitted.

    Returns:
        A JSON response with a confirmation message and the ``file_url``
        under which the audio can be fetched, or a JSON error with HTTP
        status 400 when the body, the language, or the text is invalid.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad requests get a JSON 400 rather than crashing on .get().
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    text_input = data.get("text", "")
    language = data.get("language", "kapampangan")

    # A non-string language (null, number, list, ...) has no .lower(); treat
    # it the same way as an unknown language name.
    if not isinstance(language, str) or language.lower() not in MODELS:
        return jsonify({"error": "Invalid language. Choose 'kapampangan', 'tagalog', or 'english'."}), 400
    language = language.lower()

    # Reject non-string and blank text before it reaches the tokenizer.
    if not isinstance(text_input, str) or not text_input.strip():
        return jsonify({"error": "No text provided"}), 400

    print(f"Generating speech for: '{text_input}' in {language}")

    # Select the model and tokenizer that were preloaded for this language.
    model = loaded_models[language]
    processor = loaded_processors[language]

    # Tokenize input text
    inputs = processor(text_input, return_tensors="pt")

    # VITS synthesizes audio in its forward pass and exposes the samples as
    # `.waveform`; it is not driven through `.generate()`.
    with torch.no_grad():
        output = model(**inputs).waveform
    waveform = output.cpu().numpy().flatten()

    # Prefer the rate the checkpoint declares; fall back to the module default.
    sample_rate = getattr(model.config, "sampling_rate", SAMPLE_RATE)

    # NOTE: the file name is fixed per language, so each request overwrites
    # the previous output for that language.
    output_filename = os.path.join(OUTPUT_DIR, f"{language}_output.wav")
    sf.write(output_filename, waveform, sample_rate)

    return jsonify({
        "message": "TTS audio generated",
        "file_url": f"/static/{language}_output.wav"
    })
@app.route("/static/<filename>")
def serve_audio(filename):
    """Serve a WAV file previously generated by the TTS endpoint.

    Args:
        filename: Name of the requested file, taken from the URL path.

    Returns:
        The WAV file with mimetype ``audio/wav``, or a JSON error with HTTP
        status 404 when the name is not one this service produces or the
        file has not been generated yet.
    """
    # OUTPUT_DIR is a shared temp directory, so only the names this service
    # itself writes ("<language>_output.wav") are ever exposed.
    servable_names = {f"{lang}_output.wav" for lang in MODELS}
    if filename in servable_names:
        file_path = os.path.join(OUTPUT_DIR, filename)
        # isfile (not exists) so a directory of that name is never sent.
        if os.path.isfile(file_path):
            return send_file(file_path, mimetype="audio/wav")
    return jsonify({"error": "File not found"}), 404
if __name__ == "__main__":
    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it allows arbitrary code execution from the
    # browser. Set FLASK_DEBUG=1 to opt in during local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes"}
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)