import os

# Set ALL cache directories to /tmp (writable in Hugging Face Spaces).
# These must be set before transformers/huggingface_hub are imported,
# otherwise those libraries resolve their cache paths to the default
# (read-only) locations at import time.
os.environ["HF_HOME"] = "/tmp/hf_home"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/transformers_cache"
os.environ["HUGGINGFACE_HUB_CACHE"] = "/tmp/huggingface_cache"
os.environ["TORCH_HOME"] = "/tmp/torch_home"

import torch
import soundfile as sf
from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
from transformers import VitsModel, AutoTokenizer

app = Flask(__name__)
CORS(app)  # Allow external requests

# Model paths for different languages (Hugging Face Hub)
MODELS = {
    "kapampangan": "facebook/mms-tts-pam",
    "tagalog": "facebook/mms-tts-tgl",
    "english": "facebook/mms-tts-eng"
}
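# Other languages can be added the same way; MMS TTS checkpoints on the Hub
# follow the facebook/mms-tts-<iso639-3> naming pattern (check the MMS model
# cards to confirm a checkpoint exists for a given language code).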

loaded_models = {}
loaded_processors = {}

for lang, path in MODELS.items():
    try:
        print(f"πŸ”„ Loading {lang} model: {path}...")
        # Force models to save in /tmp
        loaded_models[lang] = VitsModel.from_pretrained(path, cache_dir="/tmp/huggingface_cache")
        loaded_processors[lang] = AutoTokenizer.from_pretrained(path, cache_dir="/tmp/huggingface_cache")
        print(f"βœ… {lang.capitalize()} model loaded successfully!")
    except Exception as e:
        print(f"❌ Error loading {lang} model: {str(e)}")
        loaded_models[lang] = None  # Mark as unavailable
        loaded_processors[lang] = None

# Constants
SAMPLE_RATE = 16000  # MMS TTS checkpoints synthesize 16 kHz audio (model.config.sampling_rate)
OUTPUT_DIR = "/tmp/"
os.makedirs(OUTPUT_DIR, exist_ok=True)

@app.route("/", methods=["GET"])
def home():
    """Root route to check if the API is running"""
    return jsonify({"message": "TTS API is running. Use /tts to generate speech."})

@app.route("/tts", methods=["POST"])
def generate_tts():
    """API endpoint to generate TTS audio"""
    try:
        # Get request data (silent=True returns None instead of raising on malformed JSON)
        data = request.get_json(silent=True) or {}
        text_input = data.get("text", "").strip()
        language = data.get("language", "kapampangan").lower()

        # Validate inputs
        if language not in MODELS:
            return jsonify({"error": "Invalid language. Choose 'kapampangan', 'tagalog', or 'english'."}), 400
        if not text_input:
            return jsonify({"error": "No text provided"}), 400
        if loaded_models[language] is None:
            return jsonify({"error": f"Model for {language} failed to load"}), 500

        print(f"πŸ”„ Generating speech for '{text_input}' in {language}...")

        # Process text input
        processor = loaded_processors[language]
        model = loaded_models[language]
        inputs = processor(text_input, return_tensors="pt")

        # Generate speech: VITS is non-autoregressive, so the forward pass
        # returns the synthesized waveform directly (VitsModel has no .generate()).
        with torch.no_grad():
            output = model(**inputs).waveform
        waveform = output.squeeze().cpu().numpy()

        # Save to file
        output_filename = os.path.join(OUTPUT_DIR, f"{language}_output.wav")
        sf.write(output_filename, waveform, SAMPLE_RATE)
        print(f"βœ… Speech generated! File saved: {output_filename}")

        return jsonify({
            "message": "TTS audio generated",
            "file_url": f"/download/{language}_output.wav"
        })
    except Exception as e:
        print(f"❌ Error generating TTS: {e}")
        return jsonify({"error": f"Internal server error: {str(e)}"}), 500

@app.route("/download/<filename>", methods=["GET"])
def download_audio(filename):
    """Serve generated audio files"""
    # basename() strips any directory components, preventing path traversal
    file_path = os.path.join(OUTPUT_DIR, os.path.basename(filename))
    if os.path.exists(file_path):
        return send_file(file_path, mimetype="audio/wav", as_attachment=True)
    return jsonify({"error": "File not found"}), 404

if __name__ == "__main__":
    # debug=False: the Werkzeug debugger must not be exposed on a public endpoint
    app.run(host="0.0.0.0", port=7860, debug=False)
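
# Example client calls (a sketch, not exercised by this file; assumes the app is
# reachable at http://localhost:7860, the port used by app.run above):
#
#   curl -X POST http://localhost:7860/tts \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Hello world", "language": "english"}'
#   # -> JSON containing "file_url": "/download/english_output.wav"
#
#   curl -o english_output.wav http://localhost:7860/download/english_output.wav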