"""Flask TTS API serving Facebook MMS-TTS (VITS) models for Kapampangan,
Tagalog, and English. Designed to run in a Hugging Face Space, where only
/tmp is writable."""

import os

# Set ALL cache directories to /tmp BEFORE importing torch/transformers:
# both libraries read these environment variables at import time, so
# setting them after the imports (as the original code did) is too late.
os.environ["HF_HOME"] = "/tmp/hf_home"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/transformers_cache"
os.environ["HUGGINGFACE_HUB_CACHE"] = "/tmp/huggingface_cache"
os.environ["TORCH_HOME"] = "/tmp/torch_home"

import torch
import torchaudio  # noqa: F401  (kept: may be required for soundfile/torch audio backends)
import soundfile as sf
from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
from transformers import VitsModel, AutoTokenizer

app = Flask(__name__)
CORS(app)  # Allow external (cross-origin) requests

# Model repos for each supported language (Hugging Face Hub).
MODELS = {
    "kapampangan": "facebook/mms-tts-pam",
    "tagalog": "facebook/mms-tts-tgl",
    "english": "facebook/mms-tts-eng",
}

loaded_models = {}
loaded_processors = {}

for lang, path in MODELS.items():
    try:
        print(f"🔄 Loading {lang} model: {path}...")
        # Force downloads into /tmp (the only writable location in Spaces).
        loaded_models[lang] = VitsModel.from_pretrained(
            path, cache_dir="/tmp/huggingface_cache"
        )
        loaded_processors[lang] = AutoTokenizer.from_pretrained(
            path, cache_dir="/tmp/huggingface_cache"
        )
        print(f"✅ {lang.capitalize()} model loaded successfully!")
    except Exception as e:
        print(f"❌ Error loading {lang} model: {str(e)}")
        loaded_models[lang] = None  # Mark as unavailable
        loaded_processors[lang] = None

# Constants
DEFAULT_SAMPLE_RATE = 16000  # MMS-TTS models are 16 kHz; used as fallback only
OUTPUT_DIR = "/tmp/"
os.makedirs(OUTPUT_DIR, exist_ok=True)


@app.route("/", methods=["GET"])
def home():
    """Root route to check if the API is running."""
    return jsonify({"message": "TTS API is running. Use /tts to generate speech."})


@app.route("/tts", methods=["POST"])
def generate_tts():
    """Generate TTS audio from JSON body {"text": ..., "language": ...}.

    Returns JSON with a /download URL on success; 400 on bad input,
    500 if the model is unavailable or synthesis fails.
    """
    try:
        data = request.get_json()
        text_input = data.get("text", "").strip()
        language = data.get("language", "kapampangan").lower()

        # Validate inputs
        if language not in MODELS:
            return jsonify({"error": "Invalid language. Choose 'kapampangan', 'tagalog', or 'english'."}), 400
        if not text_input:
            return jsonify({"error": "No text provided"}), 400
        if loaded_models[language] is None:
            return jsonify({"error": f"Model for {language} failed to load"}), 500

        print(f"🔄 Generating speech for '{text_input}' in {language}...")

        processor = loaded_processors[language]
        model = loaded_models[language]
        inputs = processor(text_input, return_tensors="pt")

        # VITS models do not implement .generate(); the documented API is a
        # forward pass whose output carries the synthesized waveform.
        with torch.no_grad():
            output = model(**inputs).waveform

        waveform = output.squeeze().cpu().numpy()

        # Use the model's actual output rate rather than a hard-coded value.
        sample_rate = getattr(model.config, "sampling_rate", DEFAULT_SAMPLE_RATE)

        output_filename = os.path.join(OUTPUT_DIR, f"{language}_output.wav")
        sf.write(output_filename, waveform, sample_rate)

        print(f"✅ Speech generated! File saved: {output_filename}")
        return jsonify({
            "message": "TTS audio generated",
            "file_url": f"/download/{language}_output.wav"
        })

    except Exception as e:
        print(f"❌ Error generating TTS: {e}")
        return jsonify({"error": f"Internal server error: {str(e)}"}), 500


@app.route("/download/<filename>", methods=["GET"])
def download_audio(filename):
    """Serve a generated audio file by name.

    The route rule must declare <filename>; the original rule
    ("/download/") had no URL variable, so Flask raised at registration.
    """
    # basename() strips any directory components, preventing path
    # traversal (e.g. "../../etc/passwd") from escaping OUTPUT_DIR.
    safe_name = os.path.basename(filename)
    file_path = os.path.join(OUTPUT_DIR, safe_name)
    if os.path.isfile(file_path):
        return send_file(file_path, mimetype="audio/wav", as_attachment=True)
    return jsonify({"error": "File not found"}), 404


if __name__ == "__main__":
    # debug=False: the Werkzeug debugger on a publicly-bound host is a
    # remote-code-execution risk and must never run in production.
    app.run(host="0.0.0.0", port=7860, debug=False)