Spaces:

Coco-18
/

Kapamtalk

Sleeping

File size: 5,856 Bytes

# app.py - Main application file

import os
import sys
import logging
import traceback

# Logging setup: timestamped, level-tagged messages at INFO and above
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.INFO,
)
logger = logging.getLogger("speech_api")

# Cache locations, all redirected under /tmp. Keys are the environment
# variable names that get exported; values are the directories they point to.
cache_dirs = dict(
    HF_HOME="/tmp/hf_home",
    TRANSFORMERS_CACHE="/tmp/transformers_cache",
    HUGGINGFACE_HUB_CACHE="/tmp/huggingface_hub_cache",
    TORCH_HOME="/tmp/torch_home",
    XDG_CACHE_HOME="/tmp/xdg_cache",
)

# Export each variable first, then make sure its directory exists. A failure
# to create a directory is logged and the loop moves on to the next entry.
for variable_name, directory in cache_dirs.items():
    os.environ[variable_name] = directory
    try:
        os.makedirs(directory, exist_ok=True)
    except Exception as exc:
        logger.error(f"❌ Failed to create directory {directory}: {exc}")
    else:
        logger.info(f"📁 Created cache directory: {directory}")
# Now import the rest of the libraries
try:
    import librosa
    import glob
    import numpy as np
    import torch
    from pydub import AudioSegment
    import tempfile
    import soundfile as sf
    from flask import Flask, request, jsonify, send_file, g
    from flask_cors import CORS
    from werkzeug.utils import secure_filename

    # Import functionality from other modules
    from translator import (
        init_models, check_model_status, handle_asr_request,
        handle_tts_request, handle_translation_request
    )
    from evaluate import (
        handle_evaluation_request, handle_upload_reference,
        init_reference_audio, calculate_similarity
    )

    logger.info("✅ All required libraries imported successfully")
except ImportError as e:
    logger.critical(f"❌ Failed to import necessary libraries: {str(e)}")
    sys.exit(1)

# Select the compute device: "cuda" when torch reports a usable GPU, else "cpu"
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cuda":
    logger.info(f"🚀 CUDA available: {torch.cuda.get_device_name(0)}")
else:
    logger.info("⚠️ CUDA not available, using CPU")

# Shared settings handed to the request handlers below
SAMPLE_RATE = 16000
OUTPUT_DIR = "/tmp/audio_outputs"
REFERENCE_AUDIO_DIR = "./reference_audio"

# Make sure the output directory exists; a failure is logged, not raised
try:
    os.makedirs(OUTPUT_DIR, exist_ok=True)
except Exception as exc:
    logger.error(f"❌ Failed to create output directory: {exc}")
else:
    logger.info(f"📁 Created output directory: {OUTPUT_DIR}")

# Initialize Flask app
# The `app` object must exist before any of the @app.route decorators below run.
app = Flask(__name__)
# Wrap the app with flask_cors using its default settings (no options passed).
CORS(app)

# Load models
# Runs at import time, so the models are requested before any route is served.
# `device` is the "cuda"/"cpu" string chosen above.
init_models(device)


# Define routes
@app.route("/", methods=["GET"])
def home():
    """Return a small JSON payload announcing that the API is running."""
    payload = {"message": "Speech API is running", "status": "active"}
    return jsonify(payload)


@app.route("/health", methods=["GET"])
def health_check():
    """Return the model status report extended with API and device info.

    The mapping returned by ``check_model_status()`` is augmented in place
    with ``api_status`` (always ``"online"``) and ``device`` (the
    module-level device string) and then sent back as JSON.
    """
    report = check_model_status()
    report.update(api_status="online", device=device)
    return jsonify(report)


@app.route("/asr", methods=["POST"])
def transcribe_audio():
    """Delegate a POST on /asr to ``handle_asr_request``.

    The handler receives the current Flask request together with OUTPUT_DIR
    and SAMPLE_RATE; whatever it returns is used as the response.
    """
    response = handle_asr_request(request, OUTPUT_DIR, SAMPLE_RATE)
    return response


@app.route("/tts", methods=["POST"])
def generate_tts():
    """Delegate a POST on /tts to ``handle_tts_request``.

    The handler receives the current Flask request and OUTPUT_DIR; whatever
    it returns is used as the response.
    """
    response = handle_tts_request(request, OUTPUT_DIR)
    return response


@app.route("/translate", methods=["POST"])
def translate_text():
    """Delegate a POST on /translate to ``handle_translation_request``.

    The handler receives the current Flask request; whatever it returns is
    used as the response.
    """
    response = handle_translation_request(request)
    return response


@app.route("/download/<filename>", methods=["GET"])
def download_audio(filename):
    """Serve a file from OUTPUT_DIR as a WAV attachment.

    Args:
        filename: File name taken from the URL path.

    Returns:
        The file sent with mimetype ``audio/wav`` as an attachment, or a
        JSON error body with HTTP status 404 when the name does not resolve
        to a regular file located inside OUTPUT_DIR.
    """
    output_root = os.path.abspath(OUTPUT_DIR)
    # abspath collapses "." / ".." components without touching the filesystem,
    # so the containment check below sees the location that would really be
    # opened.
    file_path = os.path.abspath(os.path.join(output_root, filename))

    # Only paths strictly below OUTPUT_DIR are eligible; this rejects names
    # such as ".." that would otherwise point at a parent directory.
    stays_inside = file_path.startswith(output_root + os.sep)

    # isfile (rather than exists) keeps directories away from send_file,
    # which would otherwise fail instead of producing a clean 404.
    if stays_inside and os.path.isfile(file_path):
        logger.info(f"📤 Serving audio file: {file_path}")
        return send_file(file_path, mimetype="audio/wav", as_attachment=True)

    logger.warning(f"⚠️ Requested file not found: {file_path}")
    return jsonify({"error": "File not found"}), 404


@app.route("/evaluate", methods=["POST"])
def evaluate_pronunciation():
    """Delegate a POST on /evaluate to ``handle_evaluation_request``.

    The handler receives the current Flask request together with
    REFERENCE_AUDIO_DIR, OUTPUT_DIR and SAMPLE_RATE; whatever it returns is
    used as the response.
    """
    response = handle_evaluation_request(
        request,
        REFERENCE_AUDIO_DIR,
        OUTPUT_DIR,
        SAMPLE_RATE,
    )
    return response


@app.route("/check_references", methods=["GET"])
def check_references():
    """Report which reference-audio folders exist and what WAV files they hold.

    For each known phrase name, the folder of that name under
    REFERENCE_AUDIO_DIR is inspected. The JSON response contains the base
    directory, whether it exists, and one entry per phrase under
    ``patterns``.
    """
    phrase_names = (
        "mayap_a_abak",
        "mayap_a_ugtu",
        "mayap_a_gatpanapun",
        "mayap_a_bengi",
        "komusta_ka",
    )

    def describe(phrase):
        """Build the status entry for a single phrase folder."""
        folder = os.path.join(REFERENCE_AUDIO_DIR, phrase)
        if not os.path.exists(folder):
            return {"exists": False, "path": folder}

        recordings = glob.glob(os.path.join(folder, "*.wav"))
        return {
            "exists": True,
            "path": folder,
            "file_count": len(recordings),
            "files": [os.path.basename(recording) for recording in recordings],
        }

    per_phrase = {phrase: describe(phrase) for phrase in phrase_names}

    return jsonify({
        "reference_audio_dir": REFERENCE_AUDIO_DIR,
        "directory_exists": os.path.exists(REFERENCE_AUDIO_DIR),
        "patterns": per_phrase,
    })


@app.route("/upload_reference", methods=["POST"])
def upload_reference_audio():
    """Delegate a POST on /upload_reference to ``handle_upload_reference``.

    The handler receives the current Flask request together with
    REFERENCE_AUDIO_DIR and SAMPLE_RATE; whatever it returns is used as the
    response.
    """
    response = handle_upload_reference(request, REFERENCE_AUDIO_DIR, SAMPLE_RATE)
    return response


# One-time initialization hook, run before the first request is handled.
# The "already done" marker lives at module level: ``flask.g`` is created anew
# for every request, so a marker stored there is never seen again and the
# initialization would be repeated on each request.
_reference_audio_initialized = False


@app.before_request
def before_request():
    """Initialize the reference audio once per process, on the first request.

    Calls ``init_reference_audio(REFERENCE_AUDIO_DIR, OUTPUT_DIR)`` the first
    time any request arrives and does nothing on later requests. The flag is
    only set after the call returns, so a failed initialization is retried on
    the next request.
    """
    global _reference_audio_initialized
    if not _reference_audio_initialized:
        init_reference_audio(REFERENCE_AUDIO_DIR, OUTPUT_DIR)
        _reference_audio_initialized = True


if __name__ == "__main__":
    # Script entry point: prepare reference audio, log model status, then
    # start the built-in Flask server on port 7860, bound to all interfaces.
    init_reference_audio(REFERENCE_AUDIO_DIR, OUTPUT_DIR)
    logger.info("🚀 Starting Speech API server")

    # Get the status for logging
    status = check_model_status()
    logger.info(f"📊 System status: ASR model: {'✅' if status['asr_model'] == 'loaded' else '❌'}")
    for lang, model_status in status['tts_models'].items():
        logger.info(f"📊 TTS model {lang}: {'✅' if model_status == 'loaded' else '❌'}")

    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary Python, and this server listens on every interface. Debug mode
    # is therefore off unless explicitly requested (e.g. FLASK_DEBUG=1 for
    # local development).
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}

    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)