File size: 5,856 Bytes
0812080 9c8a2cf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
# app.py - Main application file
import os
import sys
import logging
import traceback
# Process-wide logging setup: INFO level, timestamped "time - level - message" records.
logging.basicConfig(
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Named logger used by the startup code and route handlers in this module.
logger = logging.getLogger("speech_api")
# Cache locations, keyed by the environment variable that points at each one.
# Every path lives under /tmp.
cache_dirs = {
    "HF_HOME": "/tmp/hf_home",
    "TRANSFORMERS_CACHE": "/tmp/transformers_cache",
    "HUGGINGFACE_HUB_CACHE": "/tmp/huggingface_hub_cache",
    "TORCH_HOME": "/tmp/torch_home",
    "XDG_CACHE_HOME": "/tmp/xdg_cache"
}
# Export each variable first, then make sure its directory exists. A failure
# is logged and the loop moves on to the next entry.
# NOTE(review): presumably this must run before the third-party imports below
# so those libraries pick up the cache locations — confirm.
for env_var, path in cache_dirs.items():
    os.environ[env_var] = path
    try:
        os.makedirs(path, exist_ok=True)
    except Exception as e:
        logger.error(f"β Failed to create directory {path}: {e!s}")
    else:
        logger.info(f"π Created cache directory: {path}")
# Now import the rest of the libraries.
# Third-party and project-local imports are wrapped in one try block so that a
# missing dependency is logged at CRITICAL level and the process exits with
# status 1 instead of failing with a bare traceback.
try:
    import librosa
    import glob
    import numpy as np
    import torch
    from pydub import AudioSegment
    import tempfile
    import soundfile as sf
    from flask import Flask, request, jsonify, send_file, g
    from flask_cors import CORS
    from werkzeug.utils import secure_filename
    # Import functionality from other modules
    from translator import (
        init_models, check_model_status, handle_asr_request,
        handle_tts_request, handle_translation_request
    )
    from evaluate import (
        handle_evaluation_request, handle_upload_reference,
        init_reference_audio, calculate_similarity
    )
    # The success message is a single-line literal; it had been split across
    # two lines, which left the string unterminated.
    logger.info("✅ All required libraries imported successfully")
except ImportError as e:
    # NOTE(review): the leading symbol of this message looks mis-encoded
    # (mojibake) — confirm the file's encoding and the intended character.
    logger.critical(f"β Failed to import necessary libraries: {str(e)}")
    sys.exit(1)
# Pick the compute device: "cuda" when torch reports a usable GPU, else "cpu".
if not torch.cuda.is_available():
    logger.info("β οΈ CUDA not available, using CPU")
    device = "cpu"
else:
    logger.info(f"π CUDA available: {torch.cuda.get_device_name(0)}")
    device = "cuda"
# Module-wide settings handed to the request handlers below.
SAMPLE_RATE = 16000  # passed to the ASR/evaluation/upload handlers; presumably Hz — confirm
OUTPUT_DIR = "/tmp/audio_outputs"  # directory the /download route serves files from
REFERENCE_AUDIO_DIR = "./reference_audio"  # relative path; resolved against the working directory
# Ensure the output directory exists; a failure is logged but does not stop startup.
try:
    os.makedirs(OUTPUT_DIR, exist_ok=True)
except Exception as e:
    logger.error(f"β Failed to create output directory: {e!s}")
else:
    logger.info(f"π Created output directory: {OUTPUT_DIR}")
# Initialize Flask app
# The application object is created at import time; the route decorators
# below register against it.
app = Flask(__name__)
# Wrap the app with flask_cors using its default settings (no options passed).
CORS(app)
# Load models
# init_models (from translator) is called once at import time with the device
# string chosen above ("cuda" or "cpu").
# NOTE(review): presumably this loads the ASR/TTS/translation models and may
# be slow — confirm against translator.init_models.
init_models(device)
# Define routes
@app.route("/", methods=["GET"])
def home():
    """Return a small JSON payload confirming that the API is running."""
    return jsonify(message="Speech API is running", status="active")
@app.route("/health", methods=["GET"])
def health_check():
    """Report model status together with API availability and the active device.

    Takes the mapping returned by ``check_model_status()``, adds the
    ``api_status`` and ``device`` entries to it in place, and returns it as JSON.
    """
    report = check_model_status()
    report.update(api_status="online", device=device)
    return jsonify(report)
@app.route("/asr", methods=["POST"])
def transcribe_audio():
    """Handle POST /asr by delegating to ``handle_asr_request``.

    The current request, the output directory and the sample rate are passed
    through; the handler's return value is returned unchanged.
    """
    asr_response = handle_asr_request(request, OUTPUT_DIR, SAMPLE_RATE)
    return asr_response
@app.route("/tts", methods=["POST"])
def generate_tts():
    """Handle POST /tts by delegating to ``handle_tts_request``.

    The current request and the output directory are passed through; the
    handler's return value is returned unchanged.
    """
    tts_response = handle_tts_request(request, OUTPUT_DIR)
    return tts_response
@app.route("/translate", methods=["POST"])
def translate_text():
    """Handle POST /translate by delegating to ``handle_translation_request``.

    Only the current request is passed; the handler's return value is
    returned unchanged.
    """
    translation_response = handle_translation_request(request)
    return translation_response
@app.route("/download/<filename>", methods=["GET"])
def download_audio(filename):
    """Serve a generated audio file from ``OUTPUT_DIR`` as a WAV attachment.

    Args:
        filename: Name of the file, taken from the URL path.

    Returns:
        The file as an ``audio/wav`` attachment, or a JSON error with HTTP 404
        when the name does not refer to a regular file inside ``OUTPUT_DIR``.
    """
    file_path = os.path.join(OUTPUT_DIR, filename)
    # Resolve both paths so names such as ".." or symlinks cannot point
    # outside the output directory.
    output_root = os.path.realpath(OUTPUT_DIR)
    resolved_path = os.path.realpath(file_path)
    try:
        inside_output_dir = (
            os.path.commonpath([output_root, resolved_path]) == output_root
        )
    except ValueError:
        # commonpath raises when the paths cannot be compared (e.g. different
        # drives on Windows); treat that as "outside".
        inside_output_dir = False
    # isfile (not exists) so that a directory is never handed to send_file.
    if inside_output_dir and os.path.isfile(resolved_path):
        logger.info(f"π€ Serving audio file: {file_path}")
        return send_file(resolved_path, mimetype="audio/wav", as_attachment=True)
    logger.warning(f"β οΈ Requested file not found: {file_path}")
    return jsonify({"error": "File not found"}), 404
@app.route("/evaluate", methods=["POST"])
def evaluate_pronunciation():
    """Handle POST /evaluate by delegating to ``handle_evaluation_request``.

    The current request, the reference-audio directory, the output directory
    and the sample rate are passed through; the handler's return value is
    returned unchanged.
    """
    evaluation_response = handle_evaluation_request(
        request, REFERENCE_AUDIO_DIR, OUTPUT_DIR, SAMPLE_RATE
    )
    return evaluation_response
@app.route("/check_references", methods=["GET"])
def check_references():
    """Report which reference-audio pattern directories exist and what they hold.

    For each known pattern name, looks for a sub-directory of
    ``REFERENCE_AUDIO_DIR``. An existing directory is reported with its path,
    the number of ``*.wav`` files in it and their base names; a missing one is
    reported with ``exists`` set to False and the path that was checked.
    """
    known_patterns = (
        "mayap_a_abak",
        "mayap_a_ugtu",
        "mayap_a_gatpanapun",
        "mayap_a_bengi",
        "komusta_ka",
    )
    report = {}
    for name in known_patterns:
        directory = os.path.join(REFERENCE_AUDIO_DIR, name)
        if not os.path.exists(directory):
            report[name] = {"exists": False, "path": directory}
            continue
        recordings = glob.glob(os.path.join(directory, "*.wav"))
        report[name] = {
            "exists": True,
            "path": directory,
            "file_count": len(recordings),
            "files": [os.path.basename(recording) for recording in recordings],
        }
    summary = {
        "reference_audio_dir": REFERENCE_AUDIO_DIR,
        "directory_exists": os.path.exists(REFERENCE_AUDIO_DIR),
        "patterns": report,
    }
    return jsonify(summary)
@app.route("/upload_reference", methods=["POST"])
def upload_reference_audio():
    """Handle POST /upload_reference by delegating to ``handle_upload_reference``.

    The current request, the reference-audio directory and the sample rate are
    passed through; the handler's return value is returned unchanged.
    """
    upload_response = handle_upload_reference(request, REFERENCE_AUDIO_DIR, SAMPLE_RATE)
    return upload_response
# Initialization hook: prepares the reference audio once, on the first request
# this process handles.
@app.before_request
def before_request():
    """Run ``init_reference_audio`` once per process, before the first request.

    ``flask.g`` is created fresh for every request, so a flag stored only on
    ``g`` is never seen again and the initialization would repeat on every
    request. The "already done" flag is therefore kept in ``app.config``,
    which lives as long as the application object. If initialization raises,
    the flag stays unset and the next request retries.
    """
    if not app.config.get("REFERENCE_AUDIO_INITIALIZED", False):
        init_reference_audio(REFERENCE_AUDIO_DIR, OUTPUT_DIR)
        app.config["REFERENCE_AUDIO_INITIALIZED"] = True
    # Still set on every request so code that reads g.initialized keeps working.
    g.initialized = True
if __name__ == "__main__":
    # Script entry point: prepare the reference audio, log model status, then
    # start the built-in Flask server on all interfaces, port 7860.
    init_reference_audio(REFERENCE_AUDIO_DIR, OUTPUT_DIR)
    # NOTE(review): several log prefixes in this file look mis-encoded
    # (mojibake) — confirm the file's encoding and the intended characters.
    logger.info("π Starting Speech API server")
    # Get the status for logging
    status = check_model_status()
    # The status markers are single-line literals; they had been split across
    # lines, which left the f-strings unterminated.
    logger.info(f"π System status: ASR model: {'✅' if status['asr_model'] == 'loaded' else 'β'}")
    for lang, model_status in status['tts_models'].items():
        logger.info(f"π TTS model {lang}: {'✅' if model_status == 'loaded' else 'β'}")
    # Debug mode enables the interactive Werkzeug debugger, which allows
    # arbitrary code execution; on a server bound to 0.0.0.0 it must be
    # opt-in. Set FLASK_DEBUG=1 to enable it for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)