|
|
|
|
|
import os |
|
import sys |
|
import logging |
|
import traceback |
|
import time |
|
import uuid |
|
import threading |
|
from functools import lru_cache |
|
import concurrent.futures |
|
from collections import defaultdict, deque |
|
|
|
|
|
# Root logging configuration: INFO and above, each record stamped with the
# time, level and numeric id of the emitting thread.
logging.basicConfig(
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s - %(levelname)s - [%(thread)d] %(message)s',
    level=logging.INFO,
)

# Logger shared by all code in this module.
logger = logging.getLogger("speech_api")
|
|
|
|
|
# --- Rate limiting state (consumed by the rate_limit decorator) ---
# Length of the sliding window; compared against time.time() differences,
# so the unit is seconds.
RATE_LIMIT_WINDOW = 60
# Maximum number of requests one client may make inside a window.
MAX_REQUESTS_PER_WINDOW = 15
# Per-client queue of request timestamps.
REQUEST_HISTORY = defaultdict(deque)
# Guards every read and write of REQUEST_HISTORY.
rate_limit_lock = threading.Lock()

# --- Background worker pool ---
# NOTE(review): worker_pool is not referenced in the visible part of this file.
MAX_WORKERS = 3
worker_pool = concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
|
|
|
|
|
# Environment variable -> directory used for model/library caches.  Every
# location lives under /tmp.
cache_dirs = dict(
    HF_HOME="/tmp/hf_home",
    TRANSFORMERS_CACHE="/tmp/transformers_cache",
    HUGGINGFACE_HUB_CACHE="/tmp/huggingface_hub_cache",
    TORCH_HOME="/tmp/torch_home",
    XDG_CACHE_HOME="/tmp/xdg_cache",
)

# Export each variable and make sure its directory exists.  A failure to
# create a directory is logged and does not stop start-up.
for env_var, path in cache_dirs.items():
    os.environ[env_var] = path
    try:
        os.makedirs(path, exist_ok=True)
    except Exception as e:
        logger.error(f"β Failed to create directory {path}: {str(e)}")
    else:
        logger.info(f"π Created cache directory: {path}")
|
|
|
|
|
# Third-party and project-local imports are grouped in one guarded block: if
# any of them is missing, the failure is logged at CRITICAL level and the
# process exits with status 1.
try:
    import librosa
    import glob
    import numpy as np
    import torch
    from pydub import AudioSegment
    import tempfile
    import soundfile as sf
    from flask import Flask, request, jsonify, send_file, g
    from flask_cors import CORS
    from werkzeug.utils import secure_filename

    # Project-local model helpers and request handlers.
    from translator import (
        init_models, check_model_status, handle_asr_request,
        handle_tts_request, handle_translation_request
    )
    from evaluate import (
        handle_evaluation_request, handle_upload_reference,
        init_reference_audio, calculate_similarity
    )

    # The success message is kept on a single physical line; a string literal
    # broken across two lines is a syntax error.
    logger.info("✅ All required libraries imported successfully")
except ImportError as e:
    logger.critical(f"β Failed to import necessary libraries: {str(e)}")
    sys.exit(1)
|
|
|
|
|
# Select the compute device once at import time.
device = "cuda" if torch.cuda.is_available() else "cpu"

if device == "cuda":
    logger.info(f"π CUDA available: {torch.cuda.get_device_name(0)}")
    # Release cached blocks, cap this process at 70% of the GPU memory and
    # turn on the cuDNN benchmark mode.
    torch.cuda.empty_cache()
    torch.cuda.set_per_process_memory_fraction(0.7)
    torch.backends.cudnn.benchmark = True
else:
    logger.info("β οΈ CUDA not available, using CPU")
|
|
|
|
|
# --- Audio / storage constants ---
SAMPLE_RATE = 16000                          # passed to the ASR, evaluation and upload handlers
OUTPUT_DIR = "/tmp/audio_outputs"            # root for generated and per-user files
REFERENCE_AUDIO_DIR = "./reference_audios"   # may be replaced by init_reference_audio()
MAX_CACHE_SIZE = 50                          # upper bound on entries per response cache

# --- In-memory response caches, one per endpoint family ---
asr_cache, tts_cache, translation_cache = {}, {}, {}

# Make sure the output root exists; a failure is logged, not raised.
try:
    os.makedirs(OUTPUT_DIR, exist_ok=True)
except Exception as e:
    logger.error(f"β Failed to create output directory: {str(e)}")
else:
    logger.info(f"π Created output directory: {OUTPUT_DIR}")
|
|
|
|
|
def get_user_output_dir(user_id=None):
    """Create (if needed) and return the output directory for one user.

    ``user_id`` ultimately comes from the client-supplied ``X-User-ID``
    header, so it is reduced to ASCII letters, digits, ``-`` and ``_`` and
    capped at 64 characters before it is used as a path component.  Without
    this, a value such as ``../../somewhere`` would create and select a
    directory outside ``OUTPUT_DIR``.

    Args:
        user_id: Identifier of the caller.  ``None`` or a value with no
            usable characters yields a random 8-character identifier.

    Returns:
        Path of the existing directory ``OUTPUT_DIR/<sanitised id>``.

    Raises:
        OSError: If the directory cannot be created.
    """
    import re  # local import; the module-level import block stays untouched

    safe_id = ""
    if user_id is not None:
        safe_id = re.sub(r"[^A-Za-z0-9_-]", "", str(user_id))[:64]
    if not safe_id:
        safe_id = str(uuid.uuid4())[:8]

    user_dir = os.path.join(OUTPUT_DIR, safe_id)
    os.makedirs(user_dir, exist_ok=True)
    return user_dir
|
|
|
|
|
# Flask application with CORS enabled through flask_cors.
app = Flask(__name__)
CORS(app)

# Request bodies are limited to 16 MiB.
app.config.update(MAX_CONTENT_LENGTH=16 * 1024 * 1024)

# Load the models on the device selected above, at import time.
init_models(device)
|
|
|
|
|
def rate_limit(f):
    """Decorate a view with a per-client sliding-window request limit.

    A client is keyed by ``request.remote_addr``, falling back to the
    ``X-Forwarded-For`` header and finally ``'unknown'``.  Each client may
    make at most ``MAX_REQUESTS_PER_WINDOW`` calls within
    ``RATE_LIMIT_WINDOW`` seconds; a call over the limit receives a JSON
    error body with status 429 and the wrapped view is not invoked.
    """
    from functools import wraps

    @wraps(f)
    def limited_view(*args, **kwargs):
        # NOTE(review): when remote_addr is always populated (e.g. by a
        # reverse proxy) the header fallback is never reached - confirm the
        # deployment topology.
        client_ip = request.remote_addr or request.headers.get('X-Forwarded-For', 'unknown')

        with rate_limit_lock:
            now = time.time()

            # First request from this client: start a bounded timestamp queue.
            if client_ip not in REQUEST_HISTORY:
                REQUEST_HISTORY[client_ip] = deque(maxlen=MAX_REQUESTS_PER_WINDOW)
            timestamps = REQUEST_HISTORY[client_ip]

            # Drop timestamps that have aged out of the window.
            while timestamps and now - timestamps[0] > RATE_LIMIT_WINDOW:
                timestamps.popleft()

            # Still at capacity after pruning: refuse without recording.
            if len(timestamps) >= MAX_REQUESTS_PER_WINDOW:
                logger.warning(f"β οΈ Rate limit exceeded for {client_ip}")
                payload = {
                    "error": "Rate limit exceeded",
                    "message": "Too many requests, please try again later"
                }
                return jsonify(payload), 429

            timestamps.append(now)

        # The lock is released before the (potentially slow) view runs.
        return f(*args, **kwargs)

    return limited_view
|
|
|
def compute_hash(data):
    """Return the hexadecimal MD5 digest of *data* for use as a cache key.

    The digest only identifies cache entries; it is not a security measure.

    Args:
        data: ``str`` values are hashed as UTF-8.  ``bytes``/``bytearray``
            values are hashed directly instead of through their ``repr``,
            which avoids building a much larger temporary string for
            uploaded audio.  Any other object is hashed through ``str(data)``.

    Returns:
        A 32-character lowercase hexadecimal string.
    """
    import hashlib

    if isinstance(data, (bytes, bytearray)):
        payload = data
    elif isinstance(data, str):
        payload = data.encode('utf-8')
    else:
        payload = str(data).encode('utf-8')
    return hashlib.md5(payload).hexdigest()
|
|
|
|
|
def cache_response(cache_dict, key_fn, max_size=MAX_CACHE_SIZE):
    """Build a decorator that memoises a function's responses in *cache_dict*.

    Args:
        cache_dict: Mutable mapping that stores the cached responses.
        key_fn: Called with the wrapped function's arguments; returns the
            cache key for that call.
        max_size: Maximum number of entries kept.  When it is exceeded the
            oldest inserted entries are evicted.

    Returns:
        A decorator.  The wrapped function's metadata (``__name__`` etc.) is
        preserved via ``functools.wraps`` so that several decorated Flask
        views do not all end up named ``wrapper``.

    Caching rules: a tuple response is cached only when its second element
    is not an integer status of 400 or above; any other response is always
    cached.
    """
    from functools import wraps

    missing = object()  # sentinel: distinguishes "no entry" from a cached None

    def decorator(f):
        @wraps(f)
        def wrapper(*args, **kwargs):
            key = key_fn(*args, **kwargs)

            cached = cache_dict.get(key, missing)
            if cached is not missing:
                logger.info(f"✅ Cache hit for {f.__name__}")
                return cached

            response = f(*args, **kwargs)

            # Tuples may be (body, status), (body, headers) or
            # (body, status, headers); only an integer second element is
            # treated as a status code.
            cacheable = True
            if isinstance(response, tuple) and len(response) > 1:
                status_code = response[1]
                if isinstance(status_code, int) and status_code >= 400:
                    cacheable = False

            if cacheable:
                cache_dict[key] = response
                # Evict in insertion order until the bound is respected.
                while len(cache_dict) > max_size:
                    cache_dict.pop(next(iter(cache_dict)), None)

            return response
        return wrapper
    return decorator
|
|
|
|
|
# Process-wide "reference audio initialised" state.  flask.g is created anew
# for every request, so a flag stored on it never survives to the next one.
_reference_audio_ready = False
_reference_audio_lock = threading.Lock()


def _ensure_reference_audio():
    """Run init_reference_audio() once per process and adopt its directory."""
    global REFERENCE_AUDIO_DIR, _reference_audio_ready

    if _reference_audio_ready:
        return
    with _reference_audio_lock:
        # Re-check under the lock: another thread may have finished meanwhile.
        if _reference_audio_ready:
            return
        updated_ref_dir = init_reference_audio(REFERENCE_AUDIO_DIR, OUTPUT_DIR)
        if updated_ref_dir and updated_ref_dir != REFERENCE_AUDIO_DIR:
            REFERENCE_AUDIO_DIR = updated_ref_dir
            logger.info(f"π Updated reference audio directory to: {REFERENCE_AUDIO_DIR}")
        # Only set after success, so a failed initialisation is retried on
        # the next request.
        _reference_audio_ready = True


@app.before_request
def before_request():
    """Prepare per-request state before the view runs.

    Stores a short request id and the start time on ``g``, makes sure the
    reference audio has been initialised (once per process), and resolves
    the caller's output directory from the ``X-User-ID`` header, using a
    random 8-character id when the header is absent.
    """
    g.request_id = str(uuid.uuid4())[:8]
    g.start_time = time.time()

    _ensure_reference_audio()

    user_id = request.headers.get('X-User-ID', str(uuid.uuid4())[:8])
    g.user_output_dir = get_user_output_dir(user_id)

    logger.info(f"[{g.request_id}] π {request.method} {request.path} started")
|
|
|
@app.after_request
def after_request(response):
    """Log the request duration and set the ``Cache-Control`` header.

    Args:
        response: The response produced for the current request.

    Returns:
        The same response object, with ``Cache-Control`` set to
        ``public, max-age=86400`` for the ``download_audio`` endpoint and
        ``no-store`` for everything else.
    """
    # Both attributes are set in before_request; guard in case that hook did
    # not complete.
    if hasattr(g, 'request_id') and hasattr(g, 'start_time'):
        duration = time.time() - g.start_time
        # The message is kept on one physical line; a string literal broken
        # across two lines is a syntax error.
        logger.info(f"[{g.request_id}] ✅ Completed in {duration:.2f}s with status {response.status_code}")

    if request.endpoint == 'download_audio':
        response.headers['Cache-Control'] = 'public, max-age=86400'
    else:
        response.headers['Cache-Control'] = 'no-store'

    return response
|
|
|
|
|
@app.errorhandler(Exception)
def handle_exception(e):
    """Convert any exception that escapes a view into a JSON error response.

    HTTP errors raised by Flask/Werkzeug (404, 405, 413, malformed JSON, ...)
    keep their own status code instead of being reported as 500.  Everything
    else is logged with its traceback and answered with status 500.

    Args:
        e: The exception that was raised.

    Returns:
        A ``(json_response, status_code)`` tuple.
    """
    from werkzeug.exceptions import HTTPException

    # A handler registered for Exception also receives HTTPException
    # subclasses; pass their status through rather than masking it.
    if isinstance(e, HTTPException):
        return jsonify({
            "error": e.name,
            "message": e.description
        }), e.code

    # Log the traceback at ERROR level: with the INFO-level configuration of
    # this module a DEBUG record would never be emitted.
    logger.error(f"β Unhandled exception: {str(e)}", exc_info=e)

    return jsonify({
        "error": "Internal server error",
        "message": str(e)
    }), 500
|
|
|
|
|
@app.route("/", methods=["GET"])
def home():
    """Return a static JSON banner describing the running service."""
    banner = {
        "message": "Speech API is running",
        "status": "active",
        "version": "1.1",
        "environment": "Hugging Face Spaces"
    }
    return jsonify(banner)
|
|
|
@app.route("/health", methods=["GET"])
def health_check():
    """Report model status, device, GPU memory and cache sizes as JSON.

    Starts from the dict returned by ``check_model_status()`` and adds the
    keys ``api_status``, ``device``, ``memory`` (only when CUDA is available)
    and ``cache_stats``.
    """
    bytes_per_mb = 1024 * 1024

    health_status = check_model_status()
    health_status["api_status"] = "online"
    health_status["device"] = device

    if torch.cuda.is_available():
        allocated_mb = torch.cuda.memory_allocated() / bytes_per_mb
        reserved_mb = torch.cuda.memory_reserved() / bytes_per_mb
        health_status["memory"] = {
            "cuda_allocated_mb": round(allocated_mb, 2),
            "cuda_reserved_mb": round(reserved_mb, 2)
        }

    cache_stats = {
        "asr_cache_size": len(asr_cache),
        "tts_cache_size": len(tts_cache),
        "translation_cache_size": len(translation_cache)
    }
    health_status["cache_stats"] = cache_stats

    return jsonify(health_status)
|
|
|
|
|
@app.route("/asr", methods=["POST"])
@rate_limit
def transcribe_audio():
    """Transcribe an uploaded audio file, caching successful results.

    When the request carries an ``audio`` file part, a cache key is derived
    from the file content and the ``language`` form field (default
    ``"english"``).  A cached result for that key is returned directly;
    otherwise the request is delegated to ``handle_asr_request`` and a
    ``(response, 200)`` result is stored in ``asr_cache``.

    Returns:
        Whatever ``handle_asr_request`` returned (or a cached copy of it).
    """
    user_output_dir = getattr(g, 'user_output_dir', OUTPUT_DIR)

    # Stays None when there is no audio part, so the caching step below can
    # never reference an unbound name.
    cache_key = None

    if 'audio' in request.files:
        audio_file = request.files['audio']
        language = request.form.get("language", "english").lower()

        # Read the upload for hashing, then rewind so the handler can read it.
        audio_content = audio_file.read()
        audio_file.seek(0)

        cache_key = f"asr_{compute_hash(audio_content)}_{language}"

        # Single lookup: a concurrent eviction between a membership test and
        # an index would raise KeyError.
        cached = asr_cache.get(cache_key)
        if cached is not None:
            logger.info(f"[{g.request_id}] ✅ Using cached ASR result")
            return cached

    result = handle_asr_request(request, user_output_dir, SAMPLE_RATE)

    # Only (response, 200) tuples are cached.
    if cache_key is not None and isinstance(result, tuple) and len(result) == 2:
        status_code = result[1]
        if status_code == 200:
            asr_cache[cache_key] = result

            # Evict the oldest entry once the bound is exceeded.
            if len(asr_cache) > MAX_CACHE_SIZE:
                asr_cache.pop(next(iter(asr_cache)), None)

    return result
|
|
|
@app.route("/tts", methods=["POST"])
@rate_limit
def generate_tts():
    """Synthesise speech for a JSON request, caching successful results.

    For a JSON object body, a cache key is derived from the stripped
    ``text`` and the lower-cased ``language`` (default ``"kapampangan"``).
    A cached result for that key is returned directly; otherwise the request
    is delegated to ``handle_tts_request`` and a ``(response, 200)`` result
    is stored in ``tts_cache``.  Bodies that are not a JSON object, or whose
    fields are not strings, bypass the cache and go straight to the handler.

    Returns:
        Whatever ``handle_tts_request`` returned (or a cached copy of it).
    """
    user_output_dir = getattr(g, 'user_output_dir', OUTPUT_DIR)

    # Stays None unless a usable key can be built, so the caching step below
    # can never reference an unbound name.
    cache_key = None

    if request.is_json:
        data = request.get_json()
        if isinstance(data, dict) and data:
            text = data.get("text", "")
            language = data.get("language", "kapampangan")

            # Validation of malformed fields is left to the handler.
            if isinstance(text, str) and isinstance(language, str):
                cache_key = f"tts_{compute_hash(text.strip())}_{language.lower()}"

                cached = tts_cache.get(cache_key)
                if cached is not None:
                    logger.info(f"[{g.request_id}] ✅ Using cached TTS result")
                    return cached

    result = handle_tts_request(request, user_output_dir)

    # Only (response, 200) tuples are cached.
    if cache_key is not None and isinstance(result, tuple) and len(result) == 2:
        status_code = result[1]
        if status_code == 200:
            tts_cache[cache_key] = result

            # Evict the oldest entry once the bound is exceeded.
            if len(tts_cache) > MAX_CACHE_SIZE:
                tts_cache.pop(next(iter(tts_cache)), None)

    return result
|
|
|
@app.route("/translate", methods=["POST"])
@rate_limit
def translate_text():
    """Translate text for a JSON request, caching successful results.

    For a JSON object body, a cache key is derived from the stripped
    ``text`` and the lower-cased ``source_language`` / ``target_language``.
    A cached result for that key is returned directly; otherwise the request
    is delegated to ``handle_translation_request`` and a ``(response, 200)``
    result is stored in ``translation_cache``.  Bodies that are not a JSON
    object, or whose fields are not strings, bypass the cache.

    Returns:
        Whatever ``handle_translation_request`` returned (or a cached copy).
    """
    # Stays None unless a usable key can be built, so the caching step below
    # can never reference an unbound name.
    cache_key = None

    if request.is_json:
        data = request.get_json()
        if isinstance(data, dict) and data:
            text = data.get("text", "")
            source_language = data.get("source_language", "")
            target_language = data.get("target_language", "")

            # Validation of malformed fields is left to the handler.
            if all(isinstance(v, str) for v in (text, source_language, target_language)):
                cache_key = (
                    f"translate_{compute_hash(text.strip())}"
                    f"_{source_language.lower()}_{target_language.lower()}"
                )

                cached = translation_cache.get(cache_key)
                if cached is not None:
                    logger.info(f"[{g.request_id}] ✅ Using cached translation result")
                    return cached

    result = handle_translation_request(request)

    # Only (response, 200) tuples are cached.
    if cache_key is not None and isinstance(result, tuple) and len(result) == 2:
        status_code = result[1]
        if status_code == 200:
            translation_cache[cache_key] = result

            # Evict the oldest entry once the bound is exceeded.
            if len(translation_cache) > MAX_CACHE_SIZE:
                translation_cache.pop(next(iter(translation_cache)), None)

    return result
|
|
|
@app.route("/download/<filename>", methods=["GET"])
def download_audio(filename):
    """Send a generated WAV file as an attachment.

    The file is looked up, in order, in the caller's output directory, in
    ``OUTPUT_DIR`` itself, and finally anywhere below ``OUTPUT_DIR``.

    Args:
        filename: Plain file name taken from the URL.

    Returns:
        The file response, or a JSON ``{"error": "File not found"}`` body
        with status 404.
    """
    # Accept plain file names only.  "." and ".." (or anything with a path
    # component) would otherwise resolve to a directory outside the intended
    # location and make send_file fail with a 500.
    if filename in (".", "..") or os.path.basename(filename) != filename:
        logger.warning(f"β οΈ Requested file not found: {filename}")
        return jsonify({"error": "File not found"}), 404

    # 1) The caller's own directory.
    if hasattr(g, 'user_output_dir'):
        file_path = os.path.join(g.user_output_dir, filename)
        if os.path.isfile(file_path):
            logger.info(f"π€ Serving user audio file: {file_path}")
            return send_file(file_path, mimetype="audio/wav", as_attachment=True)

    # 2) The shared output root.
    file_path = os.path.join(OUTPUT_DIR, filename)
    if os.path.isfile(file_path):
        logger.info(f"π€ Serving audio file: {file_path}")
        return send_file(file_path, mimetype="audio/wav", as_attachment=True)

    # 3) Any sub-directory of the output root.
    # NOTE(review): this serves files from other users' directories too -
    # confirm that is intended.
    for root, dirs, files in os.walk(OUTPUT_DIR):
        if filename in files:
            full_path = os.path.join(root, filename)
            logger.info(f"π€ Serving found audio file: {full_path}")
            return send_file(full_path, mimetype="audio/wav", as_attachment=True)

    logger.warning(f"β οΈ Requested file not found: {filename}")
    return jsonify({"error": "File not found"}), 404
|
|
|
@app.route("/evaluate", methods=["POST"])
@rate_limit
def evaluate_pronunciation():
    """Delegate a pronunciation evaluation request to handle_evaluation_request.

    The caller's output directory from ``g`` is used when present, otherwise
    ``OUTPUT_DIR``.
    """
    if hasattr(g, 'user_output_dir'):
        user_output_dir = g.user_output_dir
    else:
        user_output_dir = OUTPUT_DIR

    return handle_evaluation_request(
        request,
        REFERENCE_AUDIO_DIR,
        user_output_dir,
        SAMPLE_RATE,
    )
|
|
|
@app.route("/check_references", methods=["GET"])
def check_references():
    """Summarise which known reference patterns have WAV files on disk.

    Returns a JSON object with the reference directory, whether it exists,
    the number of known patterns, how many of them have at least one
    ``*.wav`` file, and the total number of such files.
    """
    ref_patterns = (
        "mayap_a_abak", "mayap_a_ugtu", "mayap_a_gatpanapun", "mayap_a_bengi",
        "komusta_ka", "malaus_ko_pu", "malaus_kayu", "agaganaka_da_ka",
        "pagdulapan_da_ka", "kaluguran_da_ka", "dakal_a_salamat", "panapaya_mu_ku",
        "wa", "ali", "tuknang", "lagwa", "galo", "buri_ke_ini", "tara_na",
        "nokarin_ka_ibat", "nokarin_ka_munta", "atiu_na_ku", "nanung_panayan_mu",
        "mako_na_ka", "muli_ta_na", "nanu_ing_pengan_mu", "mekeni", "mengan_na_ka",
        "munta_ka_karin", "magkanu_ini", "mimingat_ka", "mangan_ta_na", "lakwan_da_ka",
        "nanu_maliari_kung_daptan_keka", "pilan_na_ka_banwa", "saliwan_ke_ini",
        "makananu_munta_king", "adwa", "anam", "apat", "apulu", "atlu", "dalan", "libu", "lima",
        "metung", "pitu", "siyam", "walu", "masala", "madalumdum", "maragul", "marimla",
        "malagu", "marok", "mababa", "malapit", "matuling", "maputi",
        "arung", "asbuk", "balugbug", "bitis", "buntuk", "butit", "gamat", "kuku", "salu", "tud",
        "pisan", "dara", "achi", "apu", "ima", "tatang", "pengari", "koya", "kapatad", "wali",
        "pasbul", "awang", "dagis", "bale", "ulas", "sambra", "sulu", "pitudturan",
        "luklukan", "ulnan",
    )

    directory_exists = os.path.exists(REFERENCE_AUDIO_DIR)

    patterns_with_audio = 0
    wav_total = 0
    for name in ref_patterns:
        pattern_dir = os.path.join(REFERENCE_AUDIO_DIR, name)
        if not os.path.exists(pattern_dir):
            continue
        wav_count = len(glob.glob(os.path.join(pattern_dir, "*.wav")))
        # A pattern only counts when it has at least one WAV file.
        if wav_count:
            patterns_with_audio += 1
            wav_total += wav_count

    return jsonify({
        "reference_audio_dir": REFERENCE_AUDIO_DIR,
        "directory_exists": directory_exists,
        "total_patterns": len(ref_patterns),
        "existing_patterns": patterns_with_audio,
        "total_files": wav_total
    })
|
|
|
|
|
@app.route("/check_references/detailed", methods=["GET"])
def check_references_detailed():
    """Return per-pattern details about the reference audio on disk.

    Query parameters:
        patterns: Comma-separated pattern names.  When absent or empty, a
            built-in list of ten patterns is used.

    Returns:
        JSON with ``reference_audio_dir`` and a ``patterns`` mapping.  Each
        entry has ``exists`` and ``path``; existing entries also have
        ``file_count`` and ``files`` (WAV base names).  Names that are not a
        plain directory name are reported as non-existent with an ``error``
        field and are never looked up on disk.
    """
    patterns = request.args.get('patterns', '').split(',')

    if not patterns or patterns == ['']:
        ref_patterns = ["mayap_a_abak", "mayap_a_ugtu", "mayap_a_gatpanapun", "mayap_a_bengi",
                        "komusta_ka", "malaus_ko_pu", "malaus_kayu", "agaganaka_da_ka",
                        "pagdulapan_da_ka", "kaluguran_da_ka"]
    else:
        ref_patterns = [p.strip() for p in patterns if p.strip()]

    results = {}
    for pattern in ref_patterns:
        # The names come from the query string: refuse anything that would
        # step outside REFERENCE_AUDIO_DIR (path separators, "." or "..").
        if pattern in (".", "..") or os.path.basename(pattern) != pattern:
            results[pattern] = {
                "exists": False,
                "path": None,
                "error": "Invalid pattern name"
            }
            continue

        pattern_dir = os.path.join(REFERENCE_AUDIO_DIR, pattern)
        if os.path.exists(pattern_dir):
            wav_files = glob.glob(os.path.join(pattern_dir, "*.wav"))
            results[pattern] = {
                "exists": True,
                "path": pattern_dir,
                "file_count": len(wav_files),
                "files": [os.path.basename(f) for f in wav_files]
            }
        else:
            results[pattern] = {
                "exists": False,
                "path": pattern_dir
            }

    return jsonify({
        "reference_audio_dir": REFERENCE_AUDIO_DIR,
        "patterns": results
    })
|
|
|
@app.route("/upload_reference", methods=["POST"])
@rate_limit
def upload_reference_audio():
    """Delegate a reference-audio upload to handle_upload_reference."""
    upload_result = handle_upload_reference(
        request,
        REFERENCE_AUDIO_DIR,
        SAMPLE_RATE,
    )
    return upload_result
|
|
|
|
|
@app.route("/cleanup", methods=["POST"])
def cleanup_files():
    """Clean up old files to free space (important for HF Spaces).

    Access is granted to requests from ``127.0.0.1`` or to requests whose
    ``X-Cleanup-Key`` header matches the ``CLEANUP_KEY`` environment
    variable.  Files under ``OUTPUT_DIR`` whose modification time is more
    than 7200 seconds old are deleted, directories left empty are removed,
    and the CUDA cache is released when CUDA is available.

    Returns:
        JSON with ``message`` and ``files_deleted``; status 403 when the
        caller is not authorised; status 500 with the error text on failure.
    """
    import hmac  # local import; the module-level import block stays untouched

    try:
        # NOTE(review): when CLEANUP_KEY is unset the well-known default
        # 'cleanup-secret' is accepted - configure a real key in production.
        expected_key = os.environ.get('CLEANUP_KEY', 'cleanup-secret')
        supplied_key = request.headers.get('X-Cleanup-Key') or ''
        # Constant-time comparison; an empty header never matches, even when
        # the configured key is empty.
        key_matches = bool(supplied_key) and hmac.compare_digest(
            supplied_key.encode('utf-8'), expected_key.encode('utf-8')
        )

        # NOTE(review): behind a same-host reverse proxy every request may
        # appear to come from 127.0.0.1 - confirm the deployment topology.
        if not (request.remote_addr == '127.0.0.1' or key_matches):
            return jsonify({"error": "Unauthorized"}), 403

        cutoff_time = time.time() - 7200
        deleted_count = 0

        # Pass 1: delete files older than the cutoff.  Individual failures
        # are logged and skipped.
        for root, dirs, files in os.walk(OUTPUT_DIR):
            for file in files:
                try:
                    file_path = os.path.join(root, file)
                    if os.path.getmtime(file_path) < cutoff_time:
                        os.remove(file_path)
                        deleted_count += 1
                except Exception as e:
                    logger.warning(f"β οΈ Failed to delete {file}: {e}")

        # Pass 2: bottom-up, so children are removed before their parents
        # are checked for emptiness.
        for root, dirs, files in os.walk(OUTPUT_DIR, topdown=False):
            for dir_name in dirs:
                try:
                    dir_path = os.path.join(root, dir_name)
                    if not os.listdir(dir_path):
                        os.rmdir(dir_path)
                except Exception as e:
                    logger.warning(f"β οΈ Failed to remove empty dir {dir_name}: {e}")

        # Cached TTS responses presumably point at generated audio files
        # (TODO confirm against handle_tts_request); drop them so a cache hit
        # cannot hand out a link to a file that was just deleted.
        if deleted_count:
            tts_cache.clear()

        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        return jsonify({
            "message": "Cleanup completed",
            "files_deleted": deleted_count
        })
    except Exception as e:
        logger.error(f"β Cleanup error: {str(e)}")
        return jsonify({"error": str(e)}), 500
|
|
|
if __name__ == "__main__":
    # Resolve the reference audio directory before serving; the initialiser
    # may return a different location than the configured one.
    updated_ref_dir = init_reference_audio(REFERENCE_AUDIO_DIR, OUTPUT_DIR)
    if updated_ref_dir and updated_ref_dir != REFERENCE_AUDIO_DIR:
        REFERENCE_AUDIO_DIR = updated_ref_dir
        logger.info(f"π Updated reference audio directory to: {REFERENCE_AUDIO_DIR}")

    logger.info("π Starting Speech API server optimized for Hugging Face Spaces")

    # Log which models are loaded.  Lookups use .get() so an unexpected
    # status shape is reported as "not loaded" instead of aborting start-up.
    status = check_model_status()
    asr_mark = '✅' if status.get('asr_model') == 'loaded' else '❌'
    logger.info(f"π System status: ASR model: {asr_mark}")
    for lang, model_status in status.get('tts_models', {}).items():
        tts_mark = '✅' if model_status == 'loaded' else '❌'
        logger.info(f"π TTS model {lang}: {tts_mark}")

    # Listen on all interfaces, port 7860, with one thread per request.
    app.run(host="0.0.0.0", port=7860, debug=False, threaded=True)