|
|
|
|
|
import os |
|
import sys |
|
import logging |
|
import traceback |
|
|
|
|
|
# Root logging configuration: INFO and above, timestamped as
# "YYYY-MM-DD HH:MM:SS - LEVEL - message".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)

# Named logger used for every message emitted by this module.
logger = logging.getLogger("speech_api")
|
|
|
|
|
# Environment variable -> directory under /tmp. Each variable is exported and
# its directory created below.
cache_dirs = {
    "HF_HOME": "/tmp/hf_home",
    "TRANSFORMERS_CACHE": "/tmp/transformers_cache",
    "HUGGINGFACE_HUB_CACHE": "/tmp/huggingface_hub_cache",
    "TORCH_HOME": "/tmp/torch_home",
    "XDG_CACHE_HOME": "/tmp/xdg_cache",
}

# Export every variable first; the values do not depend on whether the
# directory could be created.
os.environ.update(cache_dirs)

# Create each directory. A failure is logged and the loop moves on, so one bad
# path does not prevent the others from being created.
for path in cache_dirs.values():
    try:
        os.makedirs(path, exist_ok=True)
    except Exception as e:
        logger.error(f"β Failed to create directory {path}: {str(e)}")
    else:
        logger.info(f"π Created cache directory: {path}")
|
|
|
|
|
# Third-party and project imports are grouped in a single guarded block: if
# any of them is missing, the failure is logged at CRITICAL level and the
# process exits with status 1.
# NOTE(review): this block sits below the cache-directory setup, which exports
# HF_HOME/TORCH_HOME/etc. -- presumably so these libraries see the variables at
# import time; keep that ordering unless confirmed otherwise.
try:
    import librosa
    import glob
    import numpy as np
    import torch
    from pydub import AudioSegment
    import tempfile
    import soundfile as sf
    from flask import Flask, request, jsonify, send_file, g
    from flask_cors import CORS
    from werkzeug.utils import secure_filename

    # Project-local request handlers and model helpers.
    from translator import (
        init_models, check_model_status, handle_asr_request,
        handle_tts_request, handle_translation_request
    )
    from evaluate import (
        handle_evaluation_request, handle_upload_reference,
        init_reference_audio, calculate_similarity
    )

    # The literal below had been split across two physical lines by a
    # mis-decoded check-mark glyph, leaving an unterminated string; it is
    # restored to a single-line literal.
    logger.info("✅ All required libraries imported successfully")
except ImportError as e:
    logger.critical(f"β Failed to import necessary libraries: {str(e)}")
    sys.exit(1)
|
|
|
|
|
# Select the compute device: "cuda" when torch reports a usable GPU, otherwise
# "cpu". The choice is logged either way.
device = "cuda" if torch.cuda.is_available() else "cpu"

if device == "cuda":
    logger.info(f"π CUDA available: {torch.cuda.get_device_name(0)}")
else:
    logger.info("β οΈ CUDA not available, using CPU")
|
|
|
|
|
# Module-wide settings handed to the request handlers below.
SAMPLE_RATE = 16000                         # passed to the ASR/evaluation/upload handlers
OUTPUT_DIR = "/tmp/audio_outputs"           # where generated audio is written and served from
REFERENCE_AUDIO_DIR = "./reference_audios"  # may be re-pointed by init_reference_audio()

# Make sure the output directory exists. A failure is logged but does not stop
# the module from loading.
try:
    os.makedirs(OUTPUT_DIR, exist_ok=True)
except Exception as e:
    logger.error(f"β Failed to create output directory: {str(e)}")
else:
    logger.info(f"π Created output directory: {OUTPUT_DIR}")
|
|
|
|
|
# Flask application object; CORS(app) applies flask_cors with its default
# settings to the whole app.
app = Flask(__name__)
CORS(app)

# Load the models at import time on the device selected above.
# NOTE(review): what exactly gets loaded is defined in translator.init_models.
init_models(device)
|
|
|
|
|
|
|
@app.route("/", methods=["GET"])
def home():
    """Return a fixed JSON payload stating that the API is running."""
    payload = {"message": "Speech API is running", "status": "active"}
    return jsonify(payload)
|
|
|
|
|
@app.route("/health", methods=["GET"])
def health_check():
    """Return the model status report as JSON, extended with API and device info.

    Starts from whatever ``check_model_status()`` returns and adds two keys:
    ``api_status`` (always ``"online"``) and ``device`` (the module-level
    device string).
    """
    report = check_model_status()
    report.update({"api_status": "online", "device": device})
    return jsonify(report)
|
|
|
|
|
@app.route("/asr", methods=["POST"])
def transcribe_audio():
    """Delegate a POST to ``handle_asr_request`` and return its response.

    The handler receives the current Flask request, the output directory and
    the configured sample rate.
    """
    asr_response = handle_asr_request(request, OUTPUT_DIR, SAMPLE_RATE)
    return asr_response
|
|
|
|
|
@app.route("/tts", methods=["POST"])
def generate_tts():
    """Delegate a POST to ``handle_tts_request`` and return its response.

    The handler receives the current Flask request and the output directory.
    """
    tts_response = handle_tts_request(request, OUTPUT_DIR)
    return tts_response
|
|
|
|
|
@app.route("/translate", methods=["POST"])
def translate_text():
    """Delegate a POST to ``handle_translation_request`` and return its response."""
    translation_response = handle_translation_request(request)
    return translation_response
|
|
|
|
|
@app.route("/download/<filename>", methods=["GET"])
def download_audio(filename):
    """Serve a file from ``OUTPUT_DIR`` as a WAV attachment.

    Args:
        filename: Name of the file inside ``OUTPUT_DIR``, taken from the URL.

    Returns:
        The file as an ``audio/wav`` attachment, or a JSON error body with
        status 404 when the name is not a plain file name or does not refer
        to a regular file in ``OUTPUT_DIR``.
    """
    file_path = os.path.join(OUTPUT_DIR, filename)

    # The name comes straight from the URL. Only a bare file name is accepted:
    # "." / ".." and anything containing a path separator would resolve
    # outside (or to) the output directory itself.
    is_plain_name = (
        filename not in (".", "..")
        and filename == os.path.basename(filename)
    )

    # isfile() rather than exists(): a directory "exists" but cannot be sent,
    # and previously made send_file fail instead of yielding a clean 404.
    if is_plain_name and os.path.isfile(file_path):
        logger.info(f"π€ Serving audio file: {file_path}")
        return send_file(file_path, mimetype="audio/wav", as_attachment=True)

    logger.warning(f"β οΈ Requested file not found: {file_path}")
    return jsonify({"error": "File not found"}), 404
|
|
|
|
|
@app.route("/evaluate", methods=["POST"])
def evaluate_pronunciation():
    """Delegate a POST to ``handle_evaluation_request`` and return its response.

    The handler receives the current Flask request, the reference-audio
    directory, the output directory and the configured sample rate.
    """
    evaluation_response = handle_evaluation_request(
        request, REFERENCE_AUDIO_DIR, OUTPUT_DIR, SAMPLE_RATE
    )
    return evaluation_response
|
|
|
|
|
@app.route("/check_references", methods=["GET"])
def check_references():
    """Report which expected reference-audio folders exist and what they hold.

    For every expected pattern name, looks for a sub-directory of
    ``REFERENCE_AUDIO_DIR`` with that name. Existing ones are reported with
    the count and base names of their ``*.wav`` files; missing ones are
    reported with ``exists: False``. The JSON response also states whether
    ``REFERENCE_AUDIO_DIR`` itself exists.
    """
    expected_patterns = (
        "mayap_a_abak", "mayap_a_ugtu", "mayap_a_gatpanapun", "mayap_a_bengi",
        "komusta_ka", "malaus_ko_pu", "malaus_kayu", "agaganaka_da_ka",
        "pagdulapan_da_ka", "kaluguran_da_ka", "dakal_a_salamat", "panapaya_mu_ku",
        "wa", "ali", "tuknang", "lagwa", "galo", "buri_ke_ini", "tara_na",
        "nokarin_ka_ibat", "nokarin_ka_munta", "atiu_na_ku", "nanung_panayan_mu",
        "mako_na_ka", "muli_ta_na", "nanu_ing_pengan_mu", "mekeni", "mengan_na_ka",
        "munta_ka_karin", "magkanu_ini", "mimingat_ka", "mangan_ta_na", "lakwan_da_ka",
        "nanu_maliari_kung_daptan_keka", "pilan_na_ka_banwa", "saliwan_ke_ini",
        "makananu_munta_king",
    )

    def describe(name):
        # Build the report entry for one pattern directory.
        directory = os.path.join(REFERENCE_AUDIO_DIR, name)
        if not os.path.exists(directory):
            return {"exists": False, "path": directory}
        found = glob.glob(os.path.join(directory, "*.wav"))
        return {
            "exists": True,
            "path": directory,
            "file_count": len(found),
            "files": [os.path.basename(item) for item in found],
        }

    # Inspect the pattern directories first, then the top-level directory.
    per_pattern = {name: describe(name) for name in expected_patterns}

    return jsonify({
        "reference_audio_dir": REFERENCE_AUDIO_DIR,
        "directory_exists": os.path.exists(REFERENCE_AUDIO_DIR),
        "patterns": per_pattern,
    })
|
|
|
|
|
@app.route("/upload_reference", methods=["POST"])
def upload_reference_audio():
    """Delegate a POST to ``handle_upload_reference`` and return its response.

    The handler receives the current Flask request, the reference-audio
    directory and the configured sample rate.
    """
    upload_response = handle_upload_reference(request, REFERENCE_AUDIO_DIR, SAMPLE_RATE)
    return upload_response
|
|
|
|
|
@app.before_request
def before_request():
    """Initialise the reference-audio directory once, before the first request.

    Calls ``init_reference_audio(REFERENCE_AUDIO_DIR, OUTPUT_DIR)`` and, when
    it returns a different non-empty path, rebinds the module-level
    ``REFERENCE_AUDIO_DIR`` to it.

    The run-once flag is kept in ``app.config``. It used to live on
    ``flask.g``, but ``g`` is created afresh for every request, so the
    ``hasattr(g, 'initialized')`` check was always false and the
    initialisation ran on every request.
    """
    global REFERENCE_AUDIO_DIR

    # Still set on every request, as before, in case other code reads it.
    g.initialized = True

    if app.config.get("REFERENCE_AUDIO_INITIALIZED"):
        return

    updated_ref_dir = init_reference_audio(REFERENCE_AUDIO_DIR, OUTPUT_DIR)
    if updated_ref_dir and updated_ref_dir != REFERENCE_AUDIO_DIR:
        REFERENCE_AUDIO_DIR = updated_ref_dir
        logger.info(f"π Updated reference audio directory to: {REFERENCE_AUDIO_DIR}")

    # Only mark as done after a successful run, so a raised exception leads to
    # a retry on the next request.
    app.config["REFERENCE_AUDIO_INITIALIZED"] = True
|
|
|
if __name__ == "__main__":
    # Resolve the reference-audio directory before serving; adopt the path
    # returned by init_reference_audio when it differs from the configured one.
    updated_ref_dir = init_reference_audio(REFERENCE_AUDIO_DIR, OUTPUT_DIR)
    if updated_ref_dir and updated_ref_dir != REFERENCE_AUDIO_DIR:
        REFERENCE_AUDIO_DIR = updated_ref_dir
        logger.info(f"π Updated reference audio directory to: {REFERENCE_AUDIO_DIR}")

    logger.info("π Starting Speech API server")

    # Log a loaded / not-loaded marker for the ASR model and each TTS model.
    # The marker literals had been split across physical lines by mis-decoded
    # glyphs (a syntax error); they are restored as single-line literals.
    status = check_model_status()
    logger.info(f"π System status: ASR model: {'✅' if status['asr_model'] == 'loaded' else '❌'}")
    for lang, model_status in status['tts_models'].items():
        logger.info(f"π TTS model {lang}: {'✅' if model_status == 'loaded' else '❌'}")

    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it allows code execution from the browser, and the
    # reloader it enables imports this module (and loads the models) twice.
    # Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)