File size: 8,229 Bytes
4c12485 e181043 4c12485 936eff8 be6f75f 7c33098 704cf08 7c33098 4c12485 1f4aae2 4c12485 e7b87ef 4c12485 9c8a2cf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 |
# app.py - Main application file
import os
import sys
import logging
import traceback
# Root logging setup: INFO and above, timestamped "time - LEVEL - message" lines.
logging.basicConfig(
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Named logger used by the rest of this module.
logger = logging.getLogger("speech_api")
# Cache locations, keyed by the environment variable that is pointed at each
# one. Every path lives under /tmp.
cache_dirs = {
    "HF_HOME": "/tmp/hf_home",
    "TRANSFORMERS_CACHE": "/tmp/transformers_cache",
    "HUGGINGFACE_HUB_CACHE": "/tmp/huggingface_hub_cache",
    "TORCH_HOME": "/tmp/torch_home",
    "XDG_CACHE_HOME": "/tmp/xdg_cache",
}

# Export each variable, then try to create its directory. A failure is only
# logged, so one bad path does not stop the remaining ones from being set up.
for env_var, path in cache_dirs.items():
    os.environ[env_var] = path
    try:
        os.makedirs(path, exist_ok=True)
        logger.info(f"π Created cache directory: {path}")
    except Exception as exc:
        logger.error(f"β Failed to create directory {path}: {exc!s}")
# Import the remaining dependencies only after the cache environment variables
# above are in place. A missing package is logged and terminates the process
# with exit code 1.
try:
    import librosa
    import glob
    import numpy as np
    import torch
    from pydub import AudioSegment
    import tempfile
    import soundfile as sf
    from flask import Flask, request, jsonify, send_file, g
    from flask_cors import CORS
    from werkzeug.utils import secure_filename
    # Import functionality from other modules
    from translator import (
        init_models, check_model_status, handle_asr_request,
        handle_tts_request, handle_translation_request
    )
    from evaluate import (
        handle_evaluation_request, handle_upload_reference,
        init_reference_audio, calculate_similarity
    )
    # The success marker was mis-decoded and broke this literal across two
    # lines; it is restored as a single-line string.
    logger.info("✅ All required libraries imported successfully")
except ImportError as e:
    logger.critical(f"❌ Failed to import necessary libraries: {str(e)}")
    sys.exit(1)
# Choose the compute device once at import time: "cuda" when torch reports a
# usable GPU, otherwise "cpu".
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cuda":
    logger.info(f"π CUDA available: {torch.cuda.get_device_name(0)}")
else:
    logger.info("β οΈ CUDA not available, using CPU")
# Module-wide settings passed to the request handlers below.
SAMPLE_RATE = 16_000
OUTPUT_DIR = "/tmp/audio_outputs"
REFERENCE_AUDIO_DIR = "./reference_audios"

# Create the output directory up front; a failure is logged, not raised.
try:
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    logger.info(f"π Created output directory: {OUTPUT_DIR}")
except Exception as exc:
    logger.error(f"β Failed to create output directory: {exc!s}")
# Create the Flask application object; the route decorators below register
# their view functions on it.
app = Flask(__name__)
# Apply flask_cors to the app. No options are passed, so the library defaults
# are in effect.
CORS(app)
# Call translator.init_models at import time with the device string chosen
# above ("cuda" or "cpu").
# NOTE(review): presumably this loads the ASR/TTS models reported by
# check_model_status(); the implementation is not visible in this file.
init_models(device)
# Define routes
@app.route("/", methods=["GET"])
def home():
    """Return a fixed JSON payload stating that the API is running."""
    payload = {"message": "Speech API is running", "status": "active"}
    return jsonify(payload)
@app.route("/health", methods=["GET"])
def health_check():
    """Return the model status report extended with API and device fields.

    The mapping returned by ``check_model_status()`` is updated in place with
    ``api_status`` ("online") and ``device`` (the module-level device string)
    and then serialized as JSON.
    """
    report = check_model_status()
    report.update({"api_status": "online", "device": device})
    return jsonify(report)
@app.route("/asr", methods=["POST"])
def transcribe_audio():
    """Delegate POST /asr to ``handle_asr_request``.

    Passes the current Flask request, ``OUTPUT_DIR`` and ``SAMPLE_RATE``, and
    returns whatever the handler returns.
    """
    response = handle_asr_request(request, OUTPUT_DIR, SAMPLE_RATE)
    return response
@app.route("/tts", methods=["POST"])
def generate_tts():
    """Delegate POST /tts to ``handle_tts_request``.

    Passes the current Flask request and ``OUTPUT_DIR``, and returns whatever
    the handler returns.
    """
    response = handle_tts_request(request, OUTPUT_DIR)
    return response
@app.route("/translate", methods=["POST"])
def translate_text():
    """Delegate POST /translate to ``handle_translation_request``.

    Passes the current Flask request and returns whatever the handler returns.
    """
    response = handle_translation_request(request)
    return response
@app.route("/download/<filename>", methods=["GET"])
def download_audio(filename):
    """Serve a file from ``OUTPUT_DIR`` as a WAV attachment.

    Args:
        filename: Name taken from the URL path segment.

    Returns:
        The ``send_file`` response when ``filename`` names a regular file
        located inside ``OUTPUT_DIR``; otherwise a JSON error body with
        status 404.
    """
    file_path = os.path.join(OUTPUT_DIR, filename)

    # Resolve ".." components and symlinks before checking containment, so a
    # crafted name cannot point outside the output directory.
    base_dir = os.path.realpath(OUTPUT_DIR)
    resolved = os.path.realpath(file_path)
    inside_output_dir = resolved.startswith(base_dir + os.sep)

    # isfile (not exists): a directory such as ".." must yield 404 instead of
    # making send_file fail.
    if inside_output_dir and os.path.isfile(resolved):
        logger.info(f"π€ Serving audio file: {file_path}")
        return send_file(file_path, mimetype="audio/wav", as_attachment=True)

    logger.warning(f"β οΈ Requested file not found: {file_path}")
    return jsonify({"error": "File not found"}), 404
@app.route("/evaluate", methods=["POST"])
def evaluate_pronunciation():
    """Delegate POST /evaluate to ``handle_evaluation_request``.

    Passes the current Flask request, ``REFERENCE_AUDIO_DIR``, ``OUTPUT_DIR``
    and ``SAMPLE_RATE``, and returns whatever the handler returns.
    """
    response = handle_evaluation_request(
        request, REFERENCE_AUDIO_DIR, OUTPUT_DIR, SAMPLE_RATE
    )
    return response
@app.route("/check_references", methods=["GET"])
def check_references():
    """Report which reference-audio folders exist under REFERENCE_AUDIO_DIR.

    For every known pattern name the response records whether a path of that
    name exists below ``REFERENCE_AUDIO_DIR`` and, when it does, how many
    ``*.wav`` files it contains together with their base names.
    """
    ref_patterns = [
        "mayap_a_abak", "mayap_a_ugtu", "mayap_a_gatpanapun", "mayap_a_bengi",
        "komusta_ka", "malaus_ko_pu", "malaus_kayu", "agaganaka_da_ka",
        "pagdulapan_da_ka", "kaluguran_da_ka", "dakal_a_salamat", "panapaya_mu_ku",
        "wa", "ali", "tuknang", "lagwa", "galo", "buri_ke_ini", "tara_na",
        "nokarin_ka_ibat", "nokarin_ka_munta", "atiu_na_ku", "nanung_panayan_mu",
        "mako_na_ka", "muli_ta_na", "nanu_ing_pengan_mu", "mekeni", "mengan_na_ka",
        "munta_ka_karin", "magkanu_ini", "mimingat_ka", "mangan_ta_na", "lakwan_da_ka",
        "nanu_maliari_kung_daptan_keka", "pilan_na_ka_banwa", "saliwan_ke_ini",
        "makananu_munta_king", "adwa", "anam", "apat", "apulu", "atlu", "dalan", "libu", "lima",
        "metung", "pitu", "siyam", "walu", "masala_pa_king_kilwal", "tatakut_ku_king_madalumdum", "maragul_ing_minaliwas_keka",
        "marimla_ing_danum", "malagu_ing_babaeng_yan", "marok_ing_manigarilyo",
        "mababa_ing_tete", "pilan_la_reng_malapit_mong_kaluguran",
        "matuling_ya_ing_pusa", "maputi_ya_ing_asu", "mesakab_ku_nandin_kaya_migkasugat_ku_tud", "masakit_la_deng_kayang_bitis",
        "mikapali_la_deng_balugbug_na", "kinimut_ya_ing_kayang_arung", "mabayat_ya_pakiramdam_ing_kanakung_salu", "masakit_ya_ing_kayang_butit",
        "meputu_la_deng_kanakung_kuku", "mengalgal_la_deng_gamat_na", "minayli_ya_ing_kayang_asbuk", "masakit_ya_ing_buntuk_ku", "buring_buri_ng_mag_basketballl_ning_pisan_ku", "bibisita_ya_i_dara_ku_kada_duminggu",
        "magaling_yang_magpayu_i_achi_ku", "manyaman_yang_mag_lutung_tinape_i_apu_ku", "manyaman_yang_mag_lutu_i_ma_ku", "kaburi_ng_mamusit_i_tatang_ku",
        "suportadu_dakung_pane_ning_pengari_ku", "ing_koyang_ku_ing_mag_manehu", "sinopan_ke_ing_kapatad_ku_keng_pamagaral", "makulit_ya_ing_wali_ku",
        "makabuklat_ya_ing_pasbul", "malinis_ya_ing_awang", "neng_kalati_ing_dagis", "madagul_ya_ing_bale_da",
        "kailangan_ke_ing_ulas", "sinali_yang_sambra", "kasala_na_ing_sulu", "kumportable_ya_ing_pitudturan_ku",
        "nilukluk_ya_keng_luklukan", "malambut_ya_ing_ulnan_ku"
    ]

    report = {}
    for name in ref_patterns:
        folder = os.path.join(REFERENCE_AUDIO_DIR, name)

        # Missing folder: record the expected location and move on.
        if not os.path.exists(folder):
            report[name] = {"exists": False, "path": folder}
            continue

        recordings = glob.glob(os.path.join(folder, "*.wav"))
        report[name] = {
            "exists": True,
            "path": folder,
            "file_count": len(recordings),
            "files": [os.path.basename(wav) for wav in recordings],
        }

    summary = {
        "reference_audio_dir": REFERENCE_AUDIO_DIR,
        "directory_exists": os.path.exists(REFERENCE_AUDIO_DIR),
        "patterns": report,
    }
    return jsonify(summary)
@app.route("/upload_reference", methods=["POST"])
def upload_reference_audio():
    """Delegate POST /upload_reference to ``handle_upload_reference``.

    Passes the current Flask request, ``REFERENCE_AUDIO_DIR`` and
    ``SAMPLE_RATE``, and returns whatever the handler returns.
    """
    response = handle_upload_reference(request, REFERENCE_AUDIO_DIR, SAMPLE_RATE)
    return response
@app.before_request
def before_request():
    """Initialize the reference-audio directory once, before the first request.

    ``init_reference_audio`` may return a replacement directory; when it does,
    the module-level ``REFERENCE_AUDIO_DIR`` is rebound so that later requests
    use the new location.

    The "already initialized" flag is kept in ``app.config`` rather than on
    ``flask.g``: ``g`` is recreated for every request, so a flag stored only
    there is never seen again and the initialization would run on each request.
    """
    # `global` is required: this function rebinds REFERENCE_AUDIO_DIR below.
    global REFERENCE_AUDIO_DIR

    if not app.config.get("REFERENCE_AUDIO_INITIALIZED"):
        # This might return an updated path if the original fails
        updated_ref_dir = init_reference_audio(REFERENCE_AUDIO_DIR, OUTPUT_DIR)
        if updated_ref_dir and updated_ref_dir != REFERENCE_AUDIO_DIR:
            REFERENCE_AUDIO_DIR = updated_ref_dir
            logger.info(f"π Updated reference audio directory to: {REFERENCE_AUDIO_DIR}")
        # Set only after a successful call, so a failed attempt is retried.
        app.config["REFERENCE_AUDIO_INITIALIZED"] = True

    # Still exposed on `g` for any code that reads it during the request.
    g.initialized = True
if __name__ == "__main__":
    # Script entry point: prepare the reference audio, log model status, then
    # start the Flask development server on all interfaces, port 7860.

    # This might return an updated path if the original fails
    updated_ref_dir = init_reference_audio(REFERENCE_AUDIO_DIR, OUTPUT_DIR)
    if updated_ref_dir and updated_ref_dir != REFERENCE_AUDIO_DIR:
        REFERENCE_AUDIO_DIR = updated_ref_dir
        logger.info(f"π Updated reference audio directory to: {REFERENCE_AUDIO_DIR}")

    logger.info("π Starting Speech API server")

    # Get the status for logging. Missing keys are reported as "not loaded"
    # instead of aborting startup with a KeyError.
    status = check_model_status()
    asr_mark = "✅" if status.get("asr_model") == "loaded" else "❌"
    logger.info(f"π System status: ASR model: {asr_mark}")
    for lang, model_status in (status.get("tts_models") or {}).items():
        tts_mark = "✅" if model_status == "loaded" else "❌"
        logger.info(f"π TTS model {lang}: {tts_mark}")

    # The interactive debugger lets a client execute arbitrary code, and this
    # server listens on 0.0.0.0, so debug mode is opt-in via FLASK_DEBUG.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1", "true", "yes", "on",
    }
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)