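"""Reference-audio management and pronunciation evaluation for the speech API.

Preprocesses Kapampangan reference recordings (transcribing each once with the
ASR model from translator.py), caches the results in memory, and provides the
Flask handlers for evaluating user pronunciation and uploading new references.
"""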
import os
import glob
import logging
import traceback
import tempfile
import shutil
import json
import fcntl
import hashlib
import threading
import multiprocessing
import random
import time
import atexit
from difflib import SequenceMatcher
from concurrent.futures import ThreadPoolExecutor

import torch
import torchaudio
from pydub import AudioSegment
from flask import jsonify
from werkzeug.utils import secure_filename

from translator import get_asr_model, get_asr_processor, LANGUAGE_CODES

logger = logging.getLogger("speech_api")

# Cache of preprocessed reference audio:
# {pattern: {filename: {"waveform", "transcription", "processed_at"}}}
REFERENCE_CACHE = {}

# Cache of recent evaluation responses, keyed by audio hash + locator + language
EVALUATION_CACHE = {}

# Shared preprocessing state, guarded by PREPROCESSING_LOCK
PREPROCESSING_COMPLETE = False
PREPROCESSING_ACTIVE = False
PREPROCESSING_LOCK = threading.Lock()
PREPROCESSING_THREAD = None
PREPROCESSING_PAUSE = threading.Event()
PREPROCESSING_PAUSE.set()  # set = preprocessing may run, cleared = paused

# File-based lock so only one process preprocesses at a time
LOCK_FILE = "/tmp/speech_api_preprocessing.lock"
_lock_file_handle = None

def calculate_similarity(text1, text2):
    """Return the similarity of two texts as a percentage (0-100)."""
    def clean_text(text):
        return text.lower()

    clean1 = clean_text(text1)
    clean2 = clean_text(text2)

    matcher = SequenceMatcher(None, clean1, clean2)
    return matcher.ratio() * 100

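# Example (sanity check, not part of the API): identical strings score 100,
# and comparison is case-insensitive because clean_text() lowercases input.
#   calculate_similarity("Mayap a abak", "mayap a abak")  # -> 100.0
#   calculate_similarity("mayap", "masala")               # -> partial score
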
def acquire_preprocessing_lock():
    """Attempt to acquire the system-wide preprocessing lock via a lock file.

    Returns True if the lock was acquired, False otherwise."""
    global _lock_file_handle
    try:
        # Create the lock file if it doesn't exist yet
        if not os.path.exists(LOCK_FILE):
            with open(LOCK_FILE, 'w') as f:
                f.write(str(os.getpid()))

        lock_file = open(LOCK_FILE, 'r+')
        try:
            # Non-blocking exclusive lock; raises IOError if another process holds it
            fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)

            # Record our PID in the lock file
            lock_file.seek(0)
            lock_file.write(str(os.getpid()))
            lock_file.truncate()
            lock_file.flush()

            # Keep the handle open for the lifetime of the lock
            _lock_file_handle = lock_file

            logger.info("🔒 Acquired preprocessing lock")
            return True
        except IOError:
            # Another process holds the lock
            lock_file.close()
            logger.info("⚠️ Another process is already running preprocessing")
            return False
    except Exception as e:
        logger.error(f"❌ Error acquiring preprocessing lock: {str(e)}")
        return False


def release_preprocessing_lock():
    """Release the preprocessing lock if this process holds it."""
    global _lock_file_handle
    if _lock_file_handle:
        try:
            fcntl.flock(_lock_file_handle, fcntl.LOCK_UN)
            _lock_file_handle.close()
            logger.info("🔓 Released preprocessing lock")
        except Exception as e:
            logger.error(f"❌ Error releasing preprocessing lock: {str(e)}")
        finally:
            # Clear the handle so a double release is a no-op
            _lock_file_handle = None

def save_preprocessing_state(reference_dir, state=None):
    """Save the current preprocessing state to a file."""
    state_file = os.path.join(reference_dir, ".preprocessing_state.json")
    if state is None:
        state = {
            "complete": PREPROCESSING_COMPLETE,
            "active": PREPROCESSING_ACTIVE,
            "patterns_cached": list(REFERENCE_CACHE.keys()),
            "timestamp": time.time(),
            "pid": os.getpid()
        }

    try:
        with open(state_file, 'w') as f:
            json.dump(state, f)
    except Exception as e:
        logger.error(f"❌ Error saving preprocessing state: {str(e)}")


def load_preprocessing_state(reference_dir):
    """Load preprocessing state from a file."""
    state_file = os.path.join(reference_dir, ".preprocessing_state.json")
    if not os.path.exists(state_file):
        return None

    try:
        with open(state_file, 'r') as f:
            return json.load(f)
    except Exception as e:
        logger.error(f"❌ Error loading preprocessing state: {str(e)}")
        return None

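# Shape of the on-disk state file (.preprocessing_state.json), with
# illustrative values:
#   {"complete": false, "active": true,
#    "patterns_cached": ["mayap_a_abak", "komusta_ka"],
#    "timestamp": 1700000000.0, "pid": 1234}
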
def pause_preprocessing():
    """Pause preprocessing temporarily."""
    PREPROCESSING_PAUSE.clear()


def resume_preprocessing():
    """Resume preprocessing after pause."""
    PREPROCESSING_PAUSE.set()

# Canonical list of Kapampangan reference patterns; each gets its own
# subdirectory of reference audio files. Shared by directory setup and upload
# validation.
REFERENCE_PATTERNS = [
    "mayap_a_abak", "mayap_a_ugtu", "mayap_a_gatpanapun", "mayap_a_bengi",
    "komusta_ka", "malaus_ko_pu", "malaus_kayu", "agaganaka_da_ka",
    "pagdulapan_da_ka", "kaluguran_da_ka", "dakal_a_salamat", "panapaya_mu_ku",
    "wa", "ali", "tuknang", "lagwa", "galo", "buri_ke_ini", "tara_na",
    "nokarin_ka_ibat", "nokarin_ka_munta", "atiu_na_ku", "nanung_panayan_mu",
    "mako_na_ka", "muli_ta_na", "nanu_ing_pengan_mu", "mekeni", "mengan_na_ka",
    "munta_ka_karin", "magkanu_ini", "mimingat_ka", "mangan_ta_na", "lakwan_da_ka",
    "nanu_maliari_kung_daptan_keka", "pilan_na_ka_banwa", "saliwan_ke_ini",
    "makananu_munta_king", "adwa", "anam", "apat", "apulu", "atlu", "dinalan", "libu", "lima",
    "metung", "pitu", "siyam", "walu", "masala", "madalumdum", "maragul", "marimla",
    "malagu", "marok", "mababa", "malapit", "matuling", "maputi",
    "arung", "asbuk", "balugbug", "bitis", "buntuk", "butit", "gamat", "kuku", "salu", "tud",
    "pisan", "dara", "achi", "apu", "ima", "tatang", "pengari", "koya", "kapatad", "wali",
    "pasbul", "awang", "dagis", "bale", "ulas", "sambra", "sulu", "pitudturan", "luklukan", "ulnan"
]


def setup_reference_patterns(reference_dir, sample_rate=16000):
    """Create the standard reference pattern directories without dummy files.

    sample_rate is kept for API compatibility; no audio is generated here."""
    created_dirs = 0

    for pattern in REFERENCE_PATTERNS:
        pattern_dir = os.path.join(reference_dir, pattern)
        if not os.path.exists(pattern_dir):
            try:
                os.makedirs(pattern_dir, exist_ok=True)
                logger.info(f"📁 Created reference pattern directory: {pattern_dir}")
                created_dirs += 1
            except Exception as e:
                logger.error(f"❌ Failed to create reference pattern directory {pattern_dir}: {str(e)}")
                continue

    # Second element kept for a (dirs_created, files_created) tuple; no files are created.
    return created_dirs, 0

def search_reference_directories():
    """Search for possible reference directories in various locations."""
    possible_locations = [
        "./reference_audios",
        "../reference_audios",
        "/app/reference_audios",
        "/tmp/reference_audios",
        os.path.join(os.path.dirname(os.path.abspath(__file__)), "reference_audios")
    ]

    found_dirs = []
    for location in possible_locations:
        if os.path.exists(location) and os.path.isdir(location):
            access_info = {
                "readable": os.access(location, os.R_OK),
                "writable": os.access(location, os.W_OK),
                "executable": os.access(location, os.X_OK)
            }

            pattern_dirs = [d for d in os.listdir(location)
                            if os.path.isdir(os.path.join(location, d))]

            wav_count = 0
            for pattern in pattern_dirs:
                pattern_path = os.path.join(location, pattern)
                wav_count += len(glob.glob(os.path.join(pattern_path, "*.wav")))

            found_dirs.append({
                "path": location,
                "access": access_info,
                "pattern_dirs": len(pattern_dirs),
                "wav_files": wav_count
            })

    return found_dirs

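# Each entry of the returned list looks like (illustrative values):
#   {"path": "./reference_audios",
#    "access": {"readable": True, "writable": True, "executable": True},
#    "pattern_dirs": 12, "wav_files": 48}
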
def transcribe_audio(waveform, sample_rate, asr_model, asr_processor):
    """Helper function to transcribe audio using the ASR model."""
    inputs = asr_processor(
        waveform,
        sampling_rate=sample_rate,
        return_tensors="pt"
    )
    inputs = {k: v.to(asr_model.device) for k, v in inputs.items()}

    with torch.no_grad():
        logits = asr_model(**inputs).logits
        # Greedy decoding: take the most likely token at each frame
        ids = torch.argmax(logits, dim=-1)[0]
        transcription = asr_processor.decode(ids)

    return transcription

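# Usage sketch (assumes the loaded processor/model accept raw 16 kHz mono
# arrays, as the preprocessing code in this module does):
#   wav, sr = torchaudio.load("sample.wav")
#   text = transcribe_audio(wav.squeeze().numpy(), sr,
#                           get_asr_model(), get_asr_processor())
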
def preprocess_reference_file(ref_file, sample_rate, asr_model, asr_processor):
    """Preprocess a single reference file and return its transcription."""
    ref_filename = os.path.basename(ref_file)
    try:
        # Load and resample to the target rate if needed
        ref_waveform, ref_sr = torchaudio.load(ref_file)
        if ref_sr != sample_rate:
            ref_waveform = torchaudio.transforms.Resample(ref_sr, sample_rate)(ref_waveform)
        ref_waveform = ref_waveform.squeeze().numpy()

        ref_transcription = transcribe_audio(ref_waveform, sample_rate, asr_model, asr_processor)

        logger.debug(f"Preprocessed reference file: {ref_filename}, transcription: '{ref_transcription}'")

        return {
            "waveform": ref_waveform,
            "transcription": ref_transcription,
            "processed_at": time.time()
        }
    except Exception as e:
        logger.error(f"❌ Error preprocessing {ref_filename}: {str(e)}")
        return None

def preprocess_all_references(reference_dir, sample_rate=16000):
    """Preprocess all reference audio files at startup."""
    global PREPROCESSING_COMPLETE, REFERENCE_CACHE, PREPROCESSING_ACTIVE

    # Only one process may preprocess at a time
    if not acquire_preprocessing_lock():
        logger.info("⏩ Skipping preprocessing as another process is already handling it")
        return False

    try:
        logger.info("🔄 Starting preprocessing of all reference audio files...")
        with PREPROCESSING_LOCK:
            PREPROCESSING_ACTIVE = True

        save_preprocessing_state(reference_dir)

        asr_model = get_asr_model()
        asr_processor = get_asr_processor()

        if asr_model is None or asr_processor is None:
            logger.error("❌ Cannot preprocess reference audio - ASR models not loaded")
            with PREPROCESSING_LOCK:
                PREPROCESSING_ACTIVE = False
            save_preprocessing_state(reference_dir)
            release_preprocessing_lock()
            return False

        try:
            pattern_dirs = [d for d in os.listdir(reference_dir)
                            if os.path.isdir(os.path.join(reference_dir, d))]

            total_processed = 0
            start_time = time.time()

            for pattern in pattern_dirs:
                # Block here while preprocessing is paused by a request handler
                PREPROCESSING_PAUSE.wait()

                pattern_path = os.path.join(reference_dir, pattern)
                reference_files = glob.glob(os.path.join(pattern_path, "*.wav"))
                reference_files = [f for f in reference_files if "dummy_reference" not in f]

                if not reference_files:
                    continue

                if pattern not in REFERENCE_CACHE:
                    REFERENCE_CACHE[pattern] = {}

                logger.info(f"🔄 Preprocessing {len(reference_files)} references for pattern: {pattern}")
                pattern_start_time = time.time()

                # Bound the worker count by CPUs, file count, and a hard cap of 5
                max_workers = min(os.cpu_count() or 4, len(reference_files), 5)

                processed_in_pattern = 0

                with ThreadPoolExecutor(max_workers=max_workers) as executor:
                    tasks = {
                        executor.submit(preprocess_reference_file, ref_file, sample_rate, asr_model, asr_processor):
                            ref_file for ref_file in reference_files
                    }

                    for future in tasks:
                        ref_file = tasks[future]
                        try:
                            result = future.result()
                            if result:
                                REFERENCE_CACHE[pattern][os.path.basename(ref_file)] = result
                                total_processed += 1
                                processed_in_pattern += 1
                        except Exception as e:
                            logger.error(f"❌ Failed to process {ref_file}: {str(e)}")

                pattern_time = time.time() - pattern_start_time
                logger.info(f"✅ Completed preprocessing pattern '{pattern}' - "
                            f"{processed_in_pattern}/{len(reference_files)} files in {pattern_time:.2f}s")

                # Persist progress after each pattern
                save_preprocessing_state(reference_dir)

            elapsed_time = time.time() - start_time
            logger.info(f"✅ Preprocessing complete! Processed {total_processed} "
                        f"reference files in {elapsed_time:.2f} seconds")

            with PREPROCESSING_LOCK:
                PREPROCESSING_COMPLETE = True
                PREPROCESSING_ACTIVE = False

            save_preprocessing_state(reference_dir)
            release_preprocessing_lock()
            return True

        except Exception as e:
            logger.error(f"❌ Error during reference preprocessing: {str(e)}")
            logger.debug(f"Stack trace: {traceback.format_exc()}")
            with PREPROCESSING_LOCK:
                PREPROCESSING_ACTIVE = False
            save_preprocessing_state(reference_dir)
            release_preprocessing_lock()
            return False

    except Exception as e:
        logger.error(f"❌ Unhandled exception in preprocessing: {str(e)}")
        with PREPROCESSING_LOCK:
            PREPROCESSING_ACTIVE = False
        save_preprocessing_state(reference_dir)
        release_preprocessing_lock()
        return False

def start_preprocessing_thread(reference_dir, sample_rate=16000):
    """Start preprocessing in a background thread."""
    # PREPROCESSING_COMPLETE must be declared global here, otherwise the
    # assignment below would create a local variable instead.
    global PREPROCESSING_THREAD, PREPROCESSING_ACTIVE, PREPROCESSING_COMPLETE

    with PREPROCESSING_LOCK:
        if PREPROCESSING_ACTIVE:
            logger.info("⏩ Skipping preprocessing start as it's already active")
            return False

    # Skip entirely if a previous run already finished (state persisted on disk)
    state = load_preprocessing_state(reference_dir)
    if state and state.get("complete", False):
        logger.info("⏩ Skipping preprocessing as previous run was completed")
        with PREPROCESSING_LOCK:
            PREPROCESSING_COMPLETE = True
        return False

    def preprocessing_worker():
        preprocess_all_references(reference_dir, sample_rate)

    PREPROCESSING_THREAD = threading.Thread(target=preprocessing_worker)
    PREPROCESSING_THREAD.daemon = True
    PREPROCESSING_THREAD.start()

    logger.info("🧵 Started reference audio preprocessing in background thread")
    return True

def init_reference_audio(reference_dir, output_dir):
    """Initialize reference audio directories and start preprocessing."""
    try:
        os.makedirs(output_dir, exist_ok=True)
        logger.info(f"📁 Created output directory: {output_dir}")

        # Log every candidate reference directory we can find
        found_dirs = search_reference_directories()
        for directory in found_dirs:
            logger.info(f"🔍 Found reference directory: {directory['path']} "
                        f"(patterns: {directory['pattern_dirs']}, wav files: {directory['wav_files']})")

        working_dir = reference_dir

        # Fall back to a discovered directory, or /tmp, if the given one isn't writable
        if not os.path.exists(reference_dir) or not os.access(reference_dir, os.W_OK):
            logger.warning(f"⚠️ Provided reference directory {reference_dir} is not writable")

            for directory in found_dirs:
                if directory['access']['writable'] and directory['pattern_dirs'] > 0:
                    working_dir = directory['path']
                    logger.info(f"✅ Using found reference directory: {working_dir}")
                    break
            else:
                working_dir = os.path.join('/tmp', 'reference_audios')
                logger.warning(f"⚠️ Using fallback reference directory in /tmp: {working_dir}")

        os.makedirs(working_dir, exist_ok=True)
        logger.info(f"📁 Using reference directory: {working_dir}")

        dirs_created, _ = setup_reference_patterns(working_dir)
        logger.info(f"📁 Created {dirs_created} directories")

        # If other reference directories exist, merge their files into the working directory
        if len(found_dirs) > 1:
            for directory in found_dirs:
                if directory['path'] != working_dir and directory['wav_files'] > 0:
                    try:
                        source_dir = directory['path']
                        logger.info(f"🔄 Copying reference files from {source_dir} to {working_dir}")

                        for item in os.listdir(source_dir):
                            src_path = os.path.join(source_dir, item)
                            if os.path.isdir(src_path):
                                wav_files = glob.glob(os.path.join(src_path, "*.wav"))
                                wav_files = [f for f in wav_files if "dummy_reference" not in f]

                                if wav_files:
                                    dst_path = os.path.join(working_dir, item)
                                    os.makedirs(dst_path, exist_ok=True)

                                    for wav_file in wav_files:
                                        wav_name = os.path.basename(wav_file)
                                        if "dummy_reference" not in wav_name:
                                            dst_file = os.path.join(dst_path, wav_name)
                                            if not os.path.exists(dst_file):
                                                shutil.copy2(wav_file, dst_file)
                                                logger.info(f"📄 Copied {wav_name} to {dst_path}")

                        # Only copy from the first suitable source directory
                        break
                    except Exception as e:
                        logger.warning(f"⚠️ Failed to copy reference files: {str(e)}")

        pattern_dirs = [d for d in os.listdir(working_dir)
                        if os.path.isdir(os.path.join(working_dir, d))]

        total_wav_files = 0
        for pattern in pattern_dirs:
            pattern_path = os.path.join(working_dir, pattern)
            wav_files = glob.glob(os.path.join(pattern_path, "*.wav"))
            valid_files = [f for f in wav_files if "dummy_reference" not in f]
            total_wav_files += len(valid_files)

        logger.info(f"📊 Total pattern directories: {len(pattern_dirs)}, Total reference WAV files: {total_wav_files}")

        # Remove any leftover dummy placeholder files
        for pattern in pattern_dirs:
            pattern_path = os.path.join(working_dir, pattern)
            dummy_files = glob.glob(os.path.join(pattern_path, "dummy_reference.wav"))
            for dummy in dummy_files:
                try:
                    os.remove(dummy)
                    logger.info(f"🗑️ Removed dummy file: {dummy}")
                except Exception as e:
                    logger.warning(f"⚠️ Failed to remove dummy file {dummy}: {str(e)}")

        start_preprocessing_thread(working_dir)

        return working_dir

    except Exception as e:
        logger.error(f"❌ Failed to set up reference audio directory: {str(e)}")
        logger.debug(f"Stack trace: {traceback.format_exc()}")

        # Last-resort fallback in /tmp
        fallback_dir = os.path.join('/tmp', 'reference_audios')
        try:
            os.makedirs(fallback_dir, exist_ok=True)
            setup_reference_patterns(fallback_dir)
            logger.warning(f"⚠️ Using emergency fallback directory: {fallback_dir}")
            return fallback_dir
        except Exception:
            logger.critical("🔥 CRITICAL: Failed to create even a fallback directory")
            return reference_dir

def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
    """Handle pronunciation evaluation requests with preprocessing optimization."""
    global REFERENCE_CACHE, PREPROCESSING_COMPLETE

    request_id = f"req-{id(request)}"
    logger.info(f"[{request_id}] 🎤 Starting pronunciation evaluation request")

    # Pause background preprocessing while we serve this request
    pause_preprocessing()

    temp_dir = None

    asr_model = get_asr_model()
    asr_processor = get_asr_processor()

    if asr_model is None or asr_processor is None:
        logger.error(f"[{request_id}] ❌ Evaluation endpoint called but ASR models aren't loaded")
        resume_preprocessing()
        return jsonify({"error": "ASR model not available"}), 503

    try:
        if "audio" not in request.files:
            logger.warning(f"[{request_id}] ⚠️ Evaluation request missing audio file")
            resume_preprocessing()
            return jsonify({"error": "No audio file uploaded"}), 400

        audio_file = request.files["audio"]
        reference_locator = request.form.get("reference_locator", "").strip()
        language = request.form.get("language", "kapampangan").lower()

        if not reference_locator:
            logger.warning(f"[{request_id}] ⚠️ No reference locator provided")
            resume_preprocessing()
            return jsonify({"error": "Reference locator is required"}), 400

        # Hash the upload so identical requests can be answered from cache
        audio_content = audio_file.read()
        audio_file.seek(0)

        audio_hash = hashlib.md5(audio_content).hexdigest()
        cache_key = f"{audio_hash}_{reference_locator}_{language}"

        if cache_key in EVALUATION_CACHE:
            logger.info(f"[{request_id}] ✅ Using cached evaluation result")
            resume_preprocessing()
            return EVALUATION_CACHE[cache_key]

        reference_dir_path = os.path.join(reference_dir, reference_locator)
        logger.info(f"[{request_id}] 📁 Reference directory path: {reference_dir_path}")

        if not os.path.exists(reference_dir_path):
            try:
                os.makedirs(reference_dir_path, exist_ok=True)
                logger.warning(f"[{request_id}] ⚠️ Created missing reference directory: {reference_dir_path}")
            except Exception as e:
                logger.error(f"[{request_id}] ❌ Failed to create reference directory: {str(e)}")
                resume_preprocessing()
                return jsonify({"error": f"Reference audio directory not found: {reference_locator}"}), 404

        reference_files = glob.glob(os.path.join(reference_dir_path, "*.wav"))
        reference_files = [f for f in reference_files if "dummy_reference" not in f]
        logger.info(f"[{request_id}] 📊 Found {len(reference_files)} valid reference files")

        if not reference_files:
            logger.warning(f"[{request_id}] ⚠️ No valid reference audio files found in {reference_dir_path}")
            resume_preprocessing()
            return jsonify({
                "error": f"No reference audio found for {reference_locator}",
                "message": "Please upload a reference audio file before evaluation.",
                "status": "MISSING_REFERENCE"
            }), 404

        lang_code = LANGUAGE_CODES.get(language, language)
        logger.info(f"[{request_id}] 🎯 Evaluating pronunciation for reference: {reference_locator}")

        # Write the upload to a per-request temp directory
        temp_dir = os.path.join(output_dir, f"temp_{request_id}")
        os.makedirs(temp_dir, exist_ok=True)

        user_audio_path = os.path.join(temp_dir, "user_audio_input.wav")
        with open(user_audio_path, 'wb') as f:
            f.write(audio_content)

        try:
            logger.info(f"[{request_id}] 🔄 Processing user audio file")
            # Normalize to mono at the target sample rate
            audio = AudioSegment.from_file(user_audio_path)
            audio = audio.set_frame_rate(sample_rate).set_channels(1)

            processed_path = os.path.join(temp_dir, "processed_user_audio.wav")
            audio.export(processed_path, format="wav")

            user_waveform, sr = torchaudio.load(processed_path)
            user_waveform = user_waveform.squeeze().numpy()
            logger.info(f"[{request_id}] ✅ User audio processed: {sr}Hz, length: {len(user_waveform)} samples")

            user_audio_path = processed_path
        except Exception as e:
            logger.error(f"[{request_id}] ❌ Audio processing failed: {str(e)}")
            resume_preprocessing()
            return jsonify({"error": f"Audio processing failed: {str(e)}"}), 500

        try:
            logger.info(f"[{request_id}] 🔄 Transcribing user audio")
            user_transcription = transcribe_audio(user_waveform, sample_rate, asr_model, asr_processor)
            logger.info(f"[{request_id}] ✅ User transcription: '{user_transcription}'")
        except Exception as e:
            logger.error(f"[{request_id}] ❌ ASR inference failed: {str(e)}")
            resume_preprocessing()
            return jsonify({"error": f"ASR inference failed: {str(e)}"}), 500

        # Prefer preprocessed reference transcriptions when available
        using_preprocessed = False
        all_results = []

        if reference_locator in REFERENCE_CACHE and REFERENCE_CACHE[reference_locator]:
            using_preprocessed = True
            logger.info(f"[{request_id}] 🚀 Using preprocessed reference data for {reference_locator}")

            for ref_filename, ref_data in REFERENCE_CACHE[reference_locator].items():
                ref_transcription = ref_data["transcription"]
                similarity = calculate_similarity(user_transcription, ref_transcription)

                logger.info(
                    f"[{request_id}] 📊 Similarity with {ref_filename}: {similarity:.2f}%, transcription: '{ref_transcription}'")

                all_results.append({
                    "reference_file": ref_filename,
                    "reference_text": ref_transcription,
                    "similarity_score": similarity
                })

        else:
            logger.info(f"[{request_id}] ⚠️ No preprocessed data available for {reference_locator}, processing on demand")

            max_workers = min(multiprocessing.cpu_count(), len(reference_files), 3)

            def process_reference_file(ref_file):
                ref_filename = os.path.basename(ref_file)
                try:
                    # Load, resample, and transcribe this reference file
                    ref_waveform, ref_sr = torchaudio.load(ref_file)
                    if ref_sr != sample_rate:
                        ref_waveform = torchaudio.transforms.Resample(ref_sr, sample_rate)(ref_waveform)
                    ref_waveform = ref_waveform.squeeze().numpy()

                    ref_transcription = transcribe_audio(ref_waveform, sample_rate, asr_model, asr_processor)

                    # Cache the result so later requests can skip this work
                    if reference_locator not in REFERENCE_CACHE:
                        REFERENCE_CACHE[reference_locator] = {}

                    REFERENCE_CACHE[reference_locator][ref_filename] = {
                        "waveform": ref_waveform,
                        "transcription": ref_transcription,
                        "processed_at": time.time()
                    }

                    similarity = calculate_similarity(user_transcription, ref_transcription)

                    logger.info(
                        f"[{request_id}] 📊 Similarity with {ref_filename}: {similarity:.2f}%, transcription: '{ref_transcription}'")

                    return {
                        "reference_file": ref_filename,
                        "reference_text": ref_transcription,
                        "similarity_score": similarity
                    }
                except Exception as e:
                    logger.error(f"[{request_id}] ❌ Error processing {ref_filename}: {str(e)}")
                    return {
                        "reference_file": ref_filename,
                        "reference_text": "Error",
                        "similarity_score": 0,
                        "error": str(e)
                    }

            # Start with a random sample of up to 3 references to keep latency low
            if len(reference_files) > 3 and not using_preprocessed:
                reference_files_sample = random.sample(reference_files, 3)
            else:
                reference_files_sample = reference_files

            logger.info(f"[{request_id}] 🔄 Processing {len(reference_files_sample)} reference files")

            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                initial_results = list(executor.map(process_reference_file, reference_files_sample))
                all_results = initial_results.copy()

                # If the best sampled score is low, widen the search while the
                # executor is still open
                if len(reference_files_sample) < len(reference_files) and not using_preprocessed:
                    best_score = 0
                    for result in all_results:
                        if result["similarity_score"] > best_score:
                            best_score = result["similarity_score"]

                    if best_score < 80.0:
                        remaining_files = [f for f in reference_files if f not in reference_files_sample]
                        logger.info(f"[{request_id}] 🔄 Score {best_score:.2f}% not high enough, checking {len(remaining_files)} more references")

                        additional_files = remaining_files[:5]

                        additional_results = list(executor.map(process_reference_file, additional_files))
                        all_results.extend(additional_results)

        # Clean up temp files before building the response
        try:
            if temp_dir and os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
                logger.debug(f"[{request_id}] 🧹 Cleaned up temporary directory")
        except Exception as e:
            logger.warning(f"[{request_id}] ⚠️ Failed to clean up temp files: {str(e)}")

        best_score = 0
        best_reference = None
        best_transcription = None

        # Rank results by similarity, best first
        all_results.sort(key=lambda x: x["similarity_score"], reverse=True)

        if all_results:
            best_result = all_results[0]
            best_score = best_result["similarity_score"]
            best_reference = best_result["reference_file"]
            best_transcription = best_result["reference_text"]

        is_correct = best_score >= 80.0

        if best_score >= 90.0:
            feedback = "Perfect pronunciation! Excellent job!"
        elif best_score >= 80.0:
            feedback = "Great pronunciation! Your accent is very good."
        elif best_score >= 70.0:
            feedback = "Almost there. Keep practicing to improve your pronunciation."
        elif best_score >= 50.0:
            feedback = "Fair attempt. Try focusing on the syllables that differ from the sample."
        else:
            feedback = "Try again. Listen carefully to the sample pronunciation."

        logger.info(f"[{request_id}] 📊 Final evaluation results: score={best_score:.2f}%, is_correct={is_correct}")
        logger.info(f"[{request_id}] 📝 Feedback: '{feedback}'")
        logger.info(f"[{request_id}] ✅ Evaluation complete using {'preprocessed' if using_preprocessed else 'on-demand'} reference data")

        response = jsonify({
            "is_correct": is_correct,
            "score": best_score,
            "feedback": feedback,
            "user_transcription": user_transcription,
            "best_reference_transcription": best_transcription,
            "reference_locator": reference_locator,
            "details": all_results,
            "total_references_compared": len(all_results),
            "total_available_references": len(reference_files),
            "used_preprocessed_data": using_preprocessed,
            "preprocessing_status": get_preprocessing_status()
        })

        # Cache the response, evicting the oldest entry once the cache is full
        MAX_CACHE_SIZE = 50
        EVALUATION_CACHE[cache_key] = response
        if len(EVALUATION_CACHE) > MAX_CACHE_SIZE:
            EVALUATION_CACHE.pop(next(iter(EVALUATION_CACHE)))

        resume_preprocessing()
        return response

    except Exception as e:
        logger.error(f"[{request_id}] ❌ Unhandled exception in evaluation endpoint: {str(e)}")
        logger.debug(f"[{request_id}] Stack trace: {traceback.format_exc()}")

        try:
            if temp_dir and os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
        except Exception:
            pass

        resume_preprocessing()
        return jsonify({"error": f"Internal server error: {str(e)}"}), 500

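# Example request (sketch; the actual route depends on how the Flask app wires
# this handler, shown here as /evaluate purely for illustration):
#   curl -X POST http://localhost:5000/evaluate \
#        -F "audio=@recording.wav" \
#        -F "reference_locator=mayap_a_abak" \
#        -F "language=kapampangan"
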
def handle_upload_reference(request, reference_dir, sample_rate):
    """Handle upload of reference audio files and preprocess them immediately."""
    global REFERENCE_CACHE

    # Pause background preprocessing while we serve this request
    pause_preprocessing()

    try:
        if "audio" not in request.files:
            logger.warning("⚠️ Reference upload missing audio file")
            resume_preprocessing()
            return jsonify({"error": "No audio file uploaded"}), 400

        reference_word = request.form.get("reference_word", "").strip()
        if not reference_word:
            logger.warning("⚠️ Reference upload missing reference word")
            resume_preprocessing()
            return jsonify({"error": "No reference word provided"}), 400

        # Only accept words from the canonical pattern list defined above
        if reference_word not in REFERENCE_PATTERNS:
            logger.warning(f"⚠️ Invalid reference word: {reference_word}")
            resume_preprocessing()
            return jsonify({"error": f"Invalid reference word. Available: {REFERENCE_PATTERNS}"}), 400

        if not os.path.exists(reference_dir):
            reference_dir = os.path.join('/tmp', 'reference_audios')
            os.makedirs(reference_dir, exist_ok=True)
            logger.warning(f"⚠️ Using alternate reference directory for upload: {reference_dir}")

        pattern_dir = os.path.join(reference_dir, reference_word)
        os.makedirs(pattern_dir, exist_ok=True)

        audio_file = request.files["audio"]
        filename = secure_filename(audio_file.filename)

        # Force a .wav extension; the file is converted to WAV below
        if not filename.lower().endswith('.wav'):
            base_name = os.path.splitext(filename)[0]
            filename = f"{base_name}.wav"

        file_path = os.path.join(pattern_dir, filename)

        # Save the upload to a temp file, then convert to mono WAV at the target rate
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            audio_file.save(temp_file.name)
            temp_path = temp_file.name

        try:
            audio = AudioSegment.from_file(temp_path)
            audio = audio.set_frame_rate(sample_rate).set_channels(1)
            audio.export(file_path, format="wav")
            logger.info(f"✅ Reference audio saved successfully for {reference_word}: {file_path}")

            try:
                os.unlink(temp_path)
            except Exception:
                pass

            # Preprocess the new file right away so it is immediately usable
            asr_model = get_asr_model()
            asr_processor = get_asr_processor()

            if asr_model and asr_processor:
                if reference_word not in REFERENCE_CACHE:
                    REFERENCE_CACHE[reference_word] = {}

                result = preprocess_reference_file(file_path, sample_rate, asr_model, asr_processor)
                if result:
                    REFERENCE_CACHE[reference_word][filename] = result
                    logger.info(f"✅ New reference audio preprocessed and added to cache: {filename}")

        except Exception as e:
            logger.error(f"❌ Reference audio processing failed: {str(e)}")
            resume_preprocessing()
            return jsonify({"error": f"Audio processing failed: {str(e)}"}), 500

        references = glob.glob(os.path.join(pattern_dir, "*.wav"))

        resume_preprocessing()
        return jsonify({
            "message": "Reference audio uploaded successfully",
            "reference_word": reference_word,
            "file": filename,
            "total_references": len(references),
            "preprocessed": True
        })

    except Exception as e:
        logger.error(f"❌ Unhandled exception in reference upload: {str(e)}")
        logger.debug(f"Stack trace: {traceback.format_exc()}")

        resume_preprocessing()
        return jsonify({"error": f"Internal server error: {str(e)}"}), 500

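# Example upload (sketch; route name illustrative, not defined in this module):
#   curl -X POST http://localhost:5000/upload_reference \
#        -F "audio=@reference.wav" \
#        -F "reference_word=mayap_a_abak"
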
def get_preprocessing_status():
    """Get the current status of reference audio preprocessing."""
    with PREPROCESSING_LOCK:
        is_complete = PREPROCESSING_COMPLETE
        is_active = PREPROCESSING_ACTIVE

    preprocessed_count = 0
    reference_patterns_count = 0

    for pattern, files in REFERENCE_CACHE.items():
        preprocessed_count += len(files)
        if len(files) > 0:
            reference_patterns_count += 1

    thread_running = PREPROCESSING_THREAD is not None and PREPROCESSING_THREAD.is_alive()

    # The pause event is set while preprocessing is allowed to run
    is_paused = not PREPROCESSING_PAUSE.is_set()

    return {
        "complete": is_complete,
        "active": is_active,
        "paused": is_paused,
        "preprocessed_files": preprocessed_count,
        "patterns_cached": len(REFERENCE_CACHE),
        "completed_patterns": reference_patterns_count,
        "thread_running": thread_running,
        "pid": os.getpid()
    }

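# Example status payload (illustrative values):
#   {"complete": False, "active": True, "paused": False,
#    "preprocessed_files": 42, "patterns_cached": 10,
#    "completed_patterns": 9, "thread_running": True, "pid": 1234}
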
def cleanup_resources():
    """Clean up any resources when the module is unloaded/restarted."""
    release_preprocessing_lock()


# Release the cross-process lock on interpreter exit
atexit.register(cleanup_resources)
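

# Minimal local smoke test (a sketch; assumes the ASR models wired up in
# translator.py can be loaded in this environment). Not executed when the
# Flask app imports this module.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    working_dir = init_reference_audio("./reference_audios", "./output")
    print(f"Reference directory in use: {working_dir}")
    print(json.dumps(get_preprocessing_status(), indent=2))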