Update evaluate.py
Browse files- evaluate.py +23 -25
evaluate.py
CHANGED
@@ -309,7 +309,7 @@ def handle_upload_reference(request, reference_dir, sample_rate):
|
|
309 |
return jsonify({"error": f"Internal server error: {str(e)}"}), 500
|
310 |
|
311 |
def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
|
312 |
-
"""Handle pronunciation evaluation requests"""
|
313 |
request_id = f"req-{id(request)}" # Create unique ID for this request
|
314 |
logger.info(f"[{request_id}] π Starting new pronunciation evaluation request")
|
315 |
|
@@ -415,18 +415,18 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
|
|
415 |
logger.error(f"[{request_id}] β ASR inference failed: {str(e)}")
|
416 |
return jsonify({"error": f"ASR inference failed: {str(e)}"}), 500
|
417 |
|
418 |
-
# Process reference files
|
419 |
-
|
|
|
|
|
|
|
420 |
results = []
|
421 |
-
|
422 |
-
best_reference = None
|
423 |
-
best_transcription = None
|
424 |
-
|
425 |
# Use this if you want to limit the number of files to process
|
426 |
-
max_files_to_check = min(len(reference_files), 5)
|
427 |
reference_files = reference_files[:max_files_to_check]
|
428 |
|
429 |
-
logger.info(f"[{request_id}] π Processing {len(reference_files)} reference files in
|
430 |
|
431 |
# Function to process a single reference file
|
432 |
def process_reference_file(ref_file):
|
@@ -472,22 +472,20 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
|
|
472 |
"error": str(e)
|
473 |
}
|
474 |
|
475 |
-
# Process files
|
476 |
-
with ThreadPoolExecutor(max_workers=
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
logger.info(f"[{request_id}] π Found excellent match: {best_score:.2f}%")
|
490 |
-
break
|
491 |
|
492 |
# Clean up temp files
|
493 |
try:
|
|
|
309 |
return jsonify({"error": f"Internal server error: {str(e)}"}), 500
|
310 |
|
311 |
def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
|
312 |
+
"""Handle pronunciation evaluation requests with optimized parallel comparison"""
|
313 |
request_id = f"req-{id(request)}" # Create unique ID for this request
|
314 |
logger.info(f"[{request_id}] π Starting new pronunciation evaluation request")
|
315 |
|
|
|
415 |
logger.error(f"[{request_id}] β ASR inference failed: {str(e)}")
|
416 |
return jsonify({"error": f"ASR inference failed: {str(e)}"}), 500
|
417 |
|
418 |
+
# OPTIMIZATION: Process all reference files at once
|
419 |
+
import multiprocessing
|
420 |
+
|
421 |
+
# Determine optimal number of workers based on CPU count
|
422 |
+
max_workers = min(multiprocessing.cpu_count(), len(reference_files))
|
423 |
results = []
|
424 |
+
|
|
|
|
|
|
|
425 |
# Use this if you want to limit the number of files to process
|
426 |
+
max_files_to_check = min(len(reference_files), 10) # Increased from 5 to 10
|
427 |
reference_files = reference_files[:max_files_to_check]
|
428 |
|
429 |
+
logger.info(f"[{request_id}] π Processing {len(reference_files)} reference files in parallel with {max_workers} workers")
|
430 |
|
431 |
# Function to process a single reference file
|
432 |
def process_reference_file(ref_file):
|
|
|
472 |
"error": str(e)
|
473 |
}
|
474 |
|
475 |
+
# OPTIMIZATION: Process all files simultaneously using ThreadPoolExecutor
|
476 |
+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
477 |
+
results = list(executor.map(process_reference_file, reference_files))
|
478 |
+
|
479 |
+
# Find the best result after all processing is complete
|
480 |
+
best_score = 0
|
481 |
+
best_reference = None
|
482 |
+
best_transcription = None
|
483 |
+
|
484 |
+
for result in results:
|
485 |
+
if result["similarity_score"] > best_score:
|
486 |
+
best_score = result["similarity_score"]
|
487 |
+
best_reference = result["reference_file"]
|
488 |
+
best_transcription = result["reference_text"]
|
|
|
|
|
489 |
|
490 |
# Clean up temp files
|
491 |
try:
|