Coco-18 committed on
Commit
203cc78
·
verified ·
1 Parent(s): 7c33098

Update evaluate.py

Browse files
Files changed (1) hide show
  1. evaluate.py +23 -25
evaluate.py CHANGED
@@ -309,7 +309,7 @@ def handle_upload_reference(request, reference_dir, sample_rate):
309
  return jsonify({"error": f"Internal server error: {str(e)}"}), 500
310
 
311
  def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
312
- """Handle pronunciation evaluation requests"""
313
  request_id = f"req-{id(request)}" # Create unique ID for this request
314
  logger.info(f"[{request_id}] πŸ†• Starting new pronunciation evaluation request")
315
 
@@ -415,18 +415,18 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
415
  logger.error(f"[{request_id}] ❌ ASR inference failed: {str(e)}")
416
  return jsonify({"error": f"ASR inference failed: {str(e)}"}), 500
417
 
418
- # Process reference files in batches
419
- batch_size = 2 # Process 2 files at a time - adjust based on your hardware
 
 
 
420
  results = []
421
- best_score = 0
422
- best_reference = None
423
- best_transcription = None
424
-
425
  # Use this if you want to limit the number of files to process
426
- max_files_to_check = min(5, len(reference_files)) # Check at most 5 files
427
  reference_files = reference_files[:max_files_to_check]
428
 
429
- logger.info(f"[{request_id}] πŸ”„ Processing {len(reference_files)} reference files in batches of {batch_size}")
430
 
431
  # Function to process a single reference file
432
  def process_reference_file(ref_file):
@@ -472,22 +472,20 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
472
  "error": str(e)
473
  }
474
 
475
- # Process files in batches using ThreadPoolExecutor
476
- with ThreadPoolExecutor(max_workers=batch_size) as executor:
477
- batch_results = list(executor.map(process_reference_file, reference_files))
478
- results.extend(batch_results)
479
-
480
- # Find the best result
481
- for result in batch_results:
482
- if result["similarity_score"] > best_score:
483
- best_score = result["similarity_score"]
484
- best_reference = result["reference_file"]
485
- best_transcription = result["reference_text"]
486
-
487
- # Exit early if we found a very good match (optional)
488
- if best_score > 80.0:
489
- logger.info(f"[{request_id}] 🏁 Found excellent match: {best_score:.2f}%")
490
- break
491
 
492
  # Clean up temp files
493
  try:
 
309
  return jsonify({"error": f"Internal server error: {str(e)}"}), 500
310
 
311
  def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
312
+ """Handle pronunciation evaluation requests with optimized parallel comparison"""
313
  request_id = f"req-{id(request)}" # Create unique ID for this request
314
  logger.info(f"[{request_id}] πŸ†• Starting new pronunciation evaluation request")
315
 
 
415
  logger.error(f"[{request_id}] ❌ ASR inference failed: {str(e)}")
416
  return jsonify({"error": f"ASR inference failed: {str(e)}"}), 500
417
 
418
+ # OPTIMIZATION: Process all reference files at once
419
+ import multiprocessing
420
+
421
+ # Determine optimal number of workers based on CPU count
422
+ max_workers = min(multiprocessing.cpu_count(), len(reference_files))
423
  results = []
424
+
 
 
 
425
  # Use this if you want to limit the number of files to process
426
+ max_files_to_check = min(len(reference_files), 10) # Increased from 5 to 10
427
  reference_files = reference_files[:max_files_to_check]
428
 
429
+ logger.info(f"[{request_id}] πŸ”„ Processing {len(reference_files)} reference files in parallel with {max_workers} workers")
430
 
431
  # Function to process a single reference file
432
  def process_reference_file(ref_file):
 
472
  "error": str(e)
473
  }
474
 
475
+ # OPTIMIZATION: Process all files simultaneously using ThreadPoolExecutor
476
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
477
+ results = list(executor.map(process_reference_file, reference_files))
478
+
479
+ # Find the best result after all processing is complete
480
+ best_score = 0
481
+ best_reference = None
482
+ best_transcription = None
483
+
484
+ for result in results:
485
+ if result["similarity_score"] > best_score:
486
+ best_score = result["similarity_score"]
487
+ best_reference = result["reference_file"]
488
+ best_transcription = result["reference_text"]
 
 
489
 
490
  # Clean up temp files
491
  try: