Coco-18 committed on
Commit
a26b533
·
verified ·
1 Parent(s): ceb8cef

Update evaluate.py

Browse files
Files changed (1) hide show
  1. evaluate.py +308 -117
evaluate.py CHANGED
@@ -1,4 +1,4 @@
1
- # evaluate.py - Handles evaluation and comparing tasks
2
 
3
  import os
4
  import glob
@@ -13,6 +13,9 @@ from pydub import AudioSegment
13
  from flask import jsonify
14
  from werkzeug.utils import secure_filename
15
  from concurrent.futures import ThreadPoolExecutor
 
 
 
16
 
17
  # Import necessary functions from translator.py
18
  from translator import get_asr_model, get_asr_processor, LANGUAGE_CODES
@@ -20,9 +23,18 @@ from translator import get_asr_model, get_asr_processor, LANGUAGE_CODES
20
  # Configure logging
21
  logger = logging.getLogger("speech_api")
22
 
23
- # Initialize cache at module level instead
 
 
 
 
24
  EVALUATION_CACHE = {}
25
 
 
 
 
 
 
26
  def calculate_similarity(text1, text2):
27
  """Calculate text similarity percentage."""
28
  def clean_text(text):
@@ -105,8 +117,130 @@ def search_reference_directories():
105
 
106
  return found_dirs
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  def init_reference_audio(reference_dir, output_dir):
109
- """Initialize reference audio directories and return the working directory path"""
110
  try:
111
  # Create the output directory first
112
  os.makedirs(output_dir, exist_ok=True)
@@ -179,7 +313,7 @@ def init_reference_audio(reference_dir, output_dir):
179
  except Exception as e:
180
  logger.warning(f"⚠️ Failed to copy reference files: {str(e)}")
181
 
182
- # Log the final contents, excluding dummy files - MODIFIED HERE
183
  pattern_dirs = [d for d in os.listdir(working_dir)
184
  if os.path.isdir(os.path.join(working_dir, d))]
185
 
@@ -191,8 +325,6 @@ def init_reference_audio(reference_dir, output_dir):
191
  # Count only non-dummy files
192
  valid_files = [f for f in wav_files if "dummy_reference" not in f]
193
  total_wav_files += len(valid_files)
194
- # Remove the individual directory logging
195
- # logger.info(f" - {pattern}: {len(valid_files)} valid WAV files")
196
 
197
  logger.info(f"πŸ“Š Total pattern directories: {len(pattern_dirs)}, Total reference WAV files: {total_wav_files}")
198
 
@@ -207,6 +339,9 @@ def init_reference_audio(reference_dir, output_dir):
207
  except Exception as e:
208
  logger.warning(f"⚠️ Failed to remove dummy file {dummy}: {str(e)}")
209
 
 
 
 
210
  return working_dir
211
 
212
  except Exception as e:
@@ -225,7 +360,9 @@ def init_reference_audio(reference_dir, output_dir):
225
  return reference_dir
226
 
227
  def handle_upload_reference(request, reference_dir, sample_rate):
228
- """Handle upload of reference audio files"""
 
 
229
  try:
230
  if "audio" not in request.files:
231
  logger.warning("⚠️ Reference upload missing audio file")
@@ -295,6 +432,22 @@ def handle_upload_reference(request, reference_dir, sample_rate):
295
  os.unlink(temp_path)
296
  except:
297
  pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
  except Exception as e:
299
  logger.error(f"❌ Reference audio processing failed: {str(e)}")
300
  return jsonify({"error": f"Audio processing failed: {str(e)}"}), 500
@@ -305,7 +458,8 @@ def handle_upload_reference(request, reference_dir, sample_rate):
305
  "message": "Reference audio uploaded successfully",
306
  "reference_word": reference_word,
307
  "file": filename,
308
- "total_references": len(references)
 
309
  })
310
 
311
  except Exception as e:
@@ -314,7 +468,9 @@ def handle_upload_reference(request, reference_dir, sample_rate):
314
  return jsonify({"error": f"Internal server error: {str(e)}"}), 500
315
 
316
  def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
317
- """Handle pronunciation evaluation requests with speed optimizations"""
 
 
318
  request_id = f"req-{id(request)}"
319
  logger.info(f"[{request_id}] πŸ†• Starting pronunciation evaluation request")
320
 
@@ -329,7 +485,7 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
329
  return jsonify({"error": "ASR model not available"}), 503
330
 
331
  try:
332
- # OPTIMIZATION 1: Check cache first for identical audio
333
  if "audio" not in request.files:
334
  logger.warning(f"[{request_id}] ⚠️ Evaluation request missing audio file")
335
  return jsonify({"error": "No audio file uploaded"}), 400
@@ -343,11 +499,10 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
343
  logger.warning(f"[{request_id}] ⚠️ No reference locator provided")
344
  return jsonify({"error": "Reference locator is required"}), 400
345
 
346
- # OPTIMIZATION 2: Simple caching based on audio content hash + reference_locator
347
  audio_content = audio_file.read()
348
  audio_file.seek(0) # Reset file pointer after reading
349
 
350
- import hashlib
351
  audio_hash = hashlib.md5(audio_content).hexdigest()
352
  cache_key = f"{audio_hash}_{reference_locator}_{language}"
353
 
@@ -416,120 +571,121 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
416
  # Transcribe user audio
417
  try:
418
  logger.info(f"[{request_id}] πŸ”„ Transcribing user audio")
419
- # Remove language parameter if causing warnings
420
- inputs = asr_processor(
421
- user_waveform,
422
- sampling_rate=sample_rate,
423
- return_tensors="pt"
424
- )
425
- inputs = {k: v.to(asr_model.device) for k, v in inputs.items()}
426
-
427
- with torch.no_grad():
428
- logits = asr_model(**inputs).logits
429
- ids = torch.argmax(logits, dim=-1)[0]
430
- user_transcription = asr_processor.decode(ids)
431
-
432
  logger.info(f"[{request_id}] βœ… User transcription: '{user_transcription}'")
433
  except Exception as e:
434
  logger.error(f"[{request_id}] ❌ ASR inference failed: {str(e)}")
435
  return jsonify({"error": f"ASR inference failed: {str(e)}"}), 500
436
 
437
- # OPTIMIZATION 3: Use a smaller sample of reference files
438
- import multiprocessing
439
- import random
440
 
441
- # OPTIMIZATION 4: Limit to just a few files for initial comparison
442
- # If we have many reference files, randomly sample some for quick evaluation
443
- if len(reference_files) > 3:
444
- # Randomly select 3 files for faster comparison
445
- reference_files_sample = random.sample(reference_files, 3)
446
- else:
447
- reference_files_sample = reference_files
448
 
449
- # Determine optimal number of workers based on CPU count (but keep it small)
450
- max_workers = min(multiprocessing.cpu_count(), len(reference_files_sample), 3)
451
- initial_results = []
452
-
453
- logger.info(f"[{request_id}] πŸ”„ Quick scan: processing {len(reference_files_sample)} reference files")
454
-
455
- # Function to process a single reference file
456
- def process_reference_file(ref_file):
457
- ref_filename = os.path.basename(ref_file)
458
- try:
459
- # Load and resample reference audio
460
- ref_waveform, ref_sr = torchaudio.load(ref_file)
461
- if ref_sr != sample_rate:
462
- ref_waveform = torchaudio.transforms.Resample(ref_sr, sample_rate)(ref_waveform)
463
- ref_waveform = ref_waveform.squeeze().numpy()
464
-
465
- # Transcribe reference audio
466
- inputs = asr_processor(
467
- ref_waveform,
468
- sampling_rate=sample_rate,
469
- return_tensors="pt"
470
- )
471
- inputs = {k: v.to(asr_model.device) for k, v in inputs.items()}
472
-
473
- with torch.no_grad():
474
- logits = asr_model(**inputs).logits
475
- ids = torch.argmax(logits, dim=-1)[0]
476
- ref_transcription = asr_processor.decode(ids)
477
-
478
- # Calculate similarity
479
  similarity = calculate_similarity(user_transcription, ref_transcription)
480
-
481
  logger.info(
482
  f"[{request_id}] πŸ“Š Similarity with {ref_filename}: {similarity:.2f}%, transcription: '{ref_transcription}'")
483
-
484
- return {
485
  "reference_file": ref_filename,
486
  "reference_text": ref_transcription,
487
  "similarity_score": similarity
488
- }
489
- except Exception as e:
490
- logger.error(f"[{request_id}] ❌ Error processing {ref_filename}: {str(e)}")
491
- return {
492
- "reference_file": ref_filename,
493
- "reference_text": "Error",
494
- "similarity_score": 0,
495
- "error": str(e)
496
- }
497
-
498
- # Process the sample files in parallel
499
- with ThreadPoolExecutor(max_workers=max_workers) as executor:
500
- initial_results = list(executor.map(process_reference_file, reference_files_sample))
501
-
502
- # Find the best result from the initial sample
503
- best_score = 0
504
- best_reference = None
505
- best_transcription = None
506
-
507
- for result in initial_results:
508
- if result["similarity_score"] > best_score:
509
- best_score = result["similarity_score"]
510
- best_reference = result["reference_file"]
511
- best_transcription = result["reference_text"]
512
-
513
- # OPTIMIZATION 5: If we already found a very good match, don't process more files
514
- all_results = initial_results.copy()
515
- remaining_files = [f for f in reference_files if f not in reference_files_sample]
516
-
517
- # Only process more files if our best score isn't already very good
518
- if best_score < 80.0 and remaining_files:
519
- logger.info(f"[{request_id}] πŸ”„ Score {best_score:.2f}% not high enough, checking {len(remaining_files)} more references")
520
 
521
- # Process remaining files
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
522
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
523
- additional_results = list(executor.map(process_reference_file, remaining_files[:5])) # Process max 5 more
524
- all_results.extend(additional_results)
525
 
526
- # Update best result if we found a better one
527
- for result in additional_results:
528
- if result["similarity_score"] > best_score:
529
- best_score = result["similarity_score"]
530
- best_reference = result["reference_file"]
531
- best_transcription = result["reference_text"]
532
-
 
 
 
 
 
 
 
 
 
 
 
 
 
533
  # Clean up temp files
534
  try:
535
  if temp_dir and os.path.exists(temp_dir):
@@ -537,6 +693,20 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
537
  logger.debug(f"[{request_id}] 🧹 Cleaned up temporary directory")
538
  except Exception as e:
539
  logger.warning(f"[{request_id}] ⚠️ Failed to clean up temp files: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
 
541
  # Determine feedback based on score
542
  is_correct = best_score >= 70.0
@@ -554,11 +724,8 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
554
 
555
  logger.info(f"[{request_id}] πŸ“Š Final evaluation results: score={best_score:.2f}%, is_correct={is_correct}")
556
  logger.info(f"[{request_id}] πŸ“ Feedback: '{feedback}'")
557
- logger.info(f"[{request_id}] βœ… Evaluation complete")
558
 
559
- # Sort results by score descending
560
- all_results.sort(key=lambda x: x["similarity_score"], reverse=True)
561
-
562
  # Create response
563
  response = jsonify({
564
  "is_correct": is_correct,
@@ -570,10 +737,11 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
570
  "details": all_results,
571
  "total_references_compared": len(all_results),
572
  "total_available_references": len(reference_files),
573
- "quick_evaluation": True
 
574
  })
575
 
576
- # OPTIMIZATION 6: Cache the result for future requests using module-level cache
577
  MAX_CACHE_SIZE = 50
578
  EVALUATION_CACHE[cache_key] = response
579
  if len(EVALUATION_CACHE) > MAX_CACHE_SIZE:
@@ -593,4 +761,27 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
593
  except:
594
  pass
595
 
596
- return jsonify({"error": f"Internal server error: {str(e)}"}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # evaluate.py - Handles evaluation and comparing tasks with reference preprocessing
2
 
3
  import os
4
  import glob
 
13
  from flask import jsonify
14
  from werkzeug.utils import secure_filename
15
  from concurrent.futures import ThreadPoolExecutor
16
+ import hashlib
17
+ import threading
18
+ import time
19
 
20
  # Import necessary functions from translator.py
21
  from translator import get_asr_model, get_asr_processor, LANGUAGE_CODES
 
23
  # Configure logging
24
  logger = logging.getLogger("speech_api")
25
 
26
+ # Enhanced cache structure to store preprocessed reference audio data
27
+ # Format: {reference_locator: {reference_file: {waveform, transcription, processed_at}}}
28
+ REFERENCE_CACHE = {}
29
+
30
+ # Traditional evaluation cache for quick responses to identical requests
31
  EVALUATION_CACHE = {}
32
 
33
+ # A flag to indicate if preprocessing is complete
34
+ PREPROCESSING_COMPLETE = False
35
+ PREPROCESSING_LOCK = threading.Lock()
36
+ PREPROCESSING_THREAD = None
37
+
38
  def calculate_similarity(text1, text2):
39
  """Calculate text similarity percentage."""
40
  def clean_text(text):
 
117
 
118
  return found_dirs
119
 
120
def transcribe_audio(waveform, sample_rate, asr_model, asr_processor):
    """Run the ASR model on one waveform and return the decoded text.

    Args:
        waveform: Audio samples, handed unchanged to ``asr_processor``.
        sample_rate: Passed to the processor as ``sampling_rate``.
        asr_model: Callable model exposing ``.device``; its output must
            provide ``.logits``.
        asr_processor: Callable feature extractor that also offers
            ``decode``.

    Returns:
        The value of ``asr_processor.decode`` applied to the greedy
        (argmax over the last axis) token ids of the first batch item.
    """
    features = asr_processor(
        waveform, sampling_rate=sample_rate, return_tensors="pt"
    )
    # Move every processor output onto the device the model lives on.
    model_inputs = {
        name: tensor.to(asr_model.device) for name, tensor in features.items()
    }

    # Pure inference: gradients are not needed.
    with torch.no_grad():
        output = asr_model(**model_inputs)
        token_ids = output.logits.argmax(dim=-1)[0]
        text = asr_processor.decode(token_ids)

    return text
135
+
136
def preprocess_reference_file(ref_file, sample_rate, asr_model, asr_processor):
    """Load one reference recording, transcribe it and return cacheable data.

    Args:
        ref_file: Path of the audio file to load with ``torchaudio.load``.
        sample_rate: Target sampling rate; the audio is resampled when the
            file's rate differs.
        asr_model: ASR model forwarded to ``transcribe_audio``.
        asr_processor: ASR processor forwarded to ``transcribe_audio``.

    Returns:
        A dict with keys ``"waveform"`` (NumPy array), ``"transcription"``
        and ``"processed_at"`` (``time.time()`` timestamp), or ``None`` when
        anything raised while loading or transcribing (the error is logged).
    """
    ref_filename = os.path.basename(ref_file)
    try:
        # Load and resample reference audio
        ref_waveform, ref_sr = torchaudio.load(ref_file)
        if ref_sr != sample_rate:
            ref_waveform = torchaudio.transforms.Resample(ref_sr, sample_rate)(ref_waveform)

        # Downmix multi-channel audio to mono. squeeze() alone would leave a
        # (channels, samples) array for stereo files.
        # NOTE(review): presumably the ASR processor would read that 2-D array
        # as a batch of separate utterances - confirm against its docs.
        if ref_waveform.dim() > 1 and ref_waveform.size(0) > 1:
            ref_waveform = ref_waveform.mean(dim=0)
        ref_waveform = ref_waveform.squeeze().numpy()

        # Transcribe reference audio
        ref_transcription = transcribe_audio(ref_waveform, sample_rate, asr_model, asr_processor)

        logger.debug(f"Preprocessed reference file: {ref_filename}, transcription: '{ref_transcription}'")

        return {
            "waveform": ref_waveform,
            "transcription": ref_transcription,
            "processed_at": time.time(),
        }
    except Exception as e:
        # Best effort: a broken reference file must not abort the whole batch.
        logger.error(f"❌ Error preprocessing {ref_filename}: {str(e)}")
        return None
159
+
160
def preprocess_all_references(reference_dir, sample_rate=16000):
    """Transcribe every reference recording under ``reference_dir`` into the cache.

    Each sub-directory of ``reference_dir`` is treated as one pattern. Its
    ``*.wav`` files (except those whose path contains ``"dummy_reference"``)
    are run through ``preprocess_reference_file`` on a small thread pool, and
    the results are stored in ``REFERENCE_CACHE[pattern][<file basename>]``.

    Args:
        reference_dir: Directory whose sub-directories hold reference audio.
        sample_rate: Sampling rate forwarded to ``preprocess_reference_file``.

    Returns:
        ``True`` when the scan finished (``PREPROCESSING_COMPLETE`` is then set
        under ``PREPROCESSING_LOCK``). ``False`` when the ASR model or
        processor is unavailable, or when an unexpected error aborted the scan.
    """
    global PREPROCESSING_COMPLETE, REFERENCE_CACHE

    logger.info("πŸš€ Starting preprocessing of all reference audio files...")

    # Get ASR model and processor
    asr_model = get_asr_model()
    asr_processor = get_asr_processor()

    if asr_model is None or asr_processor is None:
        logger.error("❌ Cannot preprocess reference audio - ASR models not loaded")
        return False

    try:
        pattern_dirs = [d for d in os.listdir(reference_dir)
                        if os.path.isdir(os.path.join(reference_dir, d))]

        total_processed = 0
        start_time = time.time()

        # Process each reference pattern directory
        for pattern in pattern_dirs:
            pattern_path = os.path.join(reference_dir, pattern)
            reference_files = [
                f for f in glob.glob(os.path.join(pattern_path, "*.wav"))
                if "dummy_reference" not in f
            ]

            if not reference_files:
                continue

            logger.info(f"πŸ”„ Preprocessing {len(reference_files)} references for pattern: {pattern}")

            # Keep the pool small: at most 5 workers, never more than files.
            max_workers = min(os.cpu_count() or 4, len(reference_files), 5)

            # Collect results locally first. Writing into the live cache dict
            # one file at a time would let a concurrent reader see a
            # half-filled pattern, or hit "dictionary changed size during
            # iteration" while looping over it.
            processed = {}
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                tasks = {
                    executor.submit(preprocess_reference_file, ref_file, sample_rate,
                                    asr_model, asr_processor): ref_file
                    for ref_file in reference_files
                }

                for future, ref_file in tasks.items():
                    try:
                        result = future.result()
                    except Exception as e:
                        logger.error(f"❌ Failed to process {ref_file}: {str(e)}")
                        continue
                    if result:
                        processed[os.path.basename(ref_file)] = result

            # Publish the whole pattern with a single assignment. Entries that
            # are already cached for this pattern are kept; the dict object
            # any reader currently holds is left untouched.
            merged = dict(REFERENCE_CACHE.get(pattern, {}))
            merged.update(processed)
            REFERENCE_CACHE[pattern] = merged
            total_processed += len(processed)

        elapsed_time = time.time() - start_time
        logger.info(f"βœ… Preprocessing complete! Processed {total_processed} reference files in {elapsed_time:.2f} seconds")

        with PREPROCESSING_LOCK:
            PREPROCESSING_COMPLETE = True

        return True

    except Exception as e:
        logger.error(f"❌ Error during reference preprocessing: {str(e)}")
        logger.debug(f"Stack trace: {traceback.format_exc()}")
        return False
228
+
229
def start_preprocessing_thread(reference_dir, sample_rate=16000):
    """Launch ``preprocess_all_references`` on a background daemon thread.

    The thread object is stored in the module-level ``PREPROCESSING_THREAD``.

    Args:
        reference_dir: Forwarded to ``preprocess_all_references``.
        sample_rate: Forwarded to ``preprocess_all_references``.
    """
    global PREPROCESSING_THREAD

    # Daemon thread: it must not keep the process alive at shutdown.
    worker = threading.Thread(
        target=preprocess_all_references,
        args=(reference_dir, sample_rate),
        daemon=True,
    )
    PREPROCESSING_THREAD = worker
    worker.start()

    logger.info("🧡 Started reference audio preprocessing in background thread")
241
+
242
  def init_reference_audio(reference_dir, output_dir):
243
+ """Initialize reference audio directories and start preprocessing"""
244
  try:
245
  # Create the output directory first
246
  os.makedirs(output_dir, exist_ok=True)
 
313
  except Exception as e:
314
  logger.warning(f"⚠️ Failed to copy reference files: {str(e)}")
315
 
316
+ # Log the final contents, excluding dummy files
317
  pattern_dirs = [d for d in os.listdir(working_dir)
318
  if os.path.isdir(os.path.join(working_dir, d))]
319
 
 
325
  # Count only non-dummy files
326
  valid_files = [f for f in wav_files if "dummy_reference" not in f]
327
  total_wav_files += len(valid_files)
 
 
328
 
329
  logger.info(f"πŸ“Š Total pattern directories: {len(pattern_dirs)}, Total reference WAV files: {total_wav_files}")
330
 
 
339
  except Exception as e:
340
  logger.warning(f"⚠️ Failed to remove dummy file {dummy}: {str(e)}")
341
 
342
+ # Start preprocessing references in background
343
+ start_preprocessing_thread(working_dir)
344
+
345
  return working_dir
346
 
347
  except Exception as e:
 
360
  return reference_dir
361
 
362
  def handle_upload_reference(request, reference_dir, sample_rate):
363
+ """Handle upload of reference audio files and preprocess immediately"""
364
+ global REFERENCE_CACHE
365
+
366
  try:
367
  if "audio" not in request.files:
368
  logger.warning("⚠️ Reference upload missing audio file")
 
432
  os.unlink(temp_path)
433
  except:
434
  pass
435
+
436
+ # Immediately preprocess this new reference file and add to cache
437
+ asr_model = get_asr_model()
438
+ asr_processor = get_asr_processor()
439
+
440
+ if asr_model and asr_processor:
441
+ # Initialize cache for this pattern if needed
442
+ if reference_word not in REFERENCE_CACHE:
443
+ REFERENCE_CACHE[reference_word] = {}
444
+
445
+ # Preprocess and add to cache
446
+ result = preprocess_reference_file(file_path, sample_rate, asr_model, asr_processor)
447
+ if result:
448
+ REFERENCE_CACHE[reference_word][filename] = result
449
+ logger.info(f"βœ… New reference audio preprocessed and added to cache: {filename}")
450
+
451
  except Exception as e:
452
  logger.error(f"❌ Reference audio processing failed: {str(e)}")
453
  return jsonify({"error": f"Audio processing failed: {str(e)}"}), 500
 
458
  "message": "Reference audio uploaded successfully",
459
  "reference_word": reference_word,
460
  "file": filename,
461
+ "total_references": len(references),
462
+ "preprocessed": True
463
  })
464
 
465
  except Exception as e:
 
468
  return jsonify({"error": f"Internal server error: {str(e)}"}), 500
469
 
470
  def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
471
+ """Handle pronunciation evaluation requests with preprocessing optimization"""
472
+ global REFERENCE_CACHE, PREPROCESSING_COMPLETE
473
+
474
  request_id = f"req-{id(request)}"
475
  logger.info(f"[{request_id}] πŸ†• Starting pronunciation evaluation request")
476
 
 
485
  return jsonify({"error": "ASR model not available"}), 503
486
 
487
  try:
488
+ # Check for basic request requirements
489
  if "audio" not in request.files:
490
  logger.warning(f"[{request_id}] ⚠️ Evaluation request missing audio file")
491
  return jsonify({"error": "No audio file uploaded"}), 400
 
499
  logger.warning(f"[{request_id}] ⚠️ No reference locator provided")
500
  return jsonify({"error": "Reference locator is required"}), 400
501
 
502
+ # OPTIMIZATION: Simple caching based on audio content hash + reference_locator
503
  audio_content = audio_file.read()
504
  audio_file.seek(0) # Reset file pointer after reading
505
 
 
506
  audio_hash = hashlib.md5(audio_content).hexdigest()
507
  cache_key = f"{audio_hash}_{reference_locator}_{language}"
508
 
 
571
  # Transcribe user audio
572
  try:
573
  logger.info(f"[{request_id}] πŸ”„ Transcribing user audio")
574
+ user_transcription = transcribe_audio(user_waveform, sample_rate, asr_model, asr_processor)
 
 
 
 
 
 
 
 
 
 
 
 
575
  logger.info(f"[{request_id}] βœ… User transcription: '{user_transcription}'")
576
  except Exception as e:
577
  logger.error(f"[{request_id}] ❌ ASR inference failed: {str(e)}")
578
  return jsonify({"error": f"ASR inference failed: {str(e)}"}), 500
579
 
580
+ # Check if we have preprocessed data for this reference locator
581
+ using_preprocessed = False
582
+ all_results = []
583
 
584
+ if reference_locator in REFERENCE_CACHE and REFERENCE_CACHE[reference_locator]:
585
+ using_preprocessed = True
586
+ logger.info(f"[{request_id}] πŸš€ Using preprocessed reference data for {reference_locator}")
 
 
 
 
587
 
588
+ # Compare with all cached references
589
+ for ref_filename, ref_data in REFERENCE_CACHE[reference_locator].items():
590
+ ref_transcription = ref_data["transcription"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591
  similarity = calculate_similarity(user_transcription, ref_transcription)
592
+
593
  logger.info(
594
  f"[{request_id}] πŸ“Š Similarity with {ref_filename}: {similarity:.2f}%, transcription: '{ref_transcription}'")
595
+
596
+ all_results.append({
597
  "reference_file": ref_filename,
598
  "reference_text": ref_transcription,
599
  "similarity_score": similarity
600
+ })
601
+
602
+ else:
603
+ # If not preprocessed yet, do traditional processing
604
+ logger.info(f"[{request_id}] ⚠️ No preprocessed data available for {reference_locator}, processing on demand")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
605
 
606
+ # Process files in parallel with ThreadPoolExecutor
607
+ import random
608
+ import multiprocessing
609
+
610
+ # Determine optimal number of workers based on CPU count (but keep it small)
611
+ max_workers = min(multiprocessing.cpu_count(), len(reference_files), 3)
612
+
613
+ # Function to process a single reference file
614
+ def process_reference_file(ref_file):
615
+ ref_filename = os.path.basename(ref_file)
616
+ try:
617
+ # Load and resample reference audio
618
+ ref_waveform, ref_sr = torchaudio.load(ref_file)
619
+ if ref_sr != sample_rate:
620
+ ref_waveform = torchaudio.transforms.Resample(ref_sr, sample_rate)(ref_waveform)
621
+ ref_waveform = ref_waveform.squeeze().numpy()
622
+
623
+ # Transcribe reference audio
624
+ ref_transcription = transcribe_audio(ref_waveform, sample_rate, asr_model, asr_processor)
625
+
626
+ # Add to cache for future use
627
+ if reference_locator not in REFERENCE_CACHE:
628
+ REFERENCE_CACHE[reference_locator] = {}
629
+
630
+ REFERENCE_CACHE[reference_locator][ref_filename] = {
631
+ "waveform": ref_waveform,
632
+ "transcription": ref_transcription,
633
+ "processed_at": time.time()
634
+ }
635
+
636
+ # Calculate similarity
637
+ similarity = calculate_similarity(user_transcription, ref_transcription)
638
+
639
+ logger.info(
640
+ f"[{request_id}] πŸ“Š Similarity with {ref_filename}: {similarity:.2f}%, transcription: '{ref_transcription}'")
641
+
642
+ return {
643
+ "reference_file": ref_filename,
644
+ "reference_text": ref_transcription,
645
+ "similarity_score": similarity
646
+ }
647
+ except Exception as e:
648
+ logger.error(f"[{request_id}] ❌ Error processing {ref_filename}: {str(e)}")
649
+ return {
650
+ "reference_file": ref_filename,
651
+ "reference_text": "Error",
652
+ "similarity_score": 0,
653
+ "error": str(e)
654
+ }
655
+
656
+ # If we have many files, select a smaller sample for initial quick evaluation
657
+ if len(reference_files) > 3 and not using_preprocessed:
658
+ reference_files_sample = random.sample(reference_files, 3)
659
+ else:
660
+ reference_files_sample = reference_files
661
+
662
+ logger.info(f"[{request_id}] πŸ”„ Processing {len(reference_files_sample)} reference files")
663
+
664
+ # Process the files in parallel
665
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
666
+ initial_results = list(executor.map(process_reference_file, reference_files_sample))
667
+ all_results = initial_results.copy()
668
 
669
+ # If we didn't process all files and didn't find a good match, process more
670
+ if len(reference_files_sample) < len(reference_files) and not using_preprocessed:
671
+ # Find the best result so far
672
+ best_score = 0
673
+ for result in all_results:
674
+ if result["similarity_score"] > best_score:
675
+ best_score = result["similarity_score"]
676
+
677
+ # Only process more files if our best score isn't already very good
678
+ if best_score < 80.0:
679
+ remaining_files = [f for f in reference_files if f not in reference_files_sample]
680
+ logger.info(f"[{request_id}] πŸ”„ Score {best_score:.2f}% not high enough, checking {len(remaining_files)} more references")
681
+
682
+ # Limit how many additional files we process
683
+ additional_files = remaining_files[:5] # Process max 5 more
684
+
685
+ # Process remaining files
686
+ additional_results = list(executor.map(process_reference_file, additional_files))
687
+ all_results.extend(additional_results)
688
+
689
  # Clean up temp files
690
  try:
691
  if temp_dir and os.path.exists(temp_dir):
 
693
  logger.debug(f"[{request_id}] 🧹 Cleaned up temporary directory")
694
  except Exception as e:
695
  logger.warning(f"[{request_id}] ⚠️ Failed to clean up temp files: {str(e)}")
696
+
697
+ # Find the best result
698
+ best_score = 0
699
+ best_reference = None
700
+ best_transcription = None
701
+
702
+ # Sort results by score descending
703
+ all_results.sort(key=lambda x: x["similarity_score"], reverse=True)
704
+
705
+ if all_results:
706
+ best_result = all_results[0]
707
+ best_score = best_result["similarity_score"]
708
+ best_reference = best_result["reference_file"]
709
+ best_transcription = best_result["reference_text"]
710
 
711
  # Determine feedback based on score
712
  is_correct = best_score >= 70.0
 
724
 
725
  logger.info(f"[{request_id}] πŸ“Š Final evaluation results: score={best_score:.2f}%, is_correct={is_correct}")
726
  logger.info(f"[{request_id}] πŸ“ Feedback: '{feedback}'")
727
+ logger.info(f"[{request_id}] βœ… Evaluation complete using {'preprocessed' if using_preprocessed else 'on-demand'} reference data")
728
 
 
 
 
729
  # Create response
730
  response = jsonify({
731
  "is_correct": is_correct,
 
737
  "details": all_results,
738
  "total_references_compared": len(all_results),
739
  "total_available_references": len(reference_files),
740
+ "used_preprocessed_data": using_preprocessed,
741
+ "preprocessing_complete": PREPROCESSING_COMPLETE
742
  })
743
 
744
+ # Cache the result for future identical requests
745
  MAX_CACHE_SIZE = 50
746
  EVALUATION_CACHE[cache_key] = response
747
  if len(EVALUATION_CACHE) > MAX_CACHE_SIZE:
 
761
  except:
762
  pass
763
 
764
+ return jsonify({"error": f"Internal server error: {str(e)}"}), 500
765
+
766
+ # Add a new function to get preprocessing status
767
def get_preprocessing_status():
    """Report how far reference-audio preprocessing has progressed.

    Returns:
        A dict with:
        ``"complete"`` - value of ``PREPROCESSING_COMPLETE`` read under
        ``PREPROCESSING_LOCK``;
        ``"preprocessed_files"`` - total number of cached reference entries;
        ``"patterns_cached"`` - number of patterns present in the cache;
        ``"thread_running"`` - whether ``PREPROCESSING_THREAD`` exists and is
        alive.
    """
    with PREPROCESSING_LOCK:
        is_complete = PREPROCESSING_COMPLETE

    # Count from a snapshot: other threads may add patterns while this runs,
    # and looping over the live dict could then raise "dictionary changed size
    # during iteration". Both counters use the same snapshot so they agree.
    cached_patterns = list(REFERENCE_CACHE.values())
    preprocessed_count = sum(len(files) for files in cached_patterns)

    # Read the global once so the None check and is_alive() see the same object.
    worker = PREPROCESSING_THREAD
    thread_running = worker is not None and worker.is_alive()

    return {
        "complete": is_complete,
        "preprocessed_files": preprocessed_count,
        "patterns_cached": len(cached_patterns),
        "thread_running": thread_running,
    }