Update evaluate.py
evaluate.py  CHANGED  (+82, -24)
@@ -20,6 +20,9 @@ from translator import get_asr_model, get_asr_processor, LANGUAGE_CODES
 # Configure logging
 logger = logging.getLogger("speech_api")
 
+if not hasattr(handle_evaluation_request, 'cache'):
+    handle_evaluation_request.cache = {}
+
 def calculate_similarity(text1, text2):
     """Calculate text similarity percentage."""
     def clean_text(text):
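Note on the module-level guard added in this hunk: at this point in the file, handle_evaluation_request is not defined yet (it appears at line 314 on the new side), so evaluating hasattr(handle_evaluation_request, 'cache') would raise NameError when the module is imported. A minimal sketch of an alternative, assuming a plain module-level dict; the EVALUATION_CACHE name is hypothetical, echoing a comment that appears later in this diff:

# Sketch only, not part of the commit: a module-level dict avoids referencing
# handle_evaluation_request before it exists.
EVALUATION_CACHE = {}

def cache_get(key):
    # Return the cached response, or None on a miss (hypothetical helper).
    return EVALUATION_CACHE.get(key)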
@@ -309,9 +312,9 @@ def handle_upload_reference(request, reference_dir, sample_rate):
         return jsonify({"error": f"Internal server error: {str(e)}"}), 500
 
 def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
-    """Handle pronunciation evaluation requests with
-    request_id = f"req-{id(request)}"
-    logger.info(f"[{request_id}] Starting
+    """Handle pronunciation evaluation requests with speed optimizations"""
+    request_id = f"req-{id(request)}"
+    logger.info(f"[{request_id}] Starting pronunciation evaluation request")
 
     temp_dir = None
 
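The request ID in this hunk is derived from id(request). CPython can reuse an object's id once the object is garbage collected, so two different requests may end up logging the same req- prefix. If unique IDs matter for tracing, a uuid-based sketch (an alternative, not what the commit does):

import uuid

# uuid4 is collision-resistant; id(request) can repeat across requests.
request_id = f"req-{uuid.uuid4().hex[:8]}"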
@@ -324,6 +327,7 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
         return jsonify({"error": "ASR model not available"}), 503
 
     try:
+        # OPTIMIZATION 1: Check cache first for identical audio
         if "audio" not in request.files:
             logger.warning(f"[{request_id}] Evaluation request missing audio file")
             return jsonify({"error": "No audio file uploaded"}), 400
@@ -337,6 +341,19 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
             logger.warning(f"[{request_id}] No reference locator provided")
             return jsonify({"error": "Reference locator is required"}), 400
 
+        # OPTIMIZATION 2: Simple caching based on audio content hash + reference_locator
+        audio_content = audio_file.read()
+        audio_file.seek(0)  # Reset file pointer after reading
+
+        import hashlib
+        audio_hash = hashlib.md5(audio_content).hexdigest()
+        cache_key = f"{audio_hash}_{reference_locator}_{language}"
+
+        # Check in-memory cache (define EVALUATION_CACHE at module level)
+        if hasattr(handle_evaluation_request, 'cache') and cache_key in handle_evaluation_request.cache:
+            logger.info(f"[{request_id}] Using cached evaluation result")
+            return handle_evaluation_request.cache[cache_key]
+
         # Construct full reference directory path
         reference_dir_path = os.path.join(reference_dir, reference_locator)
         logger.info(f"[{request_id}] Reference directory path: {reference_dir_path}")
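The cache key in this hunk is the MD5 digest of the uploaded bytes combined with the reference locator and language, so the cache only hits on a byte-identical re-upload of the same word in the same language. A standalone sketch of that keying scheme; the function name and example values are illustrative, not from the source:

import hashlib

def make_cache_key(audio_bytes: bytes, reference_locator: str, language: str) -> str:
    # MD5 is acceptable here because the digest is only a cache key,
    # not an integrity or security check.
    audio_hash = hashlib.md5(audio_bytes).hexdigest()
    return f"{audio_hash}_{reference_locator}_{language}"

# e.g. make_cache_key(wav_bytes, "some-locator", "en")  # illustrative values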
@@ -366,7 +383,7 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
             }), 404
 
         lang_code = LANGUAGE_CODES.get(language, language)
-        logger.info(f"[{request_id}] Evaluating pronunciation for reference: {reference_locator}
+        logger.info(f"[{request_id}] Evaluating pronunciation for reference: {reference_locator}")
 
         # Create a request-specific temp directory to avoid conflicts
         temp_dir = os.path.join(output_dir, f"temp_{request_id}")
@@ -375,7 +392,7 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
         # Process user audio
         user_audio_path = os.path.join(temp_dir, "user_audio_input.wav")
         with open(user_audio_path, 'wb') as f:
-            f.write(
+            f.write(audio_content)  # Use the content we already read
 
         try:
             logger.info(f"[{request_id}] Processing user audio file")
@@ -415,18 +432,23 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
             logger.error(f"[{request_id}] ASR inference failed: {str(e)}")
             return jsonify({"error": f"ASR inference failed: {str(e)}"}), 500
 
-        # OPTIMIZATION:
+        # OPTIMIZATION 3: Use a smaller sample of reference files
         import multiprocessing
+        import random
 
-        #
-        logger.info(f"[{request_id}]
+        # OPTIMIZATION 4: Limit to just a few files for initial comparison
+        # If we have many reference files, randomly sample some for quick evaluation
+        if len(reference_files) > 3:
+            # Randomly select 3 files for faster comparison
+            reference_files_sample = random.sample(reference_files, 3)
+        else:
+            reference_files_sample = reference_files
+
+        # Determine optimal number of workers based on CPU count (but keep it small)
+        max_workers = min(multiprocessing.cpu_count(), len(reference_files_sample), 3)
+        initial_results = []
 
+        logger.info(f"[{request_id}] Quick scan: processing {len(reference_files_sample)} reference files")
 
         # Function to process a single reference file
         def process_reference_file(ref_file):
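Optimizations 3 and 4 trade accuracy for speed: only three randomly chosen reference files are scored in the first pass, and the thread pool is capped at three workers, or fewer if there are fewer files or CPU cores. A self-contained sketch of the same sampling and pool-sizing logic, with hypothetical helper names:

import multiprocessing
import random

def pick_quick_sample(reference_files, k=3):
    # random.sample raises ValueError when k exceeds the population size,
    # hence the explicit length check mirrored from the diff.
    if len(reference_files) > k:
        return random.sample(reference_files, k)
    return list(reference_files)

def pool_size(sample, cap=3):
    # Never more workers than files or CPU cores, and never more than the cap.
    return min(multiprocessing.cpu_count(), len(sample), cap)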
@@ -438,8 +460,7 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
                 ref_waveform = torchaudio.transforms.Resample(ref_sr, sample_rate)(ref_waveform)
                 ref_waveform = ref_waveform.squeeze().numpy()
 
-                # Transcribe reference audio
-                # Remove language parameter if causing warnings
+                # Transcribe reference audio
                 inputs = asr_processor(
                     ref_waveform,
                     sampling_rate=sample_rate,
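This hunk feeds the resampled reference waveform through asr_processor and the ASR model, but the decoding half is outside the diff. The sketch below assumes a Wav2Vec2-style CTC setup (for example MMS), which is consistent with the sampling_rate= processor call and the LANGUAGE_CODES import; the actual internals of translator.get_asr_model are not shown here, so treat this as an assumption:

import torch

def transcribe(waveform, asr_model, asr_processor, sample_rate=16000):
    # Assumed Wav2Vec2/MMS-style CTC decoding; adapt if translator.py differs.
    inputs = asr_processor(waveform, sampling_rate=sample_rate, return_tensors="pt")
    with torch.no_grad():
        logits = asr_model(**inputs).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    return asr_processor.batch_decode(predicted_ids)[0]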
@@ -472,20 +493,40 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
                     "error": str(e)
                 }
 
-        #
+        # Process the sample files in parallel
         with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            initial_results = list(executor.map(process_reference_file, reference_files_sample))
 
-        # Find the best result
+        # Find the best result from the initial sample
         best_score = 0
         best_reference = None
         best_transcription = None
 
-        for result in
+        for result in initial_results:
            if result["similarity_score"] > best_score:
                best_score = result["similarity_score"]
                best_reference = result["reference_file"]
                best_transcription = result["reference_text"]
+
+        # OPTIMIZATION 5: If we already found a very good match, don't process more files
+        all_results = initial_results.copy()
+        remaining_files = [f for f in reference_files if f not in reference_files_sample]
+
+        # Only process more files if our best score isn't already very good
+        if best_score < 80.0 and remaining_files:
+            logger.info(f"[{request_id}] Score {best_score:.2f}% not high enough, checking {len(remaining_files)} more references")
+
+            # Process remaining files
+            with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                additional_results = list(executor.map(process_reference_file, remaining_files[:5]))  # Process max 5 more
+            all_results.extend(additional_results)
+
+            # Update best result if we found a better one
+            for result in additional_results:
+                if result["similarity_score"] > best_score:
+                    best_score = result["similarity_score"]
+                    best_reference = result["reference_file"]
+                    best_transcription = result["reference_text"]
 
         # Clean up temp files
         try:
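The two-pass scan above first scores the sampled files in parallel, then touches at most five of the remaining references, and only when the best quick-scan score falls below 80%. A compact sketch of that control flow, independent of the Flask handler; the function and parameter names are illustrative, while the 80% threshold and the five-file cap come from the diff:

from concurrent.futures import ThreadPoolExecutor

def two_pass_scan(score_file, sample, remaining, good_enough=80.0, max_extra=5, max_workers=3):
    # Pass 1: score only the sampled reference files in parallel.
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        results = list(pool.map(score_file, sample))
    best = max((r["similarity_score"] for r in results), default=0.0)
    # Pass 2: only if the quick scan did not already find a strong match.
    if best < good_enough and remaining:
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            results.extend(pool.map(score_file, remaining[:max_extra]))
    return results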
@@ -514,17 +555,34 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
         logger.info(f"[{request_id}] Evaluation complete")
 
         # Sort results by score descending
+        all_results.sort(key=lambda x: x["similarity_score"], reverse=True)
+
+        # Create response
+        response = jsonify({
             "is_correct": is_correct,
             "score": best_score,
             "feedback": feedback,
             "user_transcription": user_transcription,
             "best_reference_transcription": best_transcription,
             "reference_locator": reference_locator,
-            "details":
+            "details": all_results,
+            "total_references_compared": len(all_results),
+            "total_available_references": len(reference_files),
+            "quick_evaluation": True
         })
+
+        # OPTIMIZATION 6: Cache the result for future requests
+        if not hasattr(handle_evaluation_request, 'cache'):
+            handle_evaluation_request.cache = {}
+
+        # Store in cache (limit cache size to avoid memory issues)
+        MAX_CACHE_SIZE = 50
+        handle_evaluation_request.cache[cache_key] = response
+        if len(handle_evaluation_request.cache) > MAX_CACHE_SIZE:
+            # Remove oldest entry (simplified approach)
+            handle_evaluation_request.cache.pop(next(iter(handle_evaluation_request.cache)))
+
+        return response
 
     except Exception as e:
         logger.error(f"[{request_id}] Unhandled exception in evaluation endpoint: {str(e)}")