Spaces:

Coco-18
/

Kapamtalk

Sleeping

App Files Community

Coco-18 committed on Mar 26

Commit

53ae3c9

verified ·

1 Parent(s): edd4f88

Update evaluate.py

Browse files

Files changed (1) hide show

evaluate.py +174 -174

evaluate.py CHANGED Viewed

@@ -326,112 +326,70 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
                 logger.warning(f"[{request_id}] ⚠️ Created missing reference directory: {reference_dir_path}")
             except Exception as e:
                 logger.error(f"[{request_id}] ❌ Failed to create reference directory: {str(e)}")
-                return jsonify({"error": f"Reference audio directory not found: {reference_locator}"}), 404
-    # Check for reference files
-    reference_files = glob.glob(os.path.join(reference_dir_path, "*.wav"))
-    logger.info(f"[{request_id}] 📁 Found {len(reference_files)} reference files")
-    # If no reference files exist, create a dummy reference file
-    if not reference_files:
-        logger.warning(f"[{request_id}] ⚠️ No reference audio files found in {reference_dir_path}")
-        # Create a dummy reference file
-        try:
-            dummy_file_path = os.path.join(reference_dir_path, "dummy_reference.wav")
-            logger.info(f"[{request_id}] 🔄 Creating dummy reference file: {dummy_file_path}")
-            # Create a 1-second audio file with a slight sound
-            silent_audio = AudioSegment.silent(duration=1000, frame_rate=sample_rate)
-            # Add a tiny bit of noise to help ASR
-            for i in range(50, 950, 300):
-            silent_audio = silent_audio.overlay(AudioSegment.silent(duration=50, frame_rate=sample_rate) + 3, position=i)
-            silent_audio.export(dummy_file_path, format="wav")
-            # Add it to the list of reference files
-            reference_files = [dummy_file_path]
-            logger.info(f"[{request_id}] ✅ Created dummy reference file for testing")
-        except Exception as e:
-            logger.error(f"[{request_id}] ❌ Failed to create dummy reference: {str(e)}")
-            return jsonify({"error": f"No reference audio found for {reference_locator}"}), 404
-    lang_code = LANGUAGE_CODES.get(language, language)
-    logger.info(f"[{request_id}] 🔄 Evaluating pronunciation for reference: {reference_locator} with language code: {lang_code}")
-    # Create a request-specific temp directory to avoid conflicts
-    temp_dir = os.path.join(output_dir, f"temp_{request_id}")
-    os.makedirs(temp_dir, exist_ok=True)
-    # Process user audio
-    user_audio_path = os.path.join(temp_dir, "user_audio_input.wav")
-    with open(user_audio_path, 'wb') as f:
-        f.write(audio_file.read())
-    try:
-        logger.info(f"[{request_id}] 🔄 Processing user audio file")
-        audio = AudioSegment.from_file(user_audio_path)
-        audio = audio.set_frame_rate(sample_rate).set_channels(1)
-        processed_path = os.path.join(temp_dir, "processed_user_audio.wav")
-        audio.export(processed_path, format="wav")
-        user_waveform, sr = torchaudio.load(processed_path)
-        user_waveform = user_waveform.squeeze().numpy()
-        logger.info(f"[{request_id}] ✅ User audio processed: {sr}Hz, length: {len(user_waveform)} samples")
-        user_audio_path = processed_path
-    except Exception as e:
-        logger.error(f"[{request_id}] ❌ Audio processing failed: {str(e)}")
-        return jsonify({"error": f"Audio processing failed: {str(e)}"}), 500
-    # Transcribe user audio
-    try:
-        logger.info(f"[{request_id}] 🔄 Transcribing user audio")
-        # Remove language parameter if causing warnings
-        inputs = asr_processor(
-            user_waveform,
-            sampling_rate=sample_rate,
-            return_tensors="pt"
-        )
-        inputs = {k: v.to(asr_model.device) for k, v in inputs.items()}
-        with torch.no_grad():
-            logits = asr_model(**inputs).logits
-        ids = torch.argmax(logits, dim=-1)[0]
-        user_transcription = asr_processor.decode(ids)
-        logger.info(f"[{request_id}] ✅ User transcription: '{user_transcription}'")
-    except Exception as e:
-        logger.error(f"[{request_id}] ❌ ASR inference failed: {str(e)}")
-        return jsonify({"error": f"ASR inference failed: {str(e)}"}), 500
-    # Process reference files in batches
-    batch_size = 2  # Process 2 files at a time - adjust based on your hardware
-    results = []
-    best_score = 0
-    best_reference = None
-    best_transcription = None
-    # Use this if you want to limit the number of files to process
-    max_files_to_check = min(5, len(reference_files))  # Check at most 5 files
-    reference_files = reference_files[:max_files_to_check]
-    logger.info(f"[{request_id}] 🔄 Processing {len(reference_files)} reference files in batches of {batch_size}")
-    # Function to process a single reference file
-    def process_reference_file(ref_file):
-        ref_filename = os.path.basename(ref_file)
         try:
-            # Load and resample reference audio
-            ref_waveform, ref_sr = torchaudio.load(ref_file)
-            if ref_sr != sample_rate:
-                ref_waveform = torchaudio.transforms.Resample(ref_sr, sample_rate)(ref_waveform)
-            ref_waveform = ref_waveform.squeeze().numpy()
-            # Transcribe reference audio - use the local asr_model and asr_processor
             # Remove language parameter if causing warnings
             inputs = asr_processor(
-                ref_waveform,
                 sampling_rate=sample_rate,
                 return_tensors="pt"
             )
@@ -440,93 +398,135 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
             with torch.no_grad():
                 logits = asr_model(**inputs).logits
             ids = torch.argmax(logits, dim=-1)[0]
-            ref_transcription = asr_processor.decode(ids)
-            # Calculate similarity
-            similarity = calculate_similarity(user_transcription, ref_transcription)
-            logger.info(
-                f"[{request_id}] 📊 Similarity with {ref_filename}: {similarity:.2f}%, transcription: '{ref_transcription}'")
-            return {
-                "reference_file": ref_filename,
-                "reference_text": ref_transcription,
-                "similarity_score": similarity
-            }
         except Exception as e:
-            logger.error(f"[{request_id}] ❌ Error processing {ref_filename}: {str(e)}")
-            return {
-                "reference_file": ref_filename,
-                "reference_text": "Error",
-                "similarity_score": 0,
-                "error": str(e)
-            }
-    # Process files in batches using ThreadPoolExecutor
-    with ThreadPoolExecutor(max_workers=batch_size) as executor:
-        batch_results = list(executor.map(process_reference_file, reference_files))
-        results.extend(batch_results)
-        # Find the best result
-        for result in batch_results:
-            if result["similarity_score"] > best_score:
-                best_score = result["similarity_score"]
-                best_reference = result["reference_file"]
-                best_transcription = result["reference_text"]
-                # Exit early if we found a very good match (optional)
-                if best_score > 80.0:
-                    logger.info(f"[{request_id}] 🏁 Found excellent match: {best_score:.2f}%")
-                    break
-    # Clean up temp files
-    try:
-        if temp_dir and os.path.exists(temp_dir):
-        shutil.rmtree(temp_dir)
-        logger.debug(f"[{request_id}] 🧹 Cleaned up temporary directory")
     except Exception as e:
-        logger.warning(f"[{request_id}] ⚠️ Failed to clean up temp files: {str(e)}")
-    # Determine feedback based on score
-    is_correct = best_score >= 70.0
-    if best_score >= 90.0:
-        feedback = "Perfect pronunciation! Excellent job!"
-    elif best_score >= 80.0:
-        feedback = "Great pronunciation! Your accent is very good."
-    elif best_score >= 70.0:
-        feedback = "Good pronunciation. Keep practicing!"
-    elif best_score >= 50.0:
-        feedback = "Fair attempt. Try focusing on the syllables that differ from the sample."
-    else:
-        feedback = "Try again. Listen carefully to the sample pronunciation."
-    logger.info(f"[{request_id}] 📊 Final evaluation results: score={best_score:.2f}%, is_correct={is_correct}")
-    logger.info(f"[{request_id}] 📝 Feedback: '{feedback}'")
-    logger.info(f"[{request_id}] ✅ Evaluation complete")
-    # Sort results by score descending
-    results.sort(key=lambda x: x["similarity_score"], reverse=True)
-    return jsonify({
-        "is_correct": is_correct,
-        "score": best_score,
-        "feedback": feedback,
-        "user_transcription": user_transcription,
-        "best_reference_transcription": best_transcription,
-        "reference_locator": reference_locator,
-        "details": results
-    })
-except Exception as e:
-    logger.error(f"[{request_id}] ❌ Unhandled exception in evaluation endpoint: {str(e)}")
-    logger.debug(f"[{request_id}] Stack trace: {traceback.format_exc()}")
-    # Clean up on error
-    try:
-        if temp_dir and os.path.exists(temp_dir):
-            shutil.rmtree(temp_dir)
-    except:
-        pass
-    return jsonify({"error": f"Internal server error: {str(e)}"}), 500

                 logger.warning(f"[{request_id}] ⚠️ Created missing reference directory: {reference_dir_path}")
             except Exception as e:
                 logger.error(f"[{request_id}] ❌ Failed to create reference directory: {str(e)}")
+                    return jsonify({"error": f"Reference audio directory not found: {reference_locator}"}), 404
+        # Check for reference files
+        reference_files = glob.glob(os.path.join(reference_dir_path, "*.wav"))
+        logger.info(f"[{request_id}] 📁 Found {len(reference_files)} reference files")
+        # If no reference files exist, create a dummy reference file
+        if not reference_files:
+            logger.warning(f"[{request_id}] ⚠️ No reference audio files found in {reference_dir_path}")
+            # Create a dummy reference file
+            try:
+                dummy_file_path = os.path.join(reference_dir_path, "dummy_reference.wav")
+                logger.info(f"[{request_id}] 🔄 Creating dummy reference file: {dummy_file_path}")
+                # Create a 1-second audio file with a slight sound
+                silent_audio = AudioSegment.silent(duration=1000, frame_rate=sample_rate)
+                # Add a tiny bit of noise to help ASR
+                for i in range(50, 950, 300):
+                    silent_audio = silent_audio.overlay(AudioSegment.silent(duration=50, frame_rate=sample_rate) + 3, position=i)
+                silent_audio.export(dummy_file_path, format="wav")
+                # Add it to the list of reference files
+                reference_files = [dummy_file_path]
+                logger.info(f"[{request_id}] ✅ Created dummy reference file for testing")
+            except Exception as e:
+                logger.error(f"[{request_id}] ❌ Failed to create dummy reference: {str(e)}")
+                return jsonify({"error": f"No reference audio found for {reference_locator}"}), 404
+        lang_code = LANGUAGE_CODES.get(language, language)
+        logger.info(f"[{request_id}] 🔄 Evaluating pronunciation for reference: {reference_locator} with language code: {lang_code}")
+        # Create a request-specific temp directory to avoid conflicts
+        temp_dir = os.path.join(output_dir, f"temp_{request_id}")
+        os.makedirs(temp_dir, exist_ok=True)
+        # Process user audio
+        user_audio_path = os.path.join(temp_dir, "user_audio_input.wav")
+        with open(user_audio_path, 'wb') as f:
+            f.write(audio_file.read())
+        try:
+            logger.info(f"[{request_id}] 🔄 Processing user audio file")
+            audio = AudioSegment.from_file(user_audio_path)
+            audio = audio.set_frame_rate(sample_rate).set_channels(1)
+            processed_path = os.path.join(temp_dir, "processed_user_audio.wav")
+            audio.export(processed_path, format="wav")
+            user_waveform, sr = torchaudio.load(processed_path)
+            user_waveform = user_waveform.squeeze().numpy()
+            logger.info(f"[{request_id}] ✅ User audio processed: {sr}Hz, length: {len(user_waveform)} samples")
+            user_audio_path = processed_path
+        except Exception as e:
+            logger.error(f"[{request_id}] ❌ Audio processing failed: {str(e)}")
+            return jsonify({"error": f"Audio processing failed: {str(e)}"}), 500
+        # Transcribe user audio
         try:
+            logger.info(f"[{request_id}] 🔄 Transcribing user audio")
             # Remove language parameter if causing warnings
             inputs = asr_processor(
+                user_waveform,
                 sampling_rate=sample_rate,
                 return_tensors="pt"
             )
             with torch.no_grad():
                 logits = asr_model(**inputs).logits
             ids = torch.argmax(logits, dim=-1)[0]
+            user_transcription = asr_processor.decode(ids)
+            logger.info(f"[{request_id}] ✅ User transcription: '{user_transcription}'")
+        except Exception as e:
+            logger.error(f"[{request_id}] ❌ ASR inference failed: {str(e)}")
+            return jsonify({"error": f"ASR inference failed: {str(e)}"}), 500
+        # Process reference files in batches
+        batch_size = 2  # Process 2 files at a time - adjust based on your hardware
+        results = []
+        best_score = 0
+        best_reference = None
+        best_transcription = None
+        # Use this if you want to limit the number of files to process
+        max_files_to_check = min(5, len(reference_files))  # Check at most 5 files
+        reference_files = reference_files[:max_files_to_check]
+        logger.info(f"[{request_id}] 🔄 Processing {len(reference_files)} reference files in batches of {batch_size}")
+        # Function to process a single reference file
+        def process_reference_file(ref_file):
+            ref_filename = os.path.basename(ref_file)
+            try:
+                # Load and resample reference audio
+                ref_waveform, ref_sr = torchaudio.load(ref_file)
+                if ref_sr != sample_rate:
+                    ref_waveform = torchaudio.transforms.Resample(ref_sr, sample_rate)(ref_waveform)
+                ref_waveform = ref_waveform.squeeze().numpy()
+                # Transcribe reference audio - use the local asr_model and asr_processor
+                # Remove language parameter if causing warnings
+                inputs = asr_processor(
+                    ref_waveform,
+                    sampling_rate=sample_rate,
+                    return_tensors="pt"
+                )
+                inputs = {k: v.to(asr_model.device) for k, v in inputs.items()}
+                with torch.no_grad():
+                    logits = asr_model(**inputs).logits
+                ids = torch.argmax(logits, dim=-1)[0]
+                ref_transcription = asr_processor.decode(ids)
+                # Calculate similarity
+                similarity = calculate_similarity(user_transcription, ref_transcription)
+                logger.info(
+                    f"[{request_id}] 📊 Similarity with {ref_filename}: {similarity:.2f}%, transcription: '{ref_transcription}'")
+                return {
+                    "reference_file": ref_filename,
+                    "reference_text": ref_transcription,
+                    "similarity_score": similarity
+                }
+            except Exception as e:
+                logger.error(f"[{request_id}] ❌ Error processing {ref_filename}: {str(e)}")
+                return {
+                    "reference_file": ref_filename,
+                    "reference_text": "Error",
+                    "similarity_score": 0,
+                    "error": str(e)
+                }
+        # Process files in batches using ThreadPoolExecutor
+        with ThreadPoolExecutor(max_workers=batch_size) as executor:
+            batch_results = list(executor.map(process_reference_file, reference_files))
+            results.extend(batch_results)
+            # Find the best result
+            for result in batch_results:
+                if result["similarity_score"] > best_score:
+                    best_score = result["similarity_score"]
+                    best_reference = result["reference_file"]
+                    best_transcription = result["reference_text"]
+                    # Exit early if we found a very good match (optional)
+                    if best_score > 80.0:
+                        logger.info(f"[{request_id}] 🏁 Found excellent match: {best_score:.2f}%")
+                        break
+        # Clean up temp files
+        try:
+            if temp_dir and os.path.exists(temp_dir):
+                shutil.rmtree(temp_dir)
+                logger.debug(f"[{request_id}] 🧹 Cleaned up temporary directory")
         except Exception as e:
+            logger.warning(f"[{request_id}] ⚠️ Failed to clean up temp files: {str(e)}")
+        # Determine feedback based on score
+        is_correct = best_score >= 70.0
+        if best_score >= 90.0:
+            feedback = "Perfect pronunciation! Excellent job!"
+        elif best_score >= 80.0:
+            feedback = "Great pronunciation! Your accent is very good."
+        elif best_score >= 70.0:
+            feedback = "Good pronunciation. Keep practicing!"
+        elif best_score >= 50.0:
+            feedback = "Fair attempt. Try focusing on the syllables that differ from the sample."
+        else:
+            feedback = "Try again. Listen carefully to the sample pronunciation."
+        logger.info(f"[{request_id}] 📊 Final evaluation results: score={best_score:.2f}%, is_correct={is_correct}")
+        logger.info(f"[{request_id}] 📝 Feedback: '{feedback}'")
+        logger.info(f"[{request_id}] ✅ Evaluation complete")
+        # Sort results by score descending
+        results.sort(key=lambda x: x["similarity_score"], reverse=True)
+        return jsonify({
+            "is_correct": is_correct,
+            "score": best_score,
+            "feedback": feedback,
+            "user_transcription": user_transcription,
+            "best_reference_transcription": best_transcription,
+            "reference_locator": reference_locator,
+            "details": results
+        })
     except Exception as e:
+        logger.error(f"[{request_id}] ❌ Unhandled exception in evaluation endpoint: {str(e)}")
+        logger.debug(f"[{request_id}] Stack trace: {traceback.format_exc()}")
+        # Clean up on error
+        try:
+            if temp_dir and os.path.exists(temp_dir):
+                shutil.rmtree(temp_dir)
+        except:
+            pass
+        return jsonify({"error": f"Internal server error: {str(e)}"}), 500