Update evaluate.py
Browse files- evaluate.py +126 -1
evaluate.py
CHANGED
@@ -814,7 +814,132 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
|
|
814 |
resume_preprocessing()
|
815 |
return response
|
816 |
|
817 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
818 |
def get_preprocessing_status():
|
819 |
"""Get the current status of reference audio preprocessing"""
|
820 |
global PREPROCESSING_COMPLETE, REFERENCE_CACHE, PREPROCESSING_ACTIVE, PREPROCESSING_PAUSE
|
|
|
814 |
resume_preprocessing()
|
815 |
return response
|
816 |
|
817 |
+
def handle_upload_reference(request, reference_dir, sample_rate):
    """Store an uploaded reference recording as WAV and preprocess it at once.

    Background preprocessing is paused for the duration of the request and is
    always resumed on the way out, whichever path the function leaves by.

    Args:
        request: Request object exposing ``files`` (must contain ``"audio"``)
            and ``form`` (must contain ``"reference_word"``).
        reference_dir: Directory holding one sub-directory per reference word.
            If it does not exist, ``/tmp/reference_audios`` is used instead.
        sample_rate: Frame rate the audio is resampled to before it is
            exported; also forwarded to ``preprocess_reference_file``.

    Returns:
        On success, the ``jsonify`` response describing the stored file. Its
        ``"preprocessed"`` field is True only when the new file was actually
        preprocessed and added to ``REFERENCE_CACHE``.
        On failure, a ``(response, status)`` tuple with status 400 for invalid
        input or 500 for processing / unexpected errors.
    """
    import uuid  # local import: only needed for the empty-filename fallback

    global REFERENCE_CACHE

    # Pause preprocessing while handling user request
    pause_preprocessing()

    try:
        if "audio" not in request.files:
            logger.warning("⚠️ Reference upload missing audio file")
            return jsonify({"error": "No audio file uploaded"}), 400

        reference_word = request.form.get("reference_word", "").strip()
        if not reference_word:
            logger.warning("⚠️ Reference upload missing reference word")
            return jsonify({"error": "No reference word provided"}), 400

        # Validate reference word
        reference_patterns = [
            "mayap_a_abak", "mayap_a_ugtu", "mayap_a_gatpanapun", "mayap_a_bengi",
            "komusta_ka", "malaus_ko_pu", "malaus_kayu", "agaganaka_da_ka",
            "pagdulapan_da_ka", "kaluguran_da_ka", "dakal_a_salamat", "panapaya_mu_ku",
            "wa", "ali", "tuknang", "lagwa", "galo", "buri_ke_ini", "tara_na",
            "nokarin_ka_ibat", "nokarin_ka_munta", "atiu_na_ku", "nanung_panayan_mu",
            "mako_na_ka", "muli_ta_na", "nanu_ing_pengan_mu", "mekeni", "mengan_na_ka",
            "munta_ka_karin", "magkanu_ini", "mimingat_ka", "mangan_ta_na", "lakwan_da_ka",
            "nanu_maliari_kung_daptan_keka", "pilan_na_ka_banwa", "saliwan_ke_ini",
            "makananu_munta_king", "adwa", "anam", "apat", "apulu", "atlu", "dinalan", "libu", "lima",
            "metung", "pitu", "siyam", "walu", "masala", "madalumdum", "maragul", "marimla", "malagu", "marok", "mababa", "malapit", "matuling", "maputi",
            "arung", "asbuk", "balugbug", "bitis", "buntuk", "butit", "gamat", "kuku", "salu", "tud",
            "pisan", "dara", "achi", "apu", "ima", "tatang", "pengari", "koya", "kapatad", "wali",
            "pasbul", "awang", "dagis", "bale", "ulas", "sambra", "sulu", "pitudturan", "luklukan", "ulnan"
        ]

        if reference_word not in reference_patterns:
            logger.warning(f"⚠️ Invalid reference word: {reference_word}")
            return jsonify({"error": f"Invalid reference word. Available: {reference_patterns}"}), 400

        # Make sure we have a writable reference directory
        if not os.path.exists(reference_dir):
            reference_dir = os.path.join('/tmp', 'reference_audios')
            os.makedirs(reference_dir, exist_ok=True)
            logger.warning(f"⚠️ Using alternate reference directory for upload: {reference_dir}")

        # Create directory for reference pattern if it doesn't exist
        pattern_dir = os.path.join(reference_dir, reference_word)
        os.makedirs(pattern_dir, exist_ok=True)

        # Sanitise the client-supplied name; it may be missing entirely.
        audio_file = request.files["audio"]
        filename = secure_filename(audio_file.filename or "")

        if not filename:
            # Nothing usable survived sanitising: generate a unique name
            # rather than writing a hidden ".wav" file.
            filename = f"{reference_word}_{uuid.uuid4().hex}.wav"
        elif not filename.lower().endswith('.wav'):
            # Ensure filename has .wav extension
            base_name = os.path.splitext(filename)[0]
            filename = f"{base_name}.wav"

        file_path = os.path.join(pattern_dir, filename)

        # Reserve a temporary path for the raw upload; the handle is closed
        # before anything is written to the path.
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            temp_path = temp_file.name

        preprocessed = False
        try:
            audio_file.save(temp_path)

            try:
                # Convert the upload to mono WAV at the requested rate
                audio = AudioSegment.from_file(temp_path)
                audio = audio.set_frame_rate(sample_rate).set_channels(1)
                audio.export(file_path, format="wav")
                logger.info(f"✅ Reference audio saved successfully for {reference_word}: {file_path}")

                # Immediately preprocess this new reference file and add to cache
                asr_model = get_asr_model()
                asr_processor = get_asr_processor()

                if asr_model and asr_processor:
                    result = preprocess_reference_file(file_path, sample_rate, asr_model, asr_processor)
                    if result:
                        REFERENCE_CACHE.setdefault(reference_word, {})[filename] = result
                        preprocessed = True
                        logger.info(f"✅ New reference audio preprocessed and added to cache: {filename}")
                    else:
                        logger.warning(f"⚠️ Preprocessing produced no result for: {filename}")
                else:
                    logger.warning("⚠️ ASR model not available; reference saved but not preprocessed")

            except Exception as e:
                logger.error(f"❌ Reference audio processing failed: {str(e)}")
                return jsonify({"error": f"Audio processing failed: {str(e)}"}), 500
        finally:
            # Clean up temp file on every path, not just on success
            try:
                os.unlink(temp_path)
            except OSError as cleanup_error:
                logger.debug(f"Could not remove temp file {temp_path}: {cleanup_error}")

        # Count how many references we have now
        references = glob.glob(os.path.join(pattern_dir, "*.wav"))

        return jsonify({
            "message": "Reference audio uploaded successfully",
            "reference_word": reference_word,
            "file": filename,
            "total_references": len(references),
            "preprocessed": preprocessed
        })

    except Exception as e:
        logger.error(f"❌ Unhandled exception in reference upload: {str(e)}")
        logger.debug(f"Stack trace: {traceback.format_exc()}")
        return jsonify({"error": f"Internal server error: {str(e)}"}), 500

    finally:
        # Make sure to resume preprocessing on every exit path
        resume_preprocessing()
|
941 |
+
|
942 |
+
# Add a new function to get preprocessing status
|
943 |
def get_preprocessing_status():
|
944 |
"""Get the current status of reference audio preprocessing"""
|
945 |
global PREPROCESSING_COMPLETE, REFERENCE_CACHE, PREPROCESSING_ACTIVE, PREPROCESSING_PAUSE
|