Coco-18 committed on
Commit
1d7248d
·
verified ·
1 Parent(s): ff4f467

Update evaluate.py

Browse files
Files changed (1) hide show
  1. evaluate.py +126 -1
evaluate.py CHANGED
@@ -814,7 +814,132 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
814
  resume_preprocessing()
815
  return response
816
 
817
- # Add a new function to get preprocessing status
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
818
  def get_preprocessing_status():
819
  """Get the current status of reference audio preprocessing"""
820
  global PREPROCESSING_COMPLETE, REFERENCE_CACHE, PREPROCESSING_ACTIVE, PREPROCESSING_PAUSE
 
814
  resume_preprocessing()
815
  return response
816
 
817
def _discard_temp_file(path):
    """Best-effort removal of a temporary file; failures are logged, not raised."""
    if path is None:
        return
    try:
        os.unlink(path)
    except OSError as cleanup_error:
        # Cleanup is best-effort: a stale temp file must not fail the request.
        logger.debug(f"Could not remove temp file {path}: {cleanup_error}")


def handle_upload_reference(request, reference_dir, sample_rate):
    """Handle upload of a reference audio file and preprocess it immediately.

    The uploaded file (form field ``audio``) is converted to a mono WAV at
    ``sample_rate`` and stored under ``<reference_dir>/<reference_word>/``.
    When both the ASR model and processor are available, the new file is
    preprocessed and stored in ``REFERENCE_CACHE[reference_word][filename]``.

    Background preprocessing is paused for the duration of the request and is
    always resumed before returning, on success and on every error path.

    Args:
        request: Incoming request exposing ``files`` and ``form`` mappings.
        reference_dir: Base directory for reference audio. If it does not
            exist, ``/tmp/reference_audios`` is used instead.
        sample_rate: Target frame rate for the stored WAV file.

    Returns:
        A JSON response on success, or a ``(response, status_code)`` tuple
        with status 400 (invalid input) or 500 (processing failure).
        The ``preprocessed`` field of the success payload reports whether the
        file was actually added to the reference cache.
    """
    global REFERENCE_CACHE

    # Pause preprocessing while handling user request
    pause_preprocessing()

    try:
        if "audio" not in request.files:
            logger.warning("⚠️ Reference upload missing audio file")
            return jsonify({"error": "No audio file uploaded"}), 400

        reference_word = request.form.get("reference_word", "").strip()
        if not reference_word:
            logger.warning("⚠️ Reference upload missing reference word")
            return jsonify({"error": "No reference word provided"}), 400

        # Validate reference word
        reference_patterns = [
            "mayap_a_abak", "mayap_a_ugtu", "mayap_a_gatpanapun", "mayap_a_bengi",
            "komusta_ka", "malaus_ko_pu", "malaus_kayu", "agaganaka_da_ka",
            "pagdulapan_da_ka", "kaluguran_da_ka", "dakal_a_salamat", "panapaya_mu_ku",
            "wa", "ali", "tuknang", "lagwa", "galo", "buri_ke_ini", "tara_na",
            "nokarin_ka_ibat", "nokarin_ka_munta", "atiu_na_ku", "nanung_panayan_mu",
            "mako_na_ka", "muli_ta_na", "nanu_ing_pengan_mu", "mekeni", "mengan_na_ka",
            "munta_ka_karin", "magkanu_ini", "mimingat_ka", "mangan_ta_na", "lakwan_da_ka",
            "nanu_maliari_kung_daptan_keka", "pilan_na_ka_banwa", "saliwan_ke_ini",
            "makananu_munta_king", "adwa", "anam", "apat", "apulu", "atlu", "dinalan", "libu", "lima",
            "metung", "pitu", "siyam", "walu", "masala", "madalumdum", "maragul", "marimla", "malagu", "marok", "mababa", "malapit", "matuling", "maputi",
            "arung", "asbuk", "balugbug", "bitis", "buntuk", "butit", "gamat", "kuku", "salu", "tud",
            "pisan", "dara", "achi", "apu", "ima", "tatang", "pengari", "koya", "kapatad", "wali",
            "pasbul", "awang", "dagis", "bale", "ulas", "sambra", "sulu", "pitudturan", "luklukan", "ulnan"
        ]

        if reference_word not in reference_patterns:
            logger.warning(f"⚠️ Invalid reference word: {reference_word}")
            return jsonify({"error": f"Invalid reference word. Available: {reference_patterns}"}), 400

        # Make sure we have a writable reference directory
        if not os.path.exists(reference_dir):
            reference_dir = os.path.join('/tmp', 'reference_audios')
            os.makedirs(reference_dir, exist_ok=True)
            logger.warning(f"⚠️ Using alternate reference directory for upload: {reference_dir}")

        # Create directory for reference pattern if it doesn't exist
        pattern_dir = os.path.join(reference_dir, reference_word)
        os.makedirs(pattern_dir, exist_ok=True)

        # Sanitise the client-supplied name; it may be missing entirely.
        audio_file = request.files["audio"]
        filename = secure_filename(audio_file.filename or "")

        # Ensure filename has .wav extension. If sanitising left no usable
        # base name, fall back to the reference word rather than writing a
        # hidden ".wav" file.
        if not filename.lower().endswith('.wav'):
            base_name = os.path.splitext(filename)[0]
            filename = f"{base_name or reference_word}.wav"

        file_path = os.path.join(pattern_dir, filename)

        preprocessed = False
        temp_path = None
        try:
            # Save the raw upload to a temporary file first, then convert to
            # WAV. delete=False because the file is reopened by path below.
            with tempfile.NamedTemporaryFile(delete=False) as temp_file:
                temp_path = temp_file.name
                audio_file.save(temp_path)

            try:
                # Normalise to mono at the requested sample rate
                audio = AudioSegment.from_file(temp_path)
                audio = audio.set_frame_rate(sample_rate).set_channels(1)
                audio.export(file_path, format="wav")
                logger.info(f"✅ Reference audio saved successfully for {reference_word}: {file_path}")

                # Immediately preprocess this new reference file and add to cache
                asr_model = get_asr_model()
                asr_processor = get_asr_processor()

                if asr_model and asr_processor:
                    # Initialize cache for this pattern if needed
                    REFERENCE_CACHE.setdefault(reference_word, {})

                    result = preprocess_reference_file(file_path, sample_rate, asr_model, asr_processor)
                    if result:
                        REFERENCE_CACHE[reference_word][filename] = result
                        preprocessed = True
                        logger.info(f"✅ New reference audio preprocessed and added to cache: {filename}")

            except Exception as e:
                logger.error(f"❌ Reference audio processing failed: {str(e)}")
                return jsonify({"error": f"Audio processing failed: {str(e)}"}), 500
        finally:
            # Remove the temp file on success and on failure alike
            _discard_temp_file(temp_path)

        # Count how many references we have now
        references = glob.glob(os.path.join(pattern_dir, "*.wav"))

        return jsonify({
            "message": "Reference audio uploaded successfully",
            "reference_word": reference_word,
            "file": filename,
            "total_references": len(references),
            # Report what actually happened rather than always claiming success
            "preprocessed": preprocessed
        })

    except Exception as e:
        logger.error(f"❌ Unhandled exception in reference upload: {str(e)}")
        logger.debug(f"Stack trace: {traceback.format_exc()}")
        return jsonify({"error": f"Internal server error: {str(e)}"}), 500

    finally:
        # Single exit point: resume preprocessing on every path out of the handler
        resume_preprocessing()
941
+
942
+ # Add a new function to get preprocessing status
943
  def get_preprocessing_status():
944
  """Get the current status of reference audio preprocessing"""
945
  global PREPROCESSING_COMPLETE, REFERENCE_CACHE, PREPROCESSING_ACTIVE, PREPROCESSING_PAUSE