Update evaluate.py
Browse files- evaluate.py +51 -61
evaluate.py
CHANGED
@@ -32,7 +32,7 @@ def calculate_similarity(text1, text2):
|
|
32 |
return matcher.ratio() * 100
|
33 |
|
34 |
def setup_reference_patterns(reference_dir, sample_rate=16000):
|
35 |
-
"""Create standard reference pattern directories
|
36 |
reference_patterns = [
|
37 |
"mayap_a_abak", "mayap_a_ugtu", "mayap_a_gatpanapun", "mayap_a_bengi",
|
38 |
"komusta_ka", "malaus_ko_pu", "malaus_kayu", "agaganaka_da_ka",
|
@@ -47,7 +47,6 @@ def setup_reference_patterns(reference_dir, sample_rate=16000):
|
|
47 |
]
|
48 |
|
49 |
created_dirs = 0
|
50 |
-
created_files = 0
|
51 |
|
52 |
for pattern in reference_patterns:
|
53 |
pattern_dir = os.path.join(reference_dir, pattern)
|
@@ -59,26 +58,8 @@ def setup_reference_patterns(reference_dir, sample_rate=16000):
|
|
59 |
except Exception as e:
|
60 |
logger.error(f"❌ Failed to create reference pattern directory {pattern_dir}: {str(e)}")
|
61 |
continue
|
62 |
-
|
63 |
-
# Check if directory has any WAV files, add a dummy if not
|
64 |
-
wav_files = glob.glob(os.path.join(pattern_dir, "*.wav"))
|
65 |
-
if not wav_files:
|
66 |
-
try:
|
67 |
-
dummy_path = os.path.join(pattern_dir, "dummy_reference.wav")
|
68 |
-
# Create a 1-second silent audio file - not completely silent to avoid transcription issues
|
69 |
-
# Adding a small amount of noise helps ASR models detect something
|
70 |
-
silent = AudioSegment.silent(duration=1000, frame_rate=sample_rate)
|
71 |
-
# Add a tiny bit of noise
|
72 |
-
for i in range(50, 950, 300):
|
73 |
-
silent = silent.overlay(AudioSegment.silent(duration=50, frame_rate=sample_rate) + 3, position=i)
|
74 |
-
silent.export(dummy_path, format="wav")
|
75 |
-
logger.info(f"π Created dummy reference file: {dummy_path}")
|
76 |
-
created_files += 1
|
77 |
-
except Exception as e:
|
78 |
-
logger.error(f"❌ Failed to create dummy file in {pattern_dir}: {str(e)}")
|
79 |
|
80 |
-
return created_dirs, created_files
|
81 |
-
|
82 |
def search_reference_directories():
|
83 |
"""Search for possible reference directories in various locations"""
|
84 |
possible_locations = [
|
@@ -152,12 +133,12 @@ def init_reference_audio(reference_dir, output_dir):
|
|
152 |
os.makedirs(working_dir, exist_ok=True)
|
153 |
logger.info(f"π Using reference directory: {working_dir}")
|
154 |
|
155 |
-
# Set up reference pattern directories
|
156 |
-
dirs_created,
|
157 |
-
logger.info(f"π Created {dirs_created} directories
|
158 |
|
159 |
# Try to copy reference files from other found directories to working directory if needed
|
160 |
-
if
|
161 |
# Try to find a directory with existing WAV files
|
162 |
for directory in found_dirs:
|
163 |
if directory['path'] != working_dir and directory['wav_files'] > 0:
|
@@ -166,24 +147,32 @@ def init_reference_audio(reference_dir, output_dir):
|
|
166 |
logger.info(f"π Copying reference files from {source_dir} to {working_dir}")
|
167 |
|
168 |
# Copy pattern directories that have WAV files
|
|
|
169 |
for item in os.listdir(source_dir):
|
170 |
src_path = os.path.join(source_dir, item)
|
171 |
-
if os.path.isdir(src_path)
|
172 |
-
|
|
|
|
|
173 |
|
174 |
-
#
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
|
|
|
|
|
|
|
|
|
|
181 |
|
182 |
break
|
183 |
except Exception as e:
|
184 |
logger.warning(f"⚠️ Failed to copy reference files: {str(e)}")
|
185 |
|
186 |
-
# Log the final contents
|
187 |
pattern_dirs = [d for d in os.listdir(working_dir)
|
188 |
if os.path.isdir(os.path.join(working_dir, d))]
|
189 |
logger.info(f"π Final reference directory has {len(pattern_dirs)} pattern directories")
|
@@ -192,22 +181,35 @@ def init_reference_audio(reference_dir, output_dir):
|
|
192 |
for pattern in pattern_dirs:
|
193 |
pattern_path = os.path.join(working_dir, pattern)
|
194 |
wav_files = glob.glob(os.path.join(pattern_path, "*.wav"))
|
195 |
-
|
196 |
-
|
|
|
|
|
197 |
|
198 |
logger.info(f"π Total reference WAV files: {total_wav_files}")
|
199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
return working_dir
|
201 |
|
202 |
except Exception as e:
|
203 |
logger.error(f"❌ Failed to set up reference audio directory: {str(e)}")
|
204 |
logger.debug(f"Stack trace: {traceback.format_exc()}")
|
205 |
|
206 |
-
# As a last resort, try to use /tmp
|
207 |
fallback_dir = os.path.join('/tmp', 'reference_audios')
|
208 |
try:
|
209 |
os.makedirs(fallback_dir, exist_ok=True)
|
210 |
-
setup_reference_patterns(fallback_dir)
|
211 |
logger.warning(f"⚠️ Using emergency fallback directory: {fallback_dir}")
|
212 |
return fallback_dir
|
213 |
except:
|
@@ -344,30 +346,18 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
|
|
344 |
|
345 |
# Check for reference files
|
346 |
reference_files = glob.glob(os.path.join(reference_dir_path, "*.wav"))
|
347 |
-
|
|
|
|
|
348 |
|
349 |
-
# If no reference files exist,
|
350 |
if not reference_files:
|
351 |
-
logger.warning(f"[{request_id}] ⚠️ No reference audio files found in {reference_dir_path}")
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
# Create a 1-second audio file with a slight sound
|
359 |
-
silent_audio = AudioSegment.silent(duration=1000, frame_rate=sample_rate)
|
360 |
-
# Add a tiny bit of noise to help ASR
|
361 |
-
for i in range(50, 950, 300):
|
362 |
-
silent_audio = silent_audio.overlay(AudioSegment.silent(duration=50, frame_rate=sample_rate) + 3, position=i)
|
363 |
-
silent_audio.export(dummy_file_path, format="wav")
|
364 |
-
|
365 |
-
# Add it to the list of reference files
|
366 |
-
reference_files = [dummy_file_path]
|
367 |
-
logger.info(f"[{request_id}] ✅ Created dummy reference file for testing")
|
368 |
-
except Exception as e:
|
369 |
-
logger.error(f"[{request_id}] ❌ Failed to create dummy reference: {str(e)}")
|
370 |
-
return jsonify({"error": f"No reference audio found for {reference_locator}"}), 404
|
371 |
|
372 |
lang_code = LANGUAGE_CODES.get(language, language)
|
373 |
logger.info(f"[{request_id}] π Evaluating pronunciation for reference: {reference_locator} with language code: {lang_code}")
|
|
|
32 |
return matcher.ratio() * 100
|
33 |
|
34 |
def setup_reference_patterns(reference_dir, sample_rate=16000):
|
35 |
+
"""Create standard reference pattern directories without dummy files"""
|
36 |
reference_patterns = [
|
37 |
"mayap_a_abak", "mayap_a_ugtu", "mayap_a_gatpanapun", "mayap_a_bengi",
|
38 |
"komusta_ka", "malaus_ko_pu", "malaus_kayu", "agaganaka_da_ka",
|
|
|
47 |
]
|
48 |
|
49 |
created_dirs = 0
|
|
|
50 |
|
51 |
for pattern in reference_patterns:
|
52 |
pattern_dir = os.path.join(reference_dir, pattern)
|
|
|
58 |
except Exception as e:
|
59 |
logger.error(f"❌ Failed to create reference pattern directory {pattern_dir}: {str(e)}")
|
60 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
+
return created_dirs, 0 # Return 0 created files since we're not creating dummy files anymore
|
|
|
63 |
def search_reference_directories():
|
64 |
"""Search for possible reference directories in various locations"""
|
65 |
possible_locations = [
|
|
|
133 |
os.makedirs(working_dir, exist_ok=True)
|
134 |
logger.info(f"π Using reference directory: {working_dir}")
|
135 |
|
136 |
+
# Set up reference pattern directories WITHOUT dummy files
|
137 |
+
dirs_created, _ = setup_reference_patterns(working_dir)
|
138 |
+
logger.info(f"π Created {dirs_created} directories")
|
139 |
|
140 |
# Try to copy reference files from other found directories to working directory if needed
|
141 |
+
if len(found_dirs) > 1:
|
142 |
# Try to find a directory with existing WAV files
|
143 |
for directory in found_dirs:
|
144 |
if directory['path'] != working_dir and directory['wav_files'] > 0:
|
|
|
147 |
logger.info(f"π Copying reference files from {source_dir} to {working_dir}")
|
148 |
|
149 |
# Copy pattern directories that have WAV files
|
150 |
+
# But skip any dummy reference files
|
151 |
for item in os.listdir(source_dir):
|
152 |
src_path = os.path.join(source_dir, item)
|
153 |
+
if os.path.isdir(src_path):
|
154 |
+
wav_files = glob.glob(os.path.join(src_path, "*.wav"))
|
155 |
+
# Filter out dummy references
|
156 |
+
wav_files = [f for f in wav_files if "dummy_reference" not in f]
|
157 |
|
158 |
+
if wav_files: # Only proceed if there are valid files
|
159 |
+
dst_path = os.path.join(working_dir, item)
|
160 |
+
os.makedirs(dst_path, exist_ok=True)
|
161 |
+
|
162 |
+
# Copy each valid WAV file individually
|
163 |
+
for wav_file in wav_files:
|
164 |
+
wav_name = os.path.basename(wav_file)
|
165 |
+
if "dummy_reference" not in wav_name: # Extra check
|
166 |
+
dst_file = os.path.join(dst_path, wav_name)
|
167 |
+
if not os.path.exists(dst_file):
|
168 |
+
shutil.copy2(wav_file, dst_file)
|
169 |
+
logger.info(f"π Copied {wav_name} to {dst_path}")
|
170 |
|
171 |
break
|
172 |
except Exception as e:
|
173 |
logger.warning(f"⚠️ Failed to copy reference files: {str(e)}")
|
174 |
|
175 |
+
# Log the final contents, excluding dummy files
|
176 |
pattern_dirs = [d for d in os.listdir(working_dir)
|
177 |
if os.path.isdir(os.path.join(working_dir, d))]
|
178 |
logger.info(f"π Final reference directory has {len(pattern_dirs)} pattern directories")
|
|
|
181 |
for pattern in pattern_dirs:
|
182 |
pattern_path = os.path.join(working_dir, pattern)
|
183 |
wav_files = glob.glob(os.path.join(pattern_path, "*.wav"))
|
184 |
+
# Count only non-dummy files
|
185 |
+
valid_files = [f for f in wav_files if "dummy_reference" not in f]
|
186 |
+
total_wav_files += len(valid_files)
|
187 |
+
logger.info(f" - {pattern}: {len(valid_files)} valid WAV files")
|
188 |
|
189 |
logger.info(f"π Total reference WAV files: {total_wav_files}")
|
190 |
|
191 |
+
# Check for and remove any dummy files
|
192 |
+
for pattern in pattern_dirs:
|
193 |
+
pattern_path = os.path.join(working_dir, pattern)
|
194 |
+
dummy_files = glob.glob(os.path.join(pattern_path, "dummy_reference.wav"))
|
195 |
+
for dummy in dummy_files:
|
196 |
+
try:
|
197 |
+
os.remove(dummy)
|
198 |
+
logger.info(f"ποΈ Removed dummy file: {dummy}")
|
199 |
+
except Exception as e:
|
200 |
+
logger.warning(f"⚠️ Failed to remove dummy file {dummy}: {str(e)}")
|
201 |
+
|
202 |
return working_dir
|
203 |
|
204 |
except Exception as e:
|
205 |
logger.error(f"❌ Failed to set up reference audio directory: {str(e)}")
|
206 |
logger.debug(f"Stack trace: {traceback.format_exc()}")
|
207 |
|
208 |
+
# As a last resort, try to use /tmp but without dummy files
|
209 |
fallback_dir = os.path.join('/tmp', 'reference_audios')
|
210 |
try:
|
211 |
os.makedirs(fallback_dir, exist_ok=True)
|
212 |
+
setup_reference_patterns(fallback_dir) # This now doesn't create dummy files
|
213 |
logger.warning(f"⚠️ Using emergency fallback directory: {fallback_dir}")
|
214 |
return fallback_dir
|
215 |
except:
|
|
|
346 |
|
347 |
# Check for reference files
|
348 |
reference_files = glob.glob(os.path.join(reference_dir_path, "*.wav"))
|
349 |
+
# Filter out any dummy reference files
|
350 |
+
reference_files = [f for f in reference_files if "dummy_reference" not in f]
|
351 |
+
logger.info(f"[{request_id}] π Found {len(reference_files)} valid reference files")
|
352 |
|
353 |
+
# If no reference files exist, return a more detailed error message
|
354 |
if not reference_files:
|
355 |
+
logger.warning(f"[{request_id}] ⚠️ No valid reference audio files found in {reference_dir_path}")
|
356 |
+
return jsonify({
|
357 |
+
"error": f"No reference audio found for {reference_locator}",
|
358 |
+
"message": "Please upload a reference audio file before evaluation.",
|
359 |
+
"status": "MISSING_REFERENCE"
|
360 |
+
}), 404
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
361 |
|
362 |
lang_code = LANGUAGE_CODES.get(language, language)
|
363 |
logger.info(f"[{request_id}] π Evaluating pronunciation for reference: {reference_locator} with language code: {lang_code}")
|