Coco-18 committed on
Commit
8eacc95
·
verified ·
1 Parent(s): 98bdad8

Update evaluate.py

Browse files
Files changed (1) hide show
  1. evaluate.py +51 -61
evaluate.py CHANGED
@@ -32,7 +32,7 @@ def calculate_similarity(text1, text2):
32
  return matcher.ratio() * 100
33
 
34
  def setup_reference_patterns(reference_dir, sample_rate=16000):
35
- """Create standard reference pattern directories and dummy files if needed"""
36
  reference_patterns = [
37
  "mayap_a_abak", "mayap_a_ugtu", "mayap_a_gatpanapun", "mayap_a_bengi",
38
  "komusta_ka", "malaus_ko_pu", "malaus_kayu", "agaganaka_da_ka",
@@ -47,7 +47,6 @@ def setup_reference_patterns(reference_dir, sample_rate=16000):
47
  ]
48
 
49
  created_dirs = 0
50
- created_files = 0
51
 
52
  for pattern in reference_patterns:
53
  pattern_dir = os.path.join(reference_dir, pattern)
@@ -59,26 +58,8 @@ def setup_reference_patterns(reference_dir, sample_rate=16000):
59
  except Exception as e:
60
  logger.error(f"❌ Failed to create reference pattern directory {pattern_dir}: {str(e)}")
61
  continue
62
-
63
- # Check if directory has any WAV files, add a dummy if not
64
- wav_files = glob.glob(os.path.join(pattern_dir, "*.wav"))
65
- if not wav_files:
66
- try:
67
- dummy_path = os.path.join(pattern_dir, "dummy_reference.wav")
68
- # Create a 1-second silent audio file - not completely silent to avoid transcription issues
69
- # Adding a small amount of noise helps ASR models detect something
70
- silent = AudioSegment.silent(duration=1000, frame_rate=sample_rate)
71
- # Add a tiny bit of noise
72
- for i in range(50, 950, 300):
73
- silent = silent.overlay(AudioSegment.silent(duration=50, frame_rate=sample_rate) + 3, position=i)
74
- silent.export(dummy_path, format="wav")
75
- logger.info(f"πŸ“„ Created dummy reference file: {dummy_path}")
76
- created_files += 1
77
- except Exception as e:
78
- logger.error(f"❌ Failed to create dummy file in {pattern_dir}: {str(e)}")
79
 
80
- return created_dirs, created_files
81
-
82
  def search_reference_directories():
83
  """Search for possible reference directories in various locations"""
84
  possible_locations = [
@@ -152,12 +133,12 @@ def init_reference_audio(reference_dir, output_dir):
152
  os.makedirs(working_dir, exist_ok=True)
153
  logger.info(f"πŸ“ Using reference directory: {working_dir}")
154
 
155
- # Set up reference pattern directories with dummy files if needed
156
- dirs_created, files_created = setup_reference_patterns(working_dir)
157
- logger.info(f"πŸ“Š Created {dirs_created} directories and {files_created} dummy files")
158
 
159
  # Try to copy reference files from other found directories to working directory if needed
160
- if files_created > 0 and len(found_dirs) > 1:
161
  # Try to find a directory with existing WAV files
162
  for directory in found_dirs:
163
  if directory['path'] != working_dir and directory['wav_files'] > 0:
@@ -166,24 +147,32 @@ def init_reference_audio(reference_dir, output_dir):
166
  logger.info(f"πŸ”„ Copying reference files from {source_dir} to {working_dir}")
167
 
168
  # Copy pattern directories that have WAV files
 
169
  for item in os.listdir(source_dir):
170
  src_path = os.path.join(source_dir, item)
171
- if os.path.isdir(src_path) and glob.glob(os.path.join(src_path, "*.wav")):
172
- dst_path = os.path.join(working_dir, item)
 
 
173
 
174
- # Copy each WAV file individually
175
- for wav_file in glob.glob(os.path.join(src_path, "*.wav")):
176
- wav_name = os.path.basename(wav_file)
177
- dst_file = os.path.join(dst_path, wav_name)
178
- if not os.path.exists(dst_file):
179
- shutil.copy2(wav_file, dst_file)
180
- logger.info(f"πŸ“„ Copied {wav_name} to {dst_path}")
 
 
 
 
 
181
 
182
  break
183
  except Exception as e:
184
  logger.warning(f"⚠️ Failed to copy reference files: {str(e)}")
185
 
186
- # Log the final contents
187
  pattern_dirs = [d for d in os.listdir(working_dir)
188
  if os.path.isdir(os.path.join(working_dir, d))]
189
  logger.info(f"πŸ“Š Final reference directory has {len(pattern_dirs)} pattern directories")
@@ -192,22 +181,35 @@ def init_reference_audio(reference_dir, output_dir):
192
  for pattern in pattern_dirs:
193
  pattern_path = os.path.join(working_dir, pattern)
194
  wav_files = glob.glob(os.path.join(pattern_path, "*.wav"))
195
- total_wav_files += len(wav_files)
196
- logger.info(f" - {pattern}: {len(wav_files)} WAV files")
 
 
197
 
198
  logger.info(f"πŸ“Š Total reference WAV files: {total_wav_files}")
199
 
 
 
 
 
 
 
 
 
 
 
 
200
  return working_dir
201
 
202
  except Exception as e:
203
  logger.error(f"❌ Failed to set up reference audio directory: {str(e)}")
204
  logger.debug(f"Stack trace: {traceback.format_exc()}")
205
 
206
- # As a last resort, try to use /tmp
207
  fallback_dir = os.path.join('/tmp', 'reference_audios')
208
  try:
209
  os.makedirs(fallback_dir, exist_ok=True)
210
- setup_reference_patterns(fallback_dir)
211
  logger.warning(f"⚠️ Using emergency fallback directory: {fallback_dir}")
212
  return fallback_dir
213
  except:
@@ -344,30 +346,18 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
344
 
345
  # Check for reference files
346
  reference_files = glob.glob(os.path.join(reference_dir_path, "*.wav"))
347
- logger.info(f"[{request_id}] πŸ“ Found {len(reference_files)} reference files")
 
 
348
 
349
- # If no reference files exist, create a dummy reference file
350
  if not reference_files:
351
- logger.warning(f"[{request_id}] ⚠️ No reference audio files found in {reference_dir_path}")
352
-
353
- # Create a dummy reference file
354
- try:
355
- dummy_file_path = os.path.join(reference_dir_path, "dummy_reference.wav")
356
- logger.info(f"[{request_id}] πŸ”„ Creating dummy reference file: {dummy_file_path}")
357
-
358
- # Create a 1-second audio file with a slight sound
359
- silent_audio = AudioSegment.silent(duration=1000, frame_rate=sample_rate)
360
- # Add a tiny bit of noise to help ASR
361
- for i in range(50, 950, 300):
362
- silent_audio = silent_audio.overlay(AudioSegment.silent(duration=50, frame_rate=sample_rate) + 3, position=i)
363
- silent_audio.export(dummy_file_path, format="wav")
364
-
365
- # Add it to the list of reference files
366
- reference_files = [dummy_file_path]
367
- logger.info(f"[{request_id}] βœ… Created dummy reference file for testing")
368
- except Exception as e:
369
- logger.error(f"[{request_id}] ❌ Failed to create dummy reference: {str(e)}")
370
- return jsonify({"error": f"No reference audio found for {reference_locator}"}), 404
371
 
372
  lang_code = LANGUAGE_CODES.get(language, language)
373
  logger.info(f"[{request_id}] πŸ”„ Evaluating pronunciation for reference: {reference_locator} with language code: {lang_code}")
 
32
  return matcher.ratio() * 100
33
 
34
  def setup_reference_patterns(reference_dir, sample_rate=16000):
35
+ """Create standard reference pattern directories without dummy files"""
36
  reference_patterns = [
37
  "mayap_a_abak", "mayap_a_ugtu", "mayap_a_gatpanapun", "mayap_a_bengi",
38
  "komusta_ka", "malaus_ko_pu", "malaus_kayu", "agaganaka_da_ka",
 
47
  ]
48
 
49
  created_dirs = 0
 
50
 
51
  for pattern in reference_patterns:
52
  pattern_dir = os.path.join(reference_dir, pattern)
 
58
  except Exception as e:
59
  logger.error(f"❌ Failed to create reference pattern directory {pattern_dir}: {str(e)}")
60
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
+ return created_dirs, 0 # Return 0 created files since we're not creating dummy files anymore
 
63
  def search_reference_directories():
64
  """Search for possible reference directories in various locations"""
65
  possible_locations = [
 
133
  os.makedirs(working_dir, exist_ok=True)
134
  logger.info(f"πŸ“ Using reference directory: {working_dir}")
135
 
136
+ # Set up reference pattern directories WITHOUT dummy files
137
+ dirs_created, _ = setup_reference_patterns(working_dir)
138
+ logger.info(f"πŸ“Š Created {dirs_created} directories")
139
 
140
  # Try to copy reference files from other found directories to working directory if needed
141
+ if len(found_dirs) > 1:
142
  # Try to find a directory with existing WAV files
143
  for directory in found_dirs:
144
  if directory['path'] != working_dir and directory['wav_files'] > 0:
 
147
  logger.info(f"πŸ”„ Copying reference files from {source_dir} to {working_dir}")
148
 
149
  # Copy pattern directories that have WAV files
150
+ # But skip any dummy reference files
151
  for item in os.listdir(source_dir):
152
  src_path = os.path.join(source_dir, item)
153
+ if os.path.isdir(src_path):
154
+ wav_files = glob.glob(os.path.join(src_path, "*.wav"))
155
+ # Filter out dummy references
156
+ wav_files = [f for f in wav_files if "dummy_reference" not in f]
157
 
158
+ if wav_files: # Only proceed if there are valid files
159
+ dst_path = os.path.join(working_dir, item)
160
+ os.makedirs(dst_path, exist_ok=True)
161
+
162
+ # Copy each valid WAV file individually
163
+ for wav_file in wav_files:
164
+ wav_name = os.path.basename(wav_file)
165
+ if "dummy_reference" not in wav_name: # Extra check
166
+ dst_file = os.path.join(dst_path, wav_name)
167
+ if not os.path.exists(dst_file):
168
+ shutil.copy2(wav_file, dst_file)
169
+ logger.info(f"πŸ“„ Copied {wav_name} to {dst_path}")
170
 
171
  break
172
  except Exception as e:
173
  logger.warning(f"⚠️ Failed to copy reference files: {str(e)}")
174
 
175
+ # Log the final contents, excluding dummy files
176
  pattern_dirs = [d for d in os.listdir(working_dir)
177
  if os.path.isdir(os.path.join(working_dir, d))]
178
  logger.info(f"πŸ“Š Final reference directory has {len(pattern_dirs)} pattern directories")
 
181
  for pattern in pattern_dirs:
182
  pattern_path = os.path.join(working_dir, pattern)
183
  wav_files = glob.glob(os.path.join(pattern_path, "*.wav"))
184
+ # Count only non-dummy files
185
+ valid_files = [f for f in wav_files if "dummy_reference" not in f]
186
+ total_wav_files += len(valid_files)
187
+ logger.info(f" - {pattern}: {len(valid_files)} valid WAV files")
188
 
189
  logger.info(f"πŸ“Š Total reference WAV files: {total_wav_files}")
190
 
191
+ # Check for and remove any dummy files
192
+ for pattern in pattern_dirs:
193
+ pattern_path = os.path.join(working_dir, pattern)
194
+ dummy_files = glob.glob(os.path.join(pattern_path, "dummy_reference.wav"))
195
+ for dummy in dummy_files:
196
+ try:
197
+ os.remove(dummy)
198
+ logger.info(f"πŸ—‘οΈ Removed dummy file: {dummy}")
199
+ except Exception as e:
200
+ logger.warning(f"⚠️ Failed to remove dummy file {dummy}: {str(e)}")
201
+
202
  return working_dir
203
 
204
  except Exception as e:
205
  logger.error(f"❌ Failed to set up reference audio directory: {str(e)}")
206
  logger.debug(f"Stack trace: {traceback.format_exc()}")
207
 
208
+ # As a last resort, try to use /tmp but without dummy files
209
  fallback_dir = os.path.join('/tmp', 'reference_audios')
210
  try:
211
  os.makedirs(fallback_dir, exist_ok=True)
212
+ setup_reference_patterns(fallback_dir) # This now doesn't create dummy files
213
  logger.warning(f"⚠️ Using emergency fallback directory: {fallback_dir}")
214
  return fallback_dir
215
  except:
 
346
 
347
  # Check for reference files
348
  reference_files = glob.glob(os.path.join(reference_dir_path, "*.wav"))
349
+ # Filter out any dummy reference files
350
+ reference_files = [f for f in reference_files if "dummy_reference" not in f]
351
+ logger.info(f"[{request_id}] πŸ“ Found {len(reference_files)} valid reference files")
352
 
353
+ # If no reference files exist, return a more detailed error message
354
  if not reference_files:
355
+ logger.warning(f"[{request_id}] ⚠️ No valid reference audio files found in {reference_dir_path}")
356
+ return jsonify({
357
+ "error": f"No reference audio found for {reference_locator}",
358
+ "message": "Please upload a reference audio file before evaluation.",
359
+ "status": "MISSING_REFERENCE"
360
+ }), 404
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
 
362
  lang_code = LANGUAGE_CODES.get(language, language)
363
  logger.info(f"[{request_id}] πŸ”„ Evaluating pronunciation for reference: {reference_locator} with language code: {lang_code}")