Coco-18 committed on
Commit
2dbb181
·
verified ·
1 Parent(s): 70bf861

Update evaluate.py

Browse files
Files changed (1) hide show
  1. evaluate.py +220 -95
evaluate.py CHANGED
@@ -31,62 +31,181 @@ def calculate_similarity(text1, text2):
31
  matcher = SequenceMatcher(None, clean1, clean2)
32
  return matcher.ratio() * 100
33
 
34
- # In evaluate.py, modify the init_reference_audio function
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  def init_reference_audio(reference_dir, output_dir):
 
37
  try:
38
  # Create the output directory first
39
  os.makedirs(output_dir, exist_ok=True)
40
  logger.info(f"πŸ“ Created output directory: {output_dir}")
41
 
42
- # Check if the reference audio directory exists
43
- if not os.path.exists(reference_dir) or not os.access(os.path.dirname(reference_dir), os.W_OK):
44
- # Use a directory in /tmp instead
45
- reference_dir = os.path.join('/tmp', 'reference_audios')
46
- logger.warning(f"⚠️ Using alternate reference directory: {reference_dir}")
47
-
48
- # Make sure the reference directory exists
49
- os.makedirs(reference_dir, exist_ok=True)
50
- logger.info(f"πŸ“ Created/verified reference audio directory: {reference_dir}")
51
 
52
- # Create all the pattern subdirectories
53
- setup_reference_patterns(reference_dir)
 
54
 
55
- # Try to copy any existing reference files if they exist in the original location
56
- original_ref_dir = "./reference_audios"
57
- if os.path.exists(original_ref_dir) and reference_dir != original_ref_dir:
58
- try:
59
- import shutil
60
- # Get all pattern directories
61
- for item in os.listdir(original_ref_dir):
62
- src_path = os.path.join(original_ref_dir, item)
63
- dst_path = os.path.join(reference_dir, item)
64
-
65
- if os.path.isdir(src_path):
66
- # Copy directory and contents
67
- if not os.path.exists(dst_path):
68
- shutil.copytree(src_path, dst_path)
69
- logger.info(f"πŸ“ Copied reference pattern from {src_path} to {dst_path}")
70
- except Exception as e:
71
- logger.warning(f"⚠️ Could not copy original reference files: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
- # Log the contents to verify
74
- if os.path.exists(reference_dir):
75
- pattern_dirs = [d for d in os.listdir(reference_dir)
76
- if os.path.isdir(os.path.join(reference_dir, d))]
77
- logger.info(f"πŸ“ Found reference patterns: {pattern_dirs}")
78
-
79
- # Check each pattern directory for wav files
80
- for pattern_dir_name in pattern_dirs:
81
- pattern_path = os.path.join(reference_dir, pattern_dir_name)
82
- wav_files = glob.glob(os.path.join(pattern_path, "*.wav"))
83
- logger.info(f"πŸ“ Found {len(wav_files)} wav files in {pattern_dir_name}")
84
-
85
- return reference_dir
86
 
87
  except Exception as e:
88
  logger.error(f"❌ Failed to set up reference audio directory: {str(e)}")
89
- return reference_dir
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  def handle_upload_reference(request, reference_dir, sample_rate):
92
  """Handle upload of reference audio files"""
@@ -111,38 +230,54 @@ def handle_upload_reference(request, reference_dir, sample_rate):
111
  logger.warning(f"⚠️ Invalid reference word: {reference_word}")
112
  return jsonify({"error": f"Invalid reference word. Available: {reference_patterns}"}), 400
113
 
 
 
 
 
 
 
114
  # Create directory for reference pattern if it doesn't exist
115
  pattern_dir = os.path.join(reference_dir, reference_word)
116
  os.makedirs(pattern_dir, exist_ok=True)
117
 
118
  # Save the reference audio file
119
  audio_file = request.files["audio"]
120
- file_path = os.path.join(pattern_dir, secure_filename(audio_file.filename))
121
- audio_file.save(file_path)
122
-
123
- # Convert to WAV if not already in that format
124
- if not file_path.lower().endswith('.wav'):
125
- base_path = os.path.splitext(file_path)[0]
126
- wav_path = f"{base_path}.wav"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  try:
128
- audio = AudioSegment.from_file(file_path)
129
- audio = audio.set_frame_rate(sample_rate).set_channels(1)
130
- audio.export(wav_path, format="wav")
131
- # Remove original file if conversion successful
132
- os.unlink(file_path)
133
- file_path = wav_path
134
- except Exception as e:
135
- logger.error(f"❌ Reference audio conversion failed: {str(e)}")
136
- return jsonify({"error": f"Audio conversion failed: {str(e)}"}), 500
137
-
138
- logger.info(f"βœ… Reference audio saved successfully for {reference_word}: {file_path}")
139
 
140
  # Count how many references we have now
141
  references = glob.glob(os.path.join(pattern_dir, "*.wav"))
142
  return jsonify({
143
  "message": "Reference audio uploaded successfully",
144
  "reference_word": reference_word,
145
- "file": os.path.basename(file_path),
146
  "total_references": len(references)
147
  })
148
 
@@ -151,8 +286,8 @@ def handle_upload_reference(request, reference_dir, sample_rate):
151
  logger.debug(f"Stack trace: {traceback.format_exc()}")
152
  return jsonify({"error": f"Internal server error: {str(e)}"}), 500
153
 
154
-
155
  def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
 
156
  request_id = f"req-{id(request)}" # Create unique ID for this request
157
  logger.info(f"[{request_id}] πŸ†• Starting new pronunciation evaluation request")
158
 
@@ -184,24 +319,33 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
184
  reference_dir_path = os.path.join(reference_dir, reference_locator)
185
  logger.info(f"[{request_id}] πŸ“ Reference directory path: {reference_dir_path}")
186
 
187
- if not os.path.exists(reference_dir_path):
188
- logger.warning(f"[{request_id}] ⚠️ Reference directory not found: {reference_dir_path}")
189
- return jsonify({"error": f"Reference audio directory not found: {reference_locator}"}), 404
190
-
 
 
 
 
 
 
191
  reference_files = glob.glob(os.path.join(reference_dir_path, "*.wav"))
192
  logger.info(f"[{request_id}] πŸ“ Found {len(reference_files)} reference files")
193
 
194
- # Inside handle_evaluation_request, after checking for reference files:
195
  if not reference_files:
196
  logger.warning(f"[{request_id}] ⚠️ No reference audio files found in {reference_dir_path}")
197
 
198
- # Create a dummy reference file for testing
199
  try:
200
  dummy_file_path = os.path.join(reference_dir_path, "dummy_reference.wav")
201
  logger.info(f"[{request_id}] πŸ”„ Creating dummy reference file: {dummy_file_path}")
202
 
203
- # Create a 1-second silent WAV file
204
  silent_audio = AudioSegment.silent(duration=1000, frame_rate=sample_rate)
 
 
 
205
  silent_audio.export(dummy_file_path, format="wav")
206
 
207
  # Add it to the list of reference files
@@ -212,8 +356,7 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
212
  return jsonify({"error": f"No reference audio found for {reference_locator}"}), 404
213
 
214
  lang_code = LANGUAGE_CODES.get(language, language)
215
- logger.info(
216
- f"[{request_id}] πŸ”„ Evaluating pronunciation for reference: {reference_locator} with language code: {lang_code}")
217
 
218
  # Create a request-specific temp directory to avoid conflicts
219
  temp_dir = os.path.join(output_dir, f"temp_{request_id}")
@@ -244,11 +387,11 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
244
  # Transcribe user audio
245
  try:
246
  logger.info(f"[{request_id}] πŸ”„ Transcribing user audio")
 
247
  inputs = asr_processor(
248
  user_waveform,
249
  sampling_rate=sample_rate,
250
- return_tensors="pt",
251
- language=lang_code
252
  )
253
  inputs = {k: v.to(asr_model.device) for k, v in inputs.items()}
254
 
@@ -286,15 +429,14 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
286
  ref_waveform = ref_waveform.squeeze().numpy()
287
 
288
  # Transcribe reference audio - use the local asr_model and asr_processor
 
289
  inputs = asr_processor(
290
  ref_waveform,
291
  sampling_rate=sample_rate,
292
- return_tensors="pt",
293
- language=lang_code
294
  )
295
  inputs = {k: v.to(asr_model.device) for k, v in inputs.items()}
296
 
297
-
298
  with torch.no_grad():
299
  logits = asr_model(**inputs).logits
300
  ids = torch.argmax(logits, dim=-1)[0]
@@ -387,21 +529,4 @@ def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
387
  except:
388
  pass
389
 
390
- return jsonify({"error": f"Internal server error: {str(e)}"}), 500
391
-
392
- def setup_reference_patterns(reference_dir):
393
- """Create standard reference pattern directories if they don't exist"""
394
- reference_patterns = [
395
- "mayap_a_abak", "mayap_a_ugtu", "mayap_a_gatpanapun", "mayap_a_bengi",
396
- "komusta_ka", "malaus_ko_pu", "malaus_kayu", "agaganaka_da_ka",
397
- "pagdulapan_da_ka", "kaluguran_da_ka", "dakal_a_salamat", "panapaya_mu_ku"
398
- ]
399
-
400
- for pattern in reference_patterns:
401
- pattern_dir = os.path.join(reference_dir, pattern)
402
- if not os.path.exists(pattern_dir):
403
- try:
404
- os.makedirs(pattern_dir, exist_ok=True)
405
- logger.info(f"πŸ“ Created reference pattern directory: {pattern_dir}")
406
- except Exception as e:
407
- logger.error(f"❌ Failed to create reference pattern directory {pattern_dir}: {str(e)}")
 
31
  matcher = SequenceMatcher(None, clean1, clean2)
32
  return matcher.ratio() * 100
33
 
34
def setup_reference_patterns(reference_dir, sample_rate=16000):
    """Create the standard reference pattern directories and placeholder audio.

    A sub-directory of ``reference_dir`` is created for every known pattern
    name that does not exist yet. Every pattern directory that contains no
    ``*.wav`` file then receives a one-second ``dummy_reference.wav``.

    Args:
        reference_dir: Directory that holds one sub-directory per pattern.
        sample_rate: Frame rate (Hz) used for the generated placeholder audio.

    Returns:
        A ``(created_dirs, created_files)`` tuple with the number of pattern
        directories and placeholder files created by this call.
    """
    # Imported locally so the block does not depend on the file's import
    # section; pydub is already required for AudioSegment.
    from pydub.generators import Sine

    reference_patterns = [
        "mayap_a_abak", "mayap_a_ugtu", "mayap_a_gatpanapun", "mayap_a_bengi",
        "komusta_ka", "malaus_ko_pu", "malaus_kayu", "agaganaka_da_ka",
        "pagdulapan_da_ka", "kaluguran_da_ka", "dakal_a_salamat", "panapaya_mu_ku",
    ]

    created_dirs = 0
    created_files = 0

    for pattern in reference_patterns:
        pattern_dir = os.path.join(reference_dir, pattern)
        if not os.path.exists(pattern_dir):
            try:
                os.makedirs(pattern_dir, exist_ok=True)
                logger.info(f"📁 Created reference pattern directory: {pattern_dir}")
                created_dirs += 1
            except Exception as e:
                logger.error(f"❌ Failed to create reference pattern directory {pattern_dir}: {str(e)}")
                # Without the directory there is nowhere to put a placeholder.
                continue

        # Only directories without any WAV file get a placeholder.
        if glob.glob(os.path.join(pattern_dir, "*.wav")):
            continue

        try:
            dummy_path = os.path.join(pattern_dir, "dummy_reference.wav")
            placeholder = AudioSegment.silent(duration=1000, frame_rate=sample_rate)
            # Applying gain to digital silence yields silence again, so an
            # audible (but quiet) tone is needed for the placeholder to carry
            # any signal at all.
            blip = Sine(440, sample_rate=sample_rate).to_audio_segment(
                duration=50, volume=-30.0
            )
            for position_ms in range(50, 950, 300):
                placeholder = placeholder.overlay(blip, position=position_ms)
            placeholder.export(dummy_path, format="wav")
            logger.info(f"📄 Created dummy reference file: {dummy_path}")
            created_files += 1
        except Exception as e:
            logger.error(f"❌ Failed to create dummy file in {pattern_dir}: {str(e)}")

    return created_dirs, created_files
74
+
75
def search_reference_directories():
    """Search a fixed list of candidate locations for reference directories.

    Each candidate that exists as a directory is reported once, even when
    several candidate spellings resolve to the same real directory (the first
    spelling in the candidate list wins).

    Returns:
        A list of dicts, one per distinct directory found, with the keys:
        ``path`` (the candidate path as listed), ``access`` (dict of
        ``readable``/``writable``/``executable`` booleans), ``pattern_dirs``
        (number of immediate sub-directories) and ``wav_files`` (number of
        ``*.wav`` files directly inside those sub-directories).
    """
    possible_locations = [
        "./reference_audios",
        "../reference_audios",
        "/app/reference_audios",
        "/tmp/reference_audios",
        os.path.join(os.path.dirname(os.path.abspath(__file__)), "reference_audios"),
    ]

    found_dirs = []
    seen_real_paths = set()

    for location in possible_locations:
        if not os.path.isdir(location):
            continue

        # Relative, absolute and symlinked spellings may all point at the same
        # directory; report it only once.
        real_path = os.path.realpath(location)
        if real_path in seen_real_paths:
            continue
        seen_real_paths.add(real_path)

        access_info = {
            "readable": os.access(location, os.R_OK),
            "writable": os.access(location, os.W_OK),
            "executable": os.access(location, os.X_OK),
        }

        # An unreadable candidate must not abort the whole search; it is
        # reported with zero patterns instead.
        try:
            pattern_dirs = [
                entry for entry in os.listdir(location)
                if os.path.isdir(os.path.join(location, entry))
            ]
        except OSError:
            pattern_dirs = []

        wav_count = sum(
            len(glob.glob(os.path.join(location, pattern, "*.wav")))
            for pattern in pattern_dirs
        )

        found_dirs.append({
            "path": location,
            "access": access_info,
            "pattern_dirs": len(pattern_dirs),
            "wav_files": wav_count,
        })

    return found_dirs
112
 
113
  def init_reference_audio(reference_dir, output_dir):
114
+ """Initialize reference audio directories and return the working directory path"""
115
  try:
116
  # Create the output directory first
117
  os.makedirs(output_dir, exist_ok=True)
118
  logger.info(f"πŸ“ Created output directory: {output_dir}")
119
 
120
+ # Search for existing reference directories
121
+ found_dirs = search_reference_directories()
122
+ for directory in found_dirs:
123
+ logger.info(f"πŸ” Found reference directory: {directory['path']} "
124
+ f"(patterns: {directory['pattern_dirs']}, wav files: {directory['wav_files']})")
125
+
126
+ # First, try to use the provided reference_dir
127
+ working_dir = reference_dir
 
128
 
129
+ # Check if reference_dir is accessible and writable
130
+ if not os.path.exists(reference_dir) or not os.access(reference_dir, os.W_OK):
131
+ logger.warning(f"⚠️ Provided reference directory {reference_dir} is not writable")
132
 
133
+ # Try to use a found directory that has patterns and is writable
134
+ for directory in found_dirs:
135
+ if directory['access']['writable'] and directory['pattern_dirs'] > 0:
136
+ working_dir = directory['path']
137
+ logger.info(f"βœ… Using found reference directory: {working_dir}")
138
+ break
139
+ else:
140
+ # If no suitable directory found, create one in /tmp
141
+ working_dir = os.path.join('/tmp', 'reference_audios')
142
+ logger.warning(f"⚠️ Using fallback reference directory in /tmp: {working_dir}")
143
+
144
+ # Ensure the working directory exists
145
+ os.makedirs(working_dir, exist_ok=True)
146
+ logger.info(f"πŸ“ Using reference directory: {working_dir}")
147
+
148
+ # Set up reference pattern directories with dummy files if needed
149
+ dirs_created, files_created = setup_reference_patterns(working_dir)
150
+ logger.info(f"πŸ“Š Created {dirs_created} directories and {files_created} dummy files")
151
+
152
+ # Try to copy reference files from other found directories to working directory if needed
153
+ if files_created > 0 and len(found_dirs) > 1:
154
+ # Try to find a directory with existing WAV files
155
+ for directory in found_dirs:
156
+ if directory['path'] != working_dir and directory['wav_files'] > 0:
157
+ try:
158
+ source_dir = directory['path']
159
+ logger.info(f"πŸ”„ Copying reference files from {source_dir} to {working_dir}")
160
+
161
+ # Copy pattern directories that have WAV files
162
+ for item in os.listdir(source_dir):
163
+ src_path = os.path.join(source_dir, item)
164
+ if os.path.isdir(src_path) and glob.glob(os.path.join(src_path, "*.wav")):
165
+ dst_path = os.path.join(working_dir, item)
166
+
167
+ # Copy each WAV file individually
168
+ for wav_file in glob.glob(os.path.join(src_path, "*.wav")):
169
+ wav_name = os.path.basename(wav_file)
170
+ dst_file = os.path.join(dst_path, wav_name)
171
+ if not os.path.exists(dst_file):
172
+ shutil.copy2(wav_file, dst_file)
173
+ logger.info(f"πŸ“„ Copied {wav_name} to {dst_path}")
174
+
175
+ break
176
+ except Exception as e:
177
+ logger.warning(f"⚠️ Failed to copy reference files: {str(e)}")
178
+
179
+ # Log the final contents
180
+ pattern_dirs = [d for d in os.listdir(working_dir)
181
+ if os.path.isdir(os.path.join(working_dir, d))]
182
+ logger.info(f"πŸ“Š Final reference directory has {len(pattern_dirs)} pattern directories")
183
+
184
+ total_wav_files = 0
185
+ for pattern in pattern_dirs:
186
+ pattern_path = os.path.join(working_dir, pattern)
187
+ wav_files = glob.glob(os.path.join(pattern_path, "*.wav"))
188
+ total_wav_files += len(wav_files)
189
+ logger.info(f" - {pattern}: {len(wav_files)} WAV files")
190
+
191
+ logger.info(f"πŸ“Š Total reference WAV files: {total_wav_files}")
192
 
193
+ return working_dir
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
  except Exception as e:
196
  logger.error(f"❌ Failed to set up reference audio directory: {str(e)}")
197
+ logger.debug(f"Stack trace: {traceback.format_exc()}")
198
+
199
+ # As a last resort, try to use /tmp
200
+ fallback_dir = os.path.join('/tmp', 'reference_audios')
201
+ try:
202
+ os.makedirs(fallback_dir, exist_ok=True)
203
+ setup_reference_patterns(fallback_dir)
204
+ logger.warning(f"⚠️ Using emergency fallback directory: {fallback_dir}")
205
+ return fallback_dir
206
+ except:
207
+ logger.critical("πŸ’₯ CRITICAL: Failed to create even a fallback directory")
208
+ return reference_dir
209
 
210
  def handle_upload_reference(request, reference_dir, sample_rate):
211
  """Handle upload of reference audio files"""
 
230
  logger.warning(f"⚠️ Invalid reference word: {reference_word}")
231
  return jsonify({"error": f"Invalid reference word. Available: {reference_patterns}"}), 400
232
 
233
+ # Make sure we have a writable reference directory
234
+ if not os.path.exists(reference_dir):
235
+ reference_dir = os.path.join('/tmp', 'reference_audios')
236
+ os.makedirs(reference_dir, exist_ok=True)
237
+ logger.warning(f"⚠️ Using alternate reference directory for upload: {reference_dir}")
238
+
239
  # Create directory for reference pattern if it doesn't exist
240
  pattern_dir = os.path.join(reference_dir, reference_word)
241
  os.makedirs(pattern_dir, exist_ok=True)
242
 
243
  # Save the reference audio file
244
  audio_file = request.files["audio"]
245
+ filename = secure_filename(audio_file.filename)
246
+
247
+ # Ensure filename has .wav extension
248
+ if not filename.lower().endswith('.wav'):
249
+ base_name = os.path.splitext(filename)[0]
250
+ filename = f"{base_name}.wav"
251
+
252
+ file_path = os.path.join(pattern_dir, filename)
253
+
254
+ # Create a temporary file first, then convert to WAV
255
+ with tempfile.NamedTemporaryFile(delete=False) as temp_file:
256
+ audio_file.save(temp_file.name)
257
+ temp_path = temp_file.name
258
+
259
+ try:
260
+ # Process the audio file
261
+ audio = AudioSegment.from_file(temp_path)
262
+ audio = audio.set_frame_rate(sample_rate).set_channels(1)
263
+ audio.export(file_path, format="wav")
264
+ logger.info(f"βœ… Reference audio saved successfully for {reference_word}: {file_path}")
265
+
266
+ # Clean up temp file
267
  try:
268
+ os.unlink(temp_path)
269
+ except:
270
+ pass
271
+ except Exception as e:
272
+ logger.error(f"❌ Reference audio processing failed: {str(e)}")
273
+ return jsonify({"error": f"Audio processing failed: {str(e)}"}), 500
 
 
 
 
 
274
 
275
  # Count how many references we have now
276
  references = glob.glob(os.path.join(pattern_dir, "*.wav"))
277
  return jsonify({
278
  "message": "Reference audio uploaded successfully",
279
  "reference_word": reference_word,
280
+ "file": filename,
281
  "total_references": len(references)
282
  })
283
 
 
286
  logger.debug(f"Stack trace: {traceback.format_exc()}")
287
  return jsonify({"error": f"Internal server error: {str(e)}"}), 500
288
 
 
289
  def handle_evaluation_request(request, reference_dir, output_dir, sample_rate):
290
+ """Handle pronunciation evaluation requests"""
291
  request_id = f"req-{id(request)}" # Create unique ID for this request
292
  logger.info(f"[{request_id}] πŸ†• Starting new pronunciation evaluation request")
293
 
 
319
  reference_dir_path = os.path.join(reference_dir, reference_locator)
320
  logger.info(f"[{request_id}] πŸ“ Reference directory path: {reference_dir_path}")
321
 
322
+ # Make sure the reference directory exists
323
+ if not os.path.exists(reference_dir_path):
324
+ try:
325
+ os.makedirs(reference_dir_path, exist_ok=True)
326
+ logger.warning(f"[{request_id}] ⚠️ Created missing reference directory: {reference_dir_path}")
327
+ except Exception as e:
328
+ logger.error(f"[{request_id}] ❌ Failed to create reference directory: {str(e)}")
329
+ return jsonify({"error": f"Reference audio directory not found: {reference_locator}"}), 404
330
+
331
+ # Check for reference files
332
  reference_files = glob.glob(os.path.join(reference_dir_path, "*.wav"))
333
  logger.info(f"[{request_id}] πŸ“ Found {len(reference_files)} reference files")
334
 
335
+ # If no reference files exist, create a dummy reference file
336
  if not reference_files:
337
  logger.warning(f"[{request_id}] ⚠️ No reference audio files found in {reference_dir_path}")
338
 
339
+ # Create a dummy reference file
340
  try:
341
  dummy_file_path = os.path.join(reference_dir_path, "dummy_reference.wav")
342
  logger.info(f"[{request_id}] πŸ”„ Creating dummy reference file: {dummy_file_path}")
343
 
344
+ # Create a 1-second audio file with a slight sound
345
  silent_audio = AudioSegment.silent(duration=1000, frame_rate=sample_rate)
346
+ # Add a tiny bit of noise to help ASR
347
+ for i in range(50, 950, 300):
348
+ silent_audio = silent_audio.overlay(AudioSegment.silent(duration=50, frame_rate=sample_rate) + 3, position=i)
349
  silent_audio.export(dummy_file_path, format="wav")
350
 
351
  # Add it to the list of reference files
 
356
  return jsonify({"error": f"No reference audio found for {reference_locator}"}), 404
357
 
358
  lang_code = LANGUAGE_CODES.get(language, language)
359
+ logger.info(f"[{request_id}] πŸ”„ Evaluating pronunciation for reference: {reference_locator} with language code: {lang_code}")
 
360
 
361
  # Create a request-specific temp directory to avoid conflicts
362
  temp_dir = os.path.join(output_dir, f"temp_{request_id}")
 
387
  # Transcribe user audio
388
  try:
389
  logger.info(f"[{request_id}] πŸ”„ Transcribing user audio")
390
+ # Remove language parameter if causing warnings
391
  inputs = asr_processor(
392
  user_waveform,
393
  sampling_rate=sample_rate,
394
+ return_tensors="pt"
 
395
  )
396
  inputs = {k: v.to(asr_model.device) for k, v in inputs.items()}
397
 
 
429
  ref_waveform = ref_waveform.squeeze().numpy()
430
 
431
  # Transcribe reference audio - use the local asr_model and asr_processor
432
+ # Remove language parameter if causing warnings
433
  inputs = asr_processor(
434
  ref_waveform,
435
  sampling_rate=sample_rate,
436
+ return_tensors="pt"
 
437
  )
438
  inputs = {k: v.to(asr_model.device) for k, v in inputs.items()}
439
 
 
440
  with torch.no_grad():
441
  logits = asr_model(**inputs).logits
442
  ids = torch.argmax(logits, dim=-1)[0]
 
529
  except:
530
  pass
531
 
532
+ return jsonify({"error": f"Internal server error: {str(e)}"}), 500