WebashalarForML committed on
Commit
faa8ce9
·
verified ·
1 Parent(s): e9f94b1

Update inference2.py

Browse files
Files changed (1) hide show
  1. inference2.py +10 -5
inference2.py CHANGED
@@ -235,14 +235,16 @@ def run_inference(
235
  frame = frame[y1:y2, x1:x2]
236
  full_frames.append(frame)
237
 
238
- print ("Number of frames available for inference: "+str(len(full_frames)))
239
  if not full_frames:
240
  raise ValueError("No frames could be read from the input face file.")
241
 
242
  temp_audio_path = os.path.join(temp_dir, 'temp_audio.wav')
 
 
243
  if not audio_path.endswith('.wav'):
244
  print('Extracting raw audio...')
245
- command = f'ffmpeg -y -i "{audio_path}" -strict -2 "{temp_audio_path}"'
246
  try:
247
  subprocess.run(command, shell=True, check=True, capture_output=True)
248
  audio_path = temp_audio_path
@@ -250,15 +252,18 @@ def run_inference(
250
  print(f"FFmpeg error: {e.stderr.decode()}")
251
  raise RuntimeError(f"Failed to extract audio from {audio_path}. Error: {e.stderr.decode()}")
252
  else:
253
- # Copy the wav file to temp if it's already wav to maintain consistency in naming
254
  shutil.copy(audio_path, temp_audio_path)
255
  audio_path = temp_audio_path
256
 
257
-
258
  wav = audio.load_wav(audio_path, 16000)
259
- # >>> CRUCIAL FIX: Explicitly cast to float32 for resampy/numba compatibility <<<
260
  wav = wav.astype(np.float32)
261
 
 
 
 
 
 
262
  mel = audio.melspectrogram(wav)
263
  print("Mel spectrogram shape:", mel.shape)
264
 
 
235
  frame = frame[y1:y2, x1:x2]
236
  full_frames.append(frame)
237
 
238
+ print("Number of frames available for inference:", len(full_frames))
239
  if not full_frames:
240
  raise ValueError("No frames could be read from the input face file.")
241
 
242
  temp_audio_path = os.path.join(temp_dir, 'temp_audio.wav')
243
+
244
+ # Updated FFmpeg command: force mono, 16-bit, 16kHz
245
  if not audio_path.endswith('.wav'):
246
  print('Extracting raw audio...')
247
+ command = f'ffmpeg -y -i "{audio_path}" -ac 1 -ar 16000 -sample_fmt s16 "{temp_audio_path}"'
248
  try:
249
  subprocess.run(command, shell=True, check=True, capture_output=True)
250
  audio_path = temp_audio_path
 
252
  print(f"FFmpeg error: {e.stderr.decode()}")
253
  raise RuntimeError(f"Failed to extract audio from {audio_path}. Error: {e.stderr.decode()}")
254
  else:
 
255
  shutil.copy(audio_path, temp_audio_path)
256
  audio_path = temp_audio_path
257
 
258
+ # Load WAV audio
259
  wav = audio.load_wav(audio_path, 16000)
 
260
  wav = wav.astype(np.float32)
261
 
262
+ # Check audio length
263
+ print(f"Extracted audio samples: {len(wav)}, duration: {len(wav)/16000:.2f} sec")
264
+ if len(wav) < 16000:
265
+ raise ValueError(f"Audio is too short after conversion: only {len(wav)} samples. Please upload a longer clip.")
266
+
267
  mel = audio.melspectrogram(wav)
268
  print("Mel spectrogram shape:", mel.shape)
269