Spaces:
Running
Running
Update inference2.py
Browse files: inference2.py (+10 -5)
inference2.py
CHANGED
@@ -235,14 +235,16 @@ def run_inference(
|
|
235 |
frame = frame[y1:y2, x1:x2]
|
236 |
full_frames.append(frame)
|
237 |
|
238 |
-
print
|
239 |
if not full_frames:
|
240 |
raise ValueError("No frames could be read from the input face file.")
|
241 |
|
242 |
temp_audio_path = os.path.join(temp_dir, 'temp_audio.wav')
|
|
|
|
|
243 |
if not audio_path.endswith('.wav'):
|
244 |
print('Extracting raw audio...')
|
245 |
-
command = f'ffmpeg -y -i "{audio_path}" -
|
246 |
try:
|
247 |
subprocess.run(command, shell=True, check=True, capture_output=True)
|
248 |
audio_path = temp_audio_path
|
@@ -250,15 +252,18 @@ def run_inference(
|
|
250 |
print(f"FFmpeg error: {e.stderr.decode()}")
|
251 |
raise RuntimeError(f"Failed to extract audio from {audio_path}. Error: {e.stderr.decode()}")
|
252 |
else:
|
253 |
-
# Copy the wav file to temp if it's already wav to maintain consistency in naming
|
254 |
shutil.copy(audio_path, temp_audio_path)
|
255 |
audio_path = temp_audio_path
|
256 |
|
257 |
-
|
258 |
wav = audio.load_wav(audio_path, 16000)
|
259 |
-
# >>> CRUCIAL FIX: Explicitly cast to float32 for resampy/numba compatibility <<<
|
260 |
wav = wav.astype(np.float32)
|
261 |
|
|
|
|
|
|
|
|
|
|
|
262 |
mel = audio.melspectrogram(wav)
|
263 |
print("Mel spectrogram shape:", mel.shape)
|
264 |
|
|
|
235 |
frame = frame[y1:y2, x1:x2]
|
236 |
full_frames.append(frame)
|
237 |
|
238 |
+
print("Number of frames available for inference:", len(full_frames))
|
239 |
if not full_frames:
|
240 |
raise ValueError("No frames could be read from the input face file.")
|
241 |
|
242 |
temp_audio_path = os.path.join(temp_dir, 'temp_audio.wav')
|
243 |
+
|
244 |
+
# Updated FFmpeg command: force mono, 16-bit, 16kHz
|
245 |
if not audio_path.endswith('.wav'):
|
246 |
print('Extracting raw audio...')
|
247 |
+
command = f'ffmpeg -y -i "{audio_path}" -ac 1 -ar 16000 -sample_fmt s16 "{temp_audio_path}"'
|
248 |
try:
|
249 |
subprocess.run(command, shell=True, check=True, capture_output=True)
|
250 |
audio_path = temp_audio_path
|
|
|
252 |
print(f"FFmpeg error: {e.stderr.decode()}")
|
253 |
raise RuntimeError(f"Failed to extract audio from {audio_path}. Error: {e.stderr.decode()}")
|
254 |
else:
|
|
|
255 |
shutil.copy(audio_path, temp_audio_path)
|
256 |
audio_path = temp_audio_path
|
257 |
|
258 |
+
# Load WAV audio
|
259 |
wav = audio.load_wav(audio_path, 16000)
|
|
|
260 |
wav = wav.astype(np.float32)
|
261 |
|
262 |
+
# Check audio length
|
263 |
+
print(f"Extracted audio samples: {len(wav)}, duration: {len(wav)/16000:.2f} sec")
|
264 |
+
if len(wav) < 16000:
|
265 |
+
raise ValueError(f"Audio is too short after conversion: only {len(wav)} samples. Please upload a longer clip.")
|
266 |
+
|
267 |
mel = audio.melspectrogram(wav)
|
268 |
print("Mel spectrogram shape:", mel.shape)
|
269 |
|