Spaces:
Running
Running
Update inference2.py
Browse files: inference2.py (+5 -2)
inference2.py
CHANGED
@@ -70,8 +70,8 @@ def face_detect(images, pads, face_det_batch_size, nosmooth, img_size):
|
|
70 |
y1 = max(0, rect[1] - pady1)
|
71 |
y2 = min(image.shape[0], rect[3] + pady2)
|
72 |
x1 = max(0, rect[0] - padx1)
|
73 |
-
x2 = min(image.shape[1], rect[2] + padx2)
|
74 |
-
|
75 |
results.append([x1, y1, x2, y2])
|
76 |
|
77 |
boxes = np.array(results)
|
@@ -256,6 +256,9 @@ def run_inference(
|
|
256 |
|
257 |
|
258 |
wav = audio.load_wav(audio_path, 16000)
|
|
|
|
|
|
|
259 |
mel = audio.melspectrogram(wav)
|
260 |
print("Mel spectrogram shape:", mel.shape)
|
261 |
|
|
|
70 |
y1 = max(0, rect[1] - pady1)
|
71 |
y2 = min(image.shape[0], rect[3] + pady2)
|
72 |
x1 = max(0, rect[0] - padx1)
|
73 |
+
x2 = min(image.shape[1], image.shape[1], rect[2] + padx2) # NOTE(review): image.shape[1] is passed twice; the duplicate is redundant and does not change min()'s result
|
74 |
+
|
75 |
results.append([x1, y1, x2, y2])
|
76 |
|
77 |
boxes = np.array(results)
|
|
|
256 |
|
257 |
|
258 |
wav = audio.load_wav(audio_path, 16000)
|
259 |
+
# >>> CRUCIAL FIX: Explicitly cast to float32 for resampy/numba compatibility <<<
|
260 |
+
wav = wav.astype(np.float32)
|
261 |
+
|
262 |
mel = audio.melspectrogram(wav)
|
263 |
print("Mel spectrogram shape:", mel.shape)
|
264 |
|