Spaces:

rc19477
/

avse_dev_only

Sleeping

roychao19477 committed on Jul 1

Commit

12fb517

1 Parent(s): d44085e

Fix

Files changed (1) hide show

app.py CHANGED Viewed

@@ -60,7 +60,6 @@ import os
 import tempfile
 from ultralytics import YOLO
 from moviepy import ImageSequenceClip
-from moviepy.video import fx as vfx
 from scipy.io import wavfile
 from avse_code import run_avse
@@ -136,11 +135,35 @@ def extract_resampled_audio(video_path, target_sr=16000):
     torchaudio.save(resampled_audio_path, waveform, sample_rate=target_sr)
     return resampled_audio_path
 @spaces.GPU
 def extract_faces(video_file):
     cap = cv2.VideoCapture(video_file)
-    fps = cap.get(cv2.CAP_PROP_FPS)
     frames = []
     while True:
@@ -196,8 +219,7 @@ def extract_faces(video_file):
         [cv2.cvtColor(cv2.resize(f, (224, 224)), cv2.COLOR_BGR2RGB) for f in frames],
         fps=fps
     )
-    from moviepy.video.fx.MirrorY import MirrorY
-    clip = clip.with_effects([MirrorY().copy()])
     # Save audio from original, resampled to 16kHz
     audio_path = os.path.join(tmpdir, "audio_16k.wav")

 import tempfile
 from ultralytics import YOLO
 from moviepy import ImageSequenceClip
 from scipy.io import wavfile
 from avse_code import run_avse
     torchaudio.save(resampled_audio_path, waveform, sample_rate=target_sr)
     return resampled_audio_path
+import ffmpeg
+import os
def maybe_downsample(video_path):
    """Return a path to a version of the video that is at most 720 px tall.

    The file is probed with ffprobe. If its first video stream is taller
    than 720 pixels, it is re-encoded (H.264 video, AAC audio) into a new
    temporary file scaled to a height of 720, and that file's path is
    returned. Otherwise the input path is returned unchanged.

    Args:
        video_path: Path to the input video file.

    Returns:
        ``video_path`` itself when no downscaling is needed (or the file has
        no video stream), else the path of a newly created ``*_720p.mp4``
        temporary file. The caller is responsible for deleting that file.

    Raises:
        ffmpeg.Error: If probing or re-encoding fails.
    """
    probe = ffmpeg.probe(video_path)
    streams = probe['streams']
    video_streams = [s for s in streams if s['codec_type'] == 'video']

    # Nothing to downscale: no video stream at all, or already small enough.
    if not video_streams or int(video_streams[0]['height']) <= 720:
        return video_path

    has_audio = any(s['codec_type'] == 'audio' for s in streams)

    # mkstemp creates the file atomically; mktemp only returns a name and is
    # deprecated because another process can claim that name first.
    fd, downsampled_path = tempfile.mkstemp(suffix="_720p.mp4")
    os.close(fd)

    source = ffmpeg.input(video_path)
    # Width -2 keeps the aspect ratio but rounds to an even number; libx264
    # with 4:2:0 chroma rejects odd dimensions, which -1 can produce.
    scaled = source.video.filter('scale', -2, 720)

    if has_audio:
        # Only explicitly passed streams are mapped once a filter is used,
        # so the audio has to be handed to output() or it is dropped.
        output = ffmpeg.output(
            scaled,
            source.audio,
            downsampled_path,
            vcodec='libx264',
            acodec='aac',
        )
    else:
        output = ffmpeg.output(scaled, downsampled_path, vcodec='libx264')

    try:
        # overwrite_output (-y) lets ffmpeg replace the empty placeholder
        # file that mkstemp created.
        output.overwrite_output().run(quiet=True)
    except Exception:
        # Do not leave a stray temp file behind when encoding fails.
        try:
            os.remove(downsampled_path)
        except OSError:
            pass
        raise

    return downsampled_path
 @spaces.GPU
 def extract_faces(video_file):
+    #cap = cv2.VideoCapture(video_file)
+    #fps = cap.get(cv2.CAP_PROP_FPS)
+    video_file = maybe_downsample(video_file)
     cap = cv2.VideoCapture(video_file)
     frames = []
     while True:
         [cv2.cvtColor(cv2.resize(f, (224, 224)), cv2.COLOR_BGR2RGB) for f in frames],
         fps=fps
     )
+    clip.write_videofile(output_path, codec="libx264", audio=False, fps=25)
     # Save audio from original, resampled to 16kHz
     audio_path = os.path.join(tmpdir, "audio_16k.wav")