roychao19477 committed on
Commit
12fb517
·
1 Parent(s): d44085e
Files changed (1) hide show
  1. app.py +26 -4
app.py CHANGED
@@ -60,7 +60,6 @@ import os
60
  import tempfile
61
  from ultralytics import YOLO
62
  from moviepy import ImageSequenceClip
63
- from moviepy.video import fx as vfx
64
  from scipy.io import wavfile
65
  from avse_code import run_avse
66
 
@@ -136,11 +135,35 @@ def extract_resampled_audio(video_path, target_sr=16000):
136
  torchaudio.save(resampled_audio_path, waveform, sample_rate=target_sr)
137
  return resampled_audio_path
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
  @spaces.GPU
141
  def extract_faces(video_file):
 
 
 
142
  cap = cv2.VideoCapture(video_file)
143
- fps = cap.get(cv2.CAP_PROP_FPS)
144
  frames = []
145
 
146
  while True:
@@ -196,8 +219,7 @@ def extract_faces(video_file):
196
  [cv2.cvtColor(cv2.resize(f, (224, 224)), cv2.COLOR_BGR2RGB) for f in frames],
197
  fps=fps
198
  )
199
- from moviepy.video.fx.MirrorY import MirrorY
200
- clip = clip.with_effects([MirrorY().copy()])
201
 
202
  # Save audio from original, resampled to 16kHz
203
  audio_path = os.path.join(tmpdir, "audio_16k.wav")
 
60
  import tempfile
61
  from ultralytics import YOLO
62
  from moviepy import ImageSequenceClip
 
63
  from scipy.io import wavfile
64
  from avse_code import run_avse
65
 
 
135
  torchaudio.save(resampled_audio_path, waveform, sample_rate=target_sr)
136
  return resampled_audio_path
137
 
138
+ import ffmpeg
139
+ import os
140
+
141
def maybe_downsample(video_path, max_height=720):
    """Return a video path whose frame height does not exceed ``max_height``.

    The file is probed with ffmpeg. If its first video stream is already at
    most ``max_height`` pixels tall, ``video_path`` is returned unchanged.
    Otherwise the video is re-encoded (H.264 video, AAC audio when the source
    has an audio stream) into a new temporary ``.mp4`` file scaled to
    ``max_height`` rows, and the path of that new file is returned.

    Args:
        video_path: Path of the input video file.
        max_height: Maximum allowed frame height in pixels. Should be an even
            number, since the H.264 encoder used here needs even dimensions.

    Returns:
        ``video_path`` itself when no downsampling is needed, otherwise the
        path of a newly created temporary file. The caller owns that file
        and is responsible for deleting it.

    Raises:
        ValueError: If the probed file contains no video stream.
        ffmpeg.Error: If probing or re-encoding fails.
    """
    probe = ffmpeg.probe(video_path)
    streams = probe.get('streams', [])
    video_streams = [s for s in streams if s.get('codec_type') == 'video']
    if not video_streams:
        raise ValueError(f"No video stream found in {video_path!r}")

    height = int(video_streams[0]['height'])
    if height <= max_height:
        return video_path

    has_audio = any(s.get('codec_type') == 'audio' for s in streams)

    # mkstemp creates the file atomically (mktemp only returns a name and is
    # race-prone/deprecated). We only need the path, so close the descriptor
    # and let ffmpeg overwrite the empty placeholder.
    fd, downsampled_path = tempfile.mkstemp(suffix=f"_{max_height}p.mp4")
    os.close(fd)

    source = ffmpeg.input(video_path)
    # Width -2 keeps the aspect ratio AND rounds to an even value; -1 can
    # produce an odd width, which libx264 refuses to encode.
    video = source.video.filter('scale', -2, max_height)

    # Filtering only the video stream and mapping the audio explicitly keeps
    # the soundtrack; filtering the whole input would map the video alone.
    if has_audio:
        outputs = [video, source.audio]
        codec_args = {'vcodec': 'libx264', 'acodec': 'aac'}
    else:
        outputs = [video]
        codec_args = {'vcodec': 'libx264'}

    try:
        (
            ffmpeg
            .output(*outputs, downsampled_path, **codec_args)
            .overwrite_output()
            .run(quiet=True)
        )
    except Exception:
        # Do not leave a half-written temp file behind on failure.
        try:
            os.remove(downsampled_path)
        except OSError:
            pass
        raise

    return downsampled_path
160
 
161
  @spaces.GPU
162
  def extract_faces(video_file):
163
+ #cap = cv2.VideoCapture(video_file)
164
+ #fps = cap.get(cv2.CAP_PROP_FPS)
165
+ video_file = maybe_downsample(video_file)
166
  cap = cv2.VideoCapture(video_file)
 
167
  frames = []
168
 
169
  while True:
 
219
  [cv2.cvtColor(cv2.resize(f, (224, 224)), cv2.COLOR_BGR2RGB) for f in frames],
220
  fps=fps
221
  )
222
+ clip.write_videofile(output_path, codec="libx264", audio=False, fps=25)
 
223
 
224
  # Save audio from original, resampled to 16kHz
225
  audio_path = os.path.join(tmpdir, "audio_16k.wav")