Spaces:
Running
on
A100
Running
on
A100
- app.py +11 -142
- video_highlight_detector.py +1 -6
app.py
CHANGED
@@ -60,15 +60,15 @@ def create_ui(examples_path: str):
|
|
60 |
label=f"Highlights ({format_duration(example['highlights']['duration_seconds'])})",
|
61 |
interactive=False
|
62 |
)
|
63 |
-
with gr.Accordion("
|
64 |
-
gr.Markdown(f"
|
65 |
-
gr.Markdown(f"
|
66 |
|
67 |
gr.Markdown("## Try It Yourself!")
|
68 |
with gr.Row():
|
69 |
with gr.Column(scale=1):
|
70 |
input_video = gr.Video(
|
71 |
-
label="Upload your video (max
|
72 |
interactive=True
|
73 |
)
|
74 |
process_btn = gr.Button("Process Video", variant="primary")
|
@@ -83,7 +83,7 @@ def create_ui(examples_path: str):
|
|
83 |
status = gr.Markdown()
|
84 |
|
85 |
analysis_accordion = gr.Accordion(
|
86 |
-
"
|
87 |
open=True,
|
88 |
visible=False
|
89 |
)
|
@@ -106,9 +106,9 @@ def create_ui(examples_path: str):
|
|
106 |
|
107 |
try:
|
108 |
duration = get_video_duration_seconds(video)
|
109 |
-
if duration >
|
110 |
yield [
|
111 |
-
"Video must be shorter than
|
112 |
"",
|
113 |
"",
|
114 |
gr.update(visible=False),
|
@@ -122,14 +122,14 @@ def create_ui(examples_path: str):
|
|
122 |
"",
|
123 |
"",
|
124 |
gr.update(visible=False),
|
125 |
-
gr.update(visible=
|
126 |
]
|
127 |
|
128 |
model, processor = load_model()
|
129 |
detector = BatchedVideoHighlightDetector(
|
130 |
model,
|
131 |
processor,
|
132 |
-
batch_size=
|
133 |
)
|
134 |
|
135 |
yield [
|
@@ -141,7 +141,7 @@ def create_ui(examples_path: str):
|
|
141 |
]
|
142 |
|
143 |
video_desc = detector.analyze_video_content(video)
|
144 |
-
formatted_desc = f"
|
145 |
|
146 |
yield [
|
147 |
"Determining highlight types...",
|
@@ -152,7 +152,7 @@ def create_ui(examples_path: str):
|
|
152 |
]
|
153 |
|
154 |
highlights = detector.determine_highlights(video_desc)
|
155 |
-
formatted_highlights = f"
|
156 |
|
157 |
# Get all segments
|
158 |
segments = get_fixed_30s_segments(video)
|
@@ -223,137 +223,6 @@ def create_ui(examples_path: str):
|
|
223 |
)
|
224 |
|
225 |
return app
|
226 |
-
# gr.Markdown("## Try It Yourself!")
|
227 |
-
# with gr.Row():
|
228 |
-
# with gr.Column(scale=1):
|
229 |
-
# input_video = gr.Video(
|
230 |
-
# label="Upload your video (max 20 minutes)",
|
231 |
-
# interactive=True
|
232 |
-
# )
|
233 |
-
# process_btn = gr.Button("Process Video", variant="primary")
|
234 |
-
|
235 |
-
# with gr.Column(scale=1):
|
236 |
-
# output_video = gr.Video(
|
237 |
-
# label="Highlight Video",
|
238 |
-
# visible=False,
|
239 |
-
# interactive=False,
|
240 |
-
# )
|
241 |
-
|
242 |
-
# status = gr.Markdown()
|
243 |
-
|
244 |
-
# analysis_accordion = gr.Accordion(
|
245 |
-
# "Model chain of thought details",
|
246 |
-
# open=True,
|
247 |
-
# visible=False
|
248 |
-
# )
|
249 |
-
|
250 |
-
# with analysis_accordion:
|
251 |
-
# video_description = gr.Markdown("", elem_id="video_desc")
|
252 |
-
# highlight_types = gr.Markdown("", elem_id="highlight_types")
|
253 |
-
|
254 |
-
# @spaces.GPU
|
255 |
-
# def on_process(video):
|
256 |
-
# if not video:
|
257 |
-
# yield [
|
258 |
-
# "Please upload a video", # status
|
259 |
-
# "", # video_description
|
260 |
-
# "", # highlight_types
|
261 |
-
# gr.update(visible=False), # output_video
|
262 |
-
# gr.update(visible=False) # analysis_accordion
|
263 |
-
# ]
|
264 |
-
# return
|
265 |
-
|
266 |
-
# try:
|
267 |
-
# duration = get_video_duration_seconds(video)
|
268 |
-
# if duration > 1200: # 20 minutes
|
269 |
-
# yield [
|
270 |
-
# "Video must be shorter than 20 minutes",
|
271 |
-
# "",
|
272 |
-
# "",
|
273 |
-
# gr.update(visible=False),
|
274 |
-
# gr.update(visible=False)
|
275 |
-
# ]
|
276 |
-
# return
|
277 |
-
|
278 |
-
# # Make accordion visible as soon as processing starts
|
279 |
-
# yield [
|
280 |
-
# "Loading model...",
|
281 |
-
# "",
|
282 |
-
# "",
|
283 |
-
# gr.update(visible=False),
|
284 |
-
# gr.update(visible=True)
|
285 |
-
# ]
|
286 |
-
|
287 |
-
# model, processor = load_model()
|
288 |
-
# detector = BatchedVideoHighlightDetector(model, processor, batch_size=8)
|
289 |
-
|
290 |
-
# yield [
|
291 |
-
# "Analyzing video content...",
|
292 |
-
# "",
|
293 |
-
# "",
|
294 |
-
# gr.update(visible=False),
|
295 |
-
# gr.update(visible=True)
|
296 |
-
# ]
|
297 |
-
|
298 |
-
# video_desc = detector.analyze_video_content(video)
|
299 |
-
# formatted_desc = f"#Summary: {video_desc[:500] + '...' if len(video_desc) > 500 else video_desc}"
|
300 |
-
|
301 |
-
# # Update description as soon as it's available
|
302 |
-
# yield [
|
303 |
-
# "Determining highlight types...",
|
304 |
-
# formatted_desc,
|
305 |
-
# "",
|
306 |
-
# gr.update(visible=False),
|
307 |
-
# gr.update(visible=True)
|
308 |
-
# ]
|
309 |
-
|
310 |
-
# highlights = detector.determine_highlights(video_desc)
|
311 |
-
# formatted_highlights = f"#Highlights to search for: {highlights[:500] + '...' if len(highlights) > 500 else highlights}"
|
312 |
-
|
313 |
-
# # Update highlights as soon as they're available
|
314 |
-
# yield [
|
315 |
-
# "Detecting and extracting highlights...",
|
316 |
-
# formatted_desc,
|
317 |
-
# formatted_highlights,
|
318 |
-
# gr.update(visible=False),
|
319 |
-
# gr.update(visible=True)
|
320 |
-
# ]
|
321 |
-
|
322 |
-
# with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
|
323 |
-
# temp_output = tmp_file.name
|
324 |
-
# detector.create_highlight_video(video, temp_output)
|
325 |
-
|
326 |
-
# yield [
|
327 |
-
# "Processing complete!",
|
328 |
-
# formatted_desc,
|
329 |
-
# formatted_highlights,
|
330 |
-
# gr.update(value=temp_output, visible=True),
|
331 |
-
# gr.update(visible=True)
|
332 |
-
# ]
|
333 |
-
|
334 |
-
# except Exception as e:
|
335 |
-
# yield [
|
336 |
-
# f"Error processing video: {str(e)}",
|
337 |
-
# "",
|
338 |
-
# "",
|
339 |
-
# gr.update(visible=False),
|
340 |
-
# gr.update(visible=False)
|
341 |
-
# ]
|
342 |
-
|
343 |
-
# process_btn.click(
|
344 |
-
# on_process,
|
345 |
-
# inputs=[input_video],
|
346 |
-
# outputs=[
|
347 |
-
# status,
|
348 |
-
# video_description,
|
349 |
-
# highlight_types,
|
350 |
-
# output_video,
|
351 |
-
# analysis_accordion
|
352 |
-
# ],
|
353 |
-
# queue=True,
|
354 |
-
# )
|
355 |
-
|
356 |
-
# return app
|
357 |
|
358 |
if __name__ == "__main__":
|
359 |
# Initialize CUDA
|
|
|
60 |
label=f"Highlights ({format_duration(example['highlights']['duration_seconds'])})",
|
61 |
interactive=False
|
62 |
)
|
63 |
+
with gr.Accordion("Chain of thought details", open=False):
|
64 |
+
gr.Markdown(f"### Summary: {example['analysis']['video_description']}")
|
65 |
+
gr.Markdown(f"### Highlights to search for: {example['analysis']['highlight_types']}")
|
66 |
|
67 |
gr.Markdown("## Try It Yourself!")
|
68 |
with gr.Row():
|
69 |
with gr.Column(scale=1):
|
70 |
input_video = gr.Video(
|
71 |
+
label="Upload your video (max 30 minutes)",
|
72 |
interactive=True
|
73 |
)
|
74 |
process_btn = gr.Button("Process Video", variant="primary")
|
|
|
83 |
status = gr.Markdown()
|
84 |
|
85 |
analysis_accordion = gr.Accordion(
|
86 |
+
"Chain of thought details",
|
87 |
open=True,
|
88 |
visible=False
|
89 |
)
|
|
|
106 |
|
107 |
try:
|
108 |
duration = get_video_duration_seconds(video)
|
109 |
+
if duration > 1800: # 30 minutes
|
110 |
yield [
|
111 |
+
"Video must be shorter than 30 minutes",
|
112 |
"",
|
113 |
"",
|
114 |
gr.update(visible=False),
|
|
|
122 |
"",
|
123 |
"",
|
124 |
gr.update(visible=False),
|
125 |
+
gr.update(visible=False)
|
126 |
]
|
127 |
|
128 |
model, processor = load_model()
|
129 |
detector = BatchedVideoHighlightDetector(
|
130 |
model,
|
131 |
processor,
|
132 |
+
batch_size=12
|
133 |
)
|
134 |
|
135 |
yield [
|
|
|
141 |
]
|
142 |
|
143 |
video_desc = detector.analyze_video_content(video)
|
144 |
+
formatted_desc = f"### Summary: {video_desc[:500] + '...' if len(video_desc) > 500 else video_desc}"
|
145 |
|
146 |
yield [
|
147 |
"Determining highlight types...",
|
|
|
152 |
]
|
153 |
|
154 |
highlights = detector.determine_highlights(video_desc)
|
155 |
+
formatted_highlights = f"### Highlights to search for: {highlights[:500] + '...' if len(highlights) > 500 else highlights}"
|
156 |
|
157 |
# Get all segments
|
158 |
segments = get_fixed_30s_segments(video)
|
|
|
223 |
)
|
224 |
|
225 |
return app
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
226 |
|
227 |
if __name__ == "__main__":
|
228 |
# Initialize CUDA
|
video_highlight_detector.py
CHANGED
@@ -318,7 +318,6 @@ class BatchedVideoHighlightDetector:
|
|
318 |
batch_size=8,
|
319 |
max_frames_per_segment=32,
|
320 |
target_fps=1.0,
|
321 |
-
progress_callback=None
|
322 |
):
|
323 |
self.model = model
|
324 |
self.processor = processor
|
@@ -326,7 +325,6 @@ class BatchedVideoHighlightDetector:
|
|
326 |
self.batch_size = batch_size
|
327 |
self.max_frames_per_segment = max_frames_per_segment
|
328 |
self.target_fps = target_fps
|
329 |
-
self.progress_callback = progress_callback
|
330 |
|
331 |
def _extract_frames_batch(
|
332 |
self,
|
@@ -498,10 +496,7 @@ class BatchedVideoHighlightDetector:
|
|
498 |
for output in outputs
|
499 |
]
|
500 |
|
501 |
-
|
502 |
-
if self.progress_callback:
|
503 |
-
self.progress_callback(segments_processed + len(segments), total_segments)
|
504 |
-
|
505 |
# Check for "yes" in responses
|
506 |
return ["yes" in response for response in responses]
|
507 |
|
|
|
318 |
batch_size=8,
|
319 |
max_frames_per_segment=32,
|
320 |
target_fps=1.0,
|
|
|
321 |
):
|
322 |
self.model = model
|
323 |
self.processor = processor
|
|
|
325 |
self.batch_size = batch_size
|
326 |
self.max_frames_per_segment = max_frames_per_segment
|
327 |
self.target_fps = target_fps
|
|
|
328 |
|
329 |
def _extract_frames_batch(
|
330 |
self,
|
|
|
496 |
for output in outputs
|
497 |
]
|
498 |
|
499 |
+
|
|
|
|
|
|
|
500 |
# Check for "yes" in responses
|
501 |
return ["yes" in response for response in responses]
|
502 |
|