Spaces: Running on A100
- app.py +7 -4
- video_highlight_detector.py +0 -54
app.py CHANGED

@@ -78,7 +78,8 @@ def process_video(
     add_watermark(temp_output, output_path)
 
     os.unlink(temp_output)
-
+    progress(1.0, desc="Complete!")
+
     video_description = video_description[:500] + "..." if len(video_description) > 500 else video_description
     highlight_types = highlight_types[:500] + "..." if len(highlight_types) > 500 else highlight_types
 
@@ -125,7 +126,9 @@ def create_ui(examples_path: str):
                 label="Upload your video (max 20 minutes)",
                 interactive=True
             )
-
+
+            gr.Progress()
+            process_btn = gr.Button("Process Video", variant="primary")
 
             status = gr.Markdown(visible=True)
 
@@ -139,7 +142,7 @@ def create_ui(examples_path: str):
             output_video = gr.Video(label="Highlight Video", visible=False)
             download_btn = gr.Button("Download Highlights", visible=False)
 
-        def on_process(video):
+        def on_process(video, progress=gr.Progress()):
             if not video:
                 return {
                     status: "Please upload a video",
@@ -150,7 +153,7 @@ def create_ui(examples_path: str):
                 }
 
             status.value = "Processing video..."
-            output_path, desc, highlights, err = process_video(video)
+            output_path, desc, highlights, err = process_video(video, progress=progress)
 
             if err:
                 return {
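The app.py change threads Gradio's progress tracking through the UI: `on_process` now accepts `progress=gr.Progress()`, forwards it into `process_video`, and `process_video` reports completion via `progress(1.0, desc="Complete!")`. Below is a minimal, self-contained sketch of that pattern; the component and function names follow the diff, but the processing body is a placeholder.

```python
import time

import gradio as gr

def process_video(video_path, progress=gr.Progress()):
    # The progress object is an ordinary callable, so any function that
    # receives it can drive the same bar, not just the event handler.
    progress(0.1, desc="Analyzing video...")
    time.sleep(1)  # placeholder for the real detection/rendering work
    progress(1.0, desc="Complete!")
    return video_path, "description", "highlights", None

with gr.Blocks() as demo:
    video = gr.Video(label="Upload your video (max 20 minutes)", interactive=True)
    process_btn = gr.Button("Process Video", variant="primary")
    status = gr.Markdown(visible=True)

    def on_process(video, progress=gr.Progress()):
        # Gradio injects a live tracker when gr.Progress() appears as a
        # default argument of an event handler; pass it down to inner calls.
        if not video:
            return "Please upload a video"
        output_path, desc, highlights, err = process_video(video, progress=progress)
        return err or f"Done: {output_path}"

    process_btn.click(on_process, inputs=[video], outputs=[status])

if __name__ == "__main__":
    demo.launch()
```

One caveat: the bare `gr.Progress()` added at line 130 does not render anything on its own, since `gr.Progress` is not a layout component; the bar appears because a tracker is injected through the handler's default argument, which is what the `on_process` signature change accomplishes.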
video_highlight_detector.py CHANGED

@@ -768,57 +768,3 @@ def load_model(
 
     return model, processor
 
-
-# def load_model(
-#     checkpoint_path: Optional[str] = None,
-#     base_model_id: str = "HuggingFaceTB/SmolVLM-2.2B-Instruct",
-#     device: str = "cuda"
-# ):
-#     """Load the model and processor."""
-#     # For demonstration, we set the target size
-#     video_target_size = 384
-
-#     processor = AutoProcessor.from_pretrained(base_model_id)
-#     # Configure the image processor
-#     processor.image_processor.size = {"longest_edge": video_target_size}
-#     processor.image_processor.do_resize = True
-#     processor.image_processor.do_image_splitting = False
-
-#     if checkpoint_path:
-#         model = SmolVLMForConditionalGeneration.from_pretrained(
-#             checkpoint_path,
-#             torch_dtype=torch.bfloat16,
-#             device_map=device
-#         )
-#     else:
-#         model = SmolVLMForConditionalGeneration.from_pretrained(
-#             base_model_id,
-#             torch_dtype=torch.bfloat16,
-#             device_map=device
-#         )
-
-#     return model, processor
-
-
-def main():
-    checkpoint_path = "/fsx/miquel/smolvlmvideo/checkpoints/final-visionUnfrozen-balanced/checkpoint-6550"
-    base_model_id = "HuggingFaceTB/SmolVLM-2.2B-Instruct"
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-
-    model, processor = load_model(checkpoint_path, base_model_id, device)
-    detector = BatchedVideoHighlightDetector(model, processor, device=device)
-
-    if len(sys.argv) < 3:
-        print("Usage: python video_highlight_detector.py <input_video> <output_video>")
-        sys.exit(1)
-
-    video_path = sys.argv[1]
-    output_path = sys.argv[2]
-
-    # Create highlight video
-    highlight_segments = detector.create_highlight_video(video_path, output_path)
-    print(f"Created highlight video with {len(highlight_segments)} segments")
-
-
-if __name__ == "__main__":
-    main()
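The video_highlight_detector.py change is pure cleanup: it deletes a commented-out duplicate of `load_model` and a `main()` entry point tied to a hard-coded checkpoint path under /fsx. The deleted code still documents how the module is meant to be driven; here is a hypothetical caller based on the signatures visible in the removed lines (the input/output file names and the keyword call to `load_model` are assumptions).

```python
# Hypothetical caller sketch. load_model and BatchedVideoHighlightDetector are
# the names used by the deleted main(); calling load_model() with defaults
# assumes the surviving definition matches the commented-out variant.
import torch

from video_highlight_detector import BatchedVideoHighlightDetector, load_model

device = "cuda" if torch.cuda.is_available() else "cpu"

# With no checkpoint_path, the commented-out variant fell back to the base
# "HuggingFaceTB/SmolVLM-2.2B-Instruct" checkpoint.
model, processor = load_model(device=device)
detector = BatchedVideoHighlightDetector(model, processor, device=device)

segments = detector.create_highlight_video("input.mp4", "highlights.mp4")
print(f"Created highlight video with {len(segments)} segments")
```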