Spaces: Running on A100
- app.py +7 -4
- video_highlight_detector.py +0 -54
app.py CHANGED

@@ -78,7 +78,8 @@ def process_video(
     add_watermark(temp_output, output_path)
 
     os.unlink(temp_output)
-
+    progress(1.0, desc="Complete!")
+
     video_description = video_description[:500] + "..." if len(video_description) > 500 else video_description
     highlight_types = highlight_types[:500] + "..." if len(highlight_types) > 500 else highlight_types
 
@@ -125,7 +126,9 @@ def create_ui(examples_path: str):
                 label="Upload your video (max 20 minutes)",
                 interactive=True
             )
-
+
+            gr.Progress()
+            process_btn = gr.Button("Process Video", variant="primary")
 
             status = gr.Markdown(visible=True)
 
@@ -139,7 +142,7 @@ def create_ui(examples_path: str):
             output_video = gr.Video(label="Highlight Video", visible=False)
             download_btn = gr.Button("Download Highlights", visible=False)
 
-        def on_process(video):
+        def on_process(video, progress=gr.Progress()):
             if not video:
                 return {
                     status: "Please upload a video",
@@ -150,7 +153,7 @@ def create_ui(examples_path: str):
                 }
 
             status.value = "Processing video..."
-            output_path, desc, highlights, err = process_video(video)
+            output_path, desc, highlights, err = process_video(video, progress=progress)
 
             if err:
                 return {
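The app.py change threads Gradio's progress tracking through the UI: `on_process` now accepts `progress=gr.Progress()`, forwards it into `process_video`, and `process_video` reports completion via `progress(1.0, desc="Complete!")`. Below is a minimal, self-contained sketch of that pattern; the component and function names follow the diff, but the processing body is a placeholder.

```python
import time

import gradio as gr

def process_video(video_path, progress=gr.Progress()):
    # The progress object is an ordinary callable, so any function that
    # receives it can drive the same bar, not just the event handler.
    progress(0.1, desc="Analyzing video...")
    time.sleep(1)  # placeholder for the real detection/rendering work
    progress(1.0, desc="Complete!")
    return video_path, "description", "highlights", None

with gr.Blocks() as demo:
    video = gr.Video(label="Upload your video (max 20 minutes)", interactive=True)
    process_btn = gr.Button("Process Video", variant="primary")
    status = gr.Markdown(visible=True)

    def on_process(video, progress=gr.Progress()):
        # Gradio injects a live tracker when gr.Progress() appears as a
        # default argument of an event handler; pass it down to inner calls.
        if not video:
            return "Please upload a video"
        output_path, desc, highlights, err = process_video(video, progress=progress)
        return err or f"Done: {output_path}"

    process_btn.click(on_process, inputs=[video], outputs=[status])

if __name__ == "__main__":
    demo.launch()
```

One caveat: the bare `gr.Progress()` added at line 130 does not render anything on its own, since `gr.Progress` is not a layout component; the bar appears because a tracker is injected through the handler's default argument, which is what the `on_process` signature change accomplishes.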
video_highlight_detector.py CHANGED

@@ -768,57 +768,3 @@ def load_model(
 
     return model, processor
 
-
-# def load_model(
-#     checkpoint_path: Optional[str] = None,
-#     base_model_id: str = "HuggingFaceTB/SmolVLM-2.2B-Instruct",
-#     device: str = "cuda"
-# ):
-#     """Load the model and processor."""
-#     # For demonstration, we set the target size
-#     video_target_size = 384
-
-#     processor = AutoProcessor.from_pretrained(base_model_id)
-#     # Configure the image processor
-#     processor.image_processor.size = {"longest_edge": video_target_size}
-#     processor.image_processor.do_resize = True
-#     processor.image_processor.do_image_splitting = False
-
-#     if checkpoint_path:
-#         model = SmolVLMForConditionalGeneration.from_pretrained(
-#             checkpoint_path,
-#             torch_dtype=torch.bfloat16,
-#             device_map=device
-#         )
-#     else:
-#         model = SmolVLMForConditionalGeneration.from_pretrained(
-#             base_model_id,
-#             torch_dtype=torch.bfloat16,
-#             device_map=device
-#         )
-
-#     return model, processor
-
-
-def main():
-    checkpoint_path = "/fsx/miquel/smolvlmvideo/checkpoints/final-visionUnfrozen-balanced/checkpoint-6550"
-    base_model_id = "HuggingFaceTB/SmolVLM-2.2B-Instruct"
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-
-    model, processor = load_model(checkpoint_path, base_model_id, device)
-    detector = BatchedVideoHighlightDetector(model, processor, device=device)
-
-    if len(sys.argv) < 3:
-        print("Usage: python video_highlight_detector.py <input_video> <output_video>")
-        sys.exit(1)
-
-    video_path = sys.argv[1]
-    output_path = sys.argv[2]
-
-    # Create highlight video
-    highlight_segments = detector.create_highlight_video(video_path, output_path)
-    print(f"Created highlight video with {len(highlight_segments)} segments")
-
-
-if __name__ == "__main__":
-    main()
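The video_highlight_detector.py change is pure cleanup: it deletes a commented-out duplicate of `load_model` and a `main()` entry point tied to a hard-coded checkpoint path under /fsx. The deleted code still documents how the module is meant to be driven; here is a hypothetical caller based on the signatures visible in the removed lines (the input/output file names and the keyword call to `load_model` are assumptions).

```python
# Hypothetical caller sketch. load_model and BatchedVideoHighlightDetector are
# the names used by the deleted main(); calling load_model() with defaults
# assumes the surviving definition matches the commented-out variant.
import torch

from video_highlight_detector import BatchedVideoHighlightDetector, load_model

device = "cuda" if torch.cuda.is_available() else "cpu"

# With no checkpoint_path, the commented-out variant fell back to the base
# "HuggingFaceTB/SmolVLM-2.2B-Instruct" checkpoint.
model, processor = load_model(device=device)
detector = BatchedVideoHighlightDetector(model, processor, device=device)

segments = detector.create_highlight_video("input.mp4", "highlights.mp4")
print(f"Created highlight video with {len(segments)} segments")
```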