mfarre HF staff committed on
Commit
e50a69b
·
1 Parent(s): 93c34ad
Files changed (2) hide show
  1. app.py +7 -4
  2. video_highlight_detector.py +0 -54
app.py CHANGED
@@ -78,7 +78,8 @@ def process_video(
78
  add_watermark(temp_output, output_path)
79
 
80
  os.unlink(temp_output)
81
-
 
82
  video_description = video_description[:500] + "..." if len(video_description) > 500 else video_description
83
  highlight_types = highlight_types[:500] + "..." if len(highlight_types) > 500 else highlight_types
84
 
@@ -125,7 +126,9 @@ def create_ui(examples_path: str):
125
  label="Upload your video (max 20 minutes)",
126
  interactive=True
127
  )
128
- process_btn = gr.Button("Process Video", variant="primary")
 
 
129
 
130
  status = gr.Markdown(visible=True)
131
 
@@ -139,7 +142,7 @@ def create_ui(examples_path: str):
139
  output_video = gr.Video(label="Highlight Video", visible=False)
140
  download_btn = gr.Button("Download Highlights", visible=False)
141
 
142
- def on_process(video):
143
  if not video:
144
  return {
145
  status: "Please upload a video",
@@ -150,7 +153,7 @@ def create_ui(examples_path: str):
150
  }
151
 
152
  status.value = "Processing video..."
153
- output_path, desc, highlights, err = process_video(video)
154
 
155
  if err:
156
  return {
 
78
  add_watermark(temp_output, output_path)
79
 
80
  os.unlink(temp_output)
81
+ progress(1.0, desc="Complete!")
82
+
83
  video_description = video_description[:500] + "..." if len(video_description) > 500 else video_description
84
  highlight_types = highlight_types[:500] + "..." if len(highlight_types) > 500 else highlight_types
85
 
 
126
  label="Upload your video (max 20 minutes)",
127
  interactive=True
128
  )
129
+
130
+ gr.Progress()
131
+ process_btn = gr.Button("Process Video", variant="primary")
132
 
133
  status = gr.Markdown(visible=True)
134
 
 
142
  output_video = gr.Video(label="Highlight Video", visible=False)
143
  download_btn = gr.Button("Download Highlights", visible=False)
144
 
145
+ def on_process(video, progress=gr.Progress()):
146
  if not video:
147
  return {
148
  status: "Please upload a video",
 
153
  }
154
 
155
  status.value = "Processing video..."
156
+ output_path, desc, highlights, err = process_video(video, progress=progress)
157
 
158
  if err:
159
  return {
video_highlight_detector.py CHANGED
@@ -768,57 +768,3 @@ def load_model(
768
 
769
  return model, processor
770
 
771
-
772
- # def load_model(
773
- # checkpoint_path: Optional[str] = None,
774
- # base_model_id: str = "HuggingFaceTB/SmolVLM-2.2B-Instruct",
775
- # device: str = "cuda"
776
- # ):
777
- # """Load the model and processor."""
778
- # # For demonstration, we set the target size
779
- # video_target_size = 384
780
-
781
- # processor = AutoProcessor.from_pretrained(base_model_id)
782
- # # Configure the image processor
783
- # processor.image_processor.size = {"longest_edge": video_target_size}
784
- # processor.image_processor.do_resize = True
785
- # processor.image_processor.do_image_splitting = False
786
-
787
- # if checkpoint_path:
788
- # model = SmolVLMForConditionalGeneration.from_pretrained(
789
- # checkpoint_path,
790
- # torch_dtype=torch.bfloat16,
791
- # device_map=device
792
- # )
793
- # else:
794
- # model = SmolVLMForConditionalGeneration.from_pretrained(
795
- # base_model_id,
796
- # torch_dtype=torch.bfloat16,
797
- # device_map=device
798
- # )
799
-
800
- # return model, processor
801
-
802
-
803
- def main():
804
- checkpoint_path = "/fsx/miquel/smolvlmvideo/checkpoints/final-visionUnfrozen-balanced/checkpoint-6550"
805
- base_model_id = "HuggingFaceTB/SmolVLM-2.2B-Instruct"
806
- device = "cuda" if torch.cuda.is_available() else "cpu"
807
-
808
- model, processor = load_model(checkpoint_path, base_model_id, device)
809
- detector = BatchedVideoHighlightDetector(model, processor, device=device)
810
-
811
- if len(sys.argv) < 3:
812
- print("Usage: python video_highlight_detector.py <input_video> <output_video>")
813
- sys.exit(1)
814
-
815
- video_path = sys.argv[1]
816
- output_path = sys.argv[2]
817
-
818
- # Create highlight video
819
- highlight_segments = detector.create_highlight_video(video_path, output_path)
820
- print(f"Created highlight video with {len(highlight_segments)} segments")
821
-
822
-
823
- if __name__ == "__main__":
824
- main()
 
768
 
769
  return model, processor
770