AnsenH committed on
Commit
c639a10
·
1 Parent(s): ec35ab9

chore: handle mov to mp4 conversion

Browse files
Files changed (1) hide show
  1. app.py +36 -15
app.py CHANGED
@@ -3,6 +3,8 @@ from run_on_video.run import MomentDETRPredictor
3
  from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
4
  import torch
5
  from lbhd.infer import lbhd_predict
 
 
6
 
7
  DESCRIPTION = """
8
  _This Space demonstrates model [QVHighlights: Detecting Moments and Highlights in Videos via Natural Language Queries](https://arxiv.org/abs/2107.09609), NeurIPS 2021, by [Jie Lei](http://www.cs.unc.edu/~jielei/), [Tamara L. Berg](http://tamaraberg.com/), [Mohit Bansal](http://www.cs.unc.edu/~mbansal/)_
@@ -78,10 +80,15 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
78
  }
79
 
80
  def submit_video(input_video, retrieval_text):
 
 
 
 
 
81
  print(f'== video path: {input_video}')
82
  print(f'== retrieval_text: {retrieval_text}')
83
  if input_video is None:
84
- return [None, None, None, None, None, None, None, None, 1]
85
  if retrieval_text is None:
86
  retrieval_text = ''
87
  predictions, video_frames = moment_detr_predictor.localize_moment(
@@ -90,7 +97,7 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
90
  )
91
  predictions = predictions[0]['pred_relevant_windows']
92
  output_files = [ trim_video(
93
- video_path=input_video,
94
  start=predictions[i][0],
95
  end=predictions[i][1],
96
  output_file=f'{i}.mp4'
@@ -99,23 +106,37 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
99
  lbhd_predictions = lbhd_predict(input_video)
100
  print(f'== lbhd_predictions: {lbhd_predictions}')
101
  output_files_lbhd = [ trim_video(
102
- video_path=input_video,
103
  start=lbhd_predictions[i][0],
104
  end=lbhd_predictions[i][1],
105
  output_file=f'{i}_lbhd.mp4'
106
  ) for i in range(min(10, len(lbhd_predictions)))]
107
 
108
- return {
109
- output_videos: output_files,
110
- output_lbhd_videos: output_files_lbhd,
111
- moment_prediction: predictions,
112
- our_prediction: lbhd_predictions,
113
- playable_video: output_files[0],
114
- our_result_video: output_files_lbhd[0],
115
- display_score: display_prediction(predictions[0]),
116
- display_clip_score: display_prediction(lbhd_predictions[0]),
117
- radio_button: 1
118
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
  radio_button.change(
121
  fn=update_video_player,
@@ -126,7 +147,7 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
126
  submit.click(
127
  fn=submit_video,
128
  inputs=[input_video, retrieval_text],
129
- outputs=[output_videos, output_lbhd_videos, moment_prediction, our_prediction, playable_video, our_result_video, display_score, display_clip_score, radio_button]
130
  )
131
 
132
  demo.launch()
 
3
  from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
4
  import torch
5
  from lbhd.infer import lbhd_predict
6
+ import os
7
+ import subprocess
8
 
9
  DESCRIPTION = """
10
  _This Space demonstrates model [QVHighlights: Detecting Moments and Highlights in Videos via Natural Language Queries](https://arxiv.org/abs/2107.09609), NeurIPS 2021, by [Jie Lei](http://www.cs.unc.edu/~jielei/), [Tamara L. Berg](http://tamaraberg.com/), [Mohit Bansal](http://www.cs.unc.edu/~mbansal/)_
 
80
  }
81
 
82
  def submit_video(input_video, retrieval_text):
83
+ ext = os.path.splitext(input_video)[-1].lower()
84
+ if ext == ".mov":
85
+ output_file = os.path.join(input_video.replace(".mov", ".mp4"))
86
+ subprocess.call(['ffmpeg', '-i', input_video, output_file])
87
+
88
  print(f'== video path: {input_video}')
89
  print(f'== retrieval_text: {retrieval_text}')
90
  if input_video is None:
91
+ return [None, None, None, None, None, None, None, None, None, 1]
92
  if retrieval_text is None:
93
  retrieval_text = ''
94
  predictions, video_frames = moment_detr_predictor.localize_moment(
 
97
  )
98
  predictions = predictions[0]['pred_relevant_windows']
99
  output_files = [ trim_video(
100
+ video_path= output_file if ext == ".mov" else input_video,
101
  start=predictions[i][0],
102
  end=predictions[i][1],
103
  output_file=f'{i}.mp4'
 
106
  lbhd_predictions = lbhd_predict(input_video)
107
  print(f'== lbhd_predictions: {lbhd_predictions}')
108
  output_files_lbhd = [ trim_video(
109
+ video_path= output_file if ext == ".mov" else input_video,
110
  start=lbhd_predictions[i][0],
111
  end=lbhd_predictions[i][1],
112
  output_file=f'{i}_lbhd.mp4'
113
  ) for i in range(min(10, len(lbhd_predictions)))]
114
 
115
+ return [
116
+ output_file if ext == ".mov" else input_video,
117
+ output_files,
118
+ output_files_lbhd,
119
+ predictions,
120
+ lbhd_predictions,
121
+ output_files[0],
122
+ output_files_lbhd[0],
123
+ display_prediction(predictions[0]),
124
+ display_prediction(lbhd_predictions[0]),
125
+ 1
126
+ ]
127
+
128
+ # return {
129
+ # input_video: output_file if ext == ".mov" else input_video,
130
+ # output_videos: output_files,
131
+ # output_lbhd_videos: output_files_lbhd,
132
+ # moment_prediction: predictions,
133
+ # our_prediction: lbhd_predictions,
134
+ # playable_video: output_files[0],
135
+ # our_result_video: output_files_lbhd[0],
136
+ # display_score: display_prediction(predictions[0]),
137
+ # display_clip_score: display_prediction(lbhd_predictions[0]),
138
+ # radio_button: 1
139
+ # }
140
 
141
  radio_button.change(
142
  fn=update_video_player,
 
147
  submit.click(
148
  fn=submit_video,
149
  inputs=[input_video, retrieval_text],
150
+ outputs=[input_video, output_videos, output_lbhd_videos, moment_prediction, our_prediction, playable_video, our_result_video, display_score, display_clip_score, radio_button]
151
  )
152
 
153
  demo.launch()