File size: 3,519 Bytes
ef1c94f
79d80e3
 
ef1c94f
79d80e3
 
 
ef1c94f
79d80e3
 
 
 
 
ef1c94f
79d80e3
 
 
ef1c94f
 
79d80e3
 
 
24860f2
 
 
 
 
 
 
 
 
 
 
ef1c94f
 
24860f2
ef1c94f
 
 
 
24860f2
79d80e3
 
 
 
 
 
ef1c94f
79d80e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef1c94f
79d80e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef1c94f
79d80e3
 
 
 
 
ef1c94f
 
79d80e3
 
 
 
ef1c94f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import gradio as gr 
from run_on_video.run import MomentDETRPredictor
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip

# Settings handed to the predictor: checkpoint file, CLIP backbone name and
# the device string.
ckpt_path = "run_on_video/moment_detr_ckpt/model_best.ckpt"
clip_model_name_or_path = "ViT-B/32"
device = "cpu"

# Single predictor instance, created at import time and shared by the request
# handlers defined further down.
moment_detr_predictor = MomentDETRPredictor(
    ckpt_path=ckpt_path, clip_model_name_or_path=clip_model_name_or_path, device=device
)

def trim_video(video_path, start, end, output_file='result.mp4'):
    """Cut the span between ``start`` and ``end`` out of a video file.

    Args:
        video_path: Path of the source video.
        start: Start time of the clip, forwarded to MoviePy (seconds).
        end: End time of the clip, forwarded to MoviePy (seconds).
        output_file: Path the extracted clip is written to.

    Returns:
        ``output_file``, unchanged, so callers can collect the written paths.
    """
    # The destination is passed positionally on purpose: MoviePy 1.x names this
    # parameter ``targetname`` while 2.x renamed it to ``outputfile``, so the
    # keyword form only works on one of the two major versions.
    ffmpeg_extract_subclip(video_path, start, end, output_file)
    return output_file

# Number of highlight clips offered in the UI; shared by the moment selector
# and the trimming step so the two cannot drift apart.
MAX_MOMENTS = 10

with gr.Blocks() as demo:
    # State shared between the two handlers below: submit_video stores the
    # trimmed clip paths and their scores, update_video_player reads them back.
    output_videos = gr.State([])
    moment_scores = gr.State([])
    gr.HTML("""<h2 align="center"> ✍️ Highlight Detection with MomentDETR </h2>""")
    with gr.Column():
        # Top row: the uploaded video on the left, the selected highlight on the right.
        with gr.Row():
            with gr.Blocks():
                with gr.Column():
                    gr.HTML("""<h3 align="center"> Input Video </h3>""")
                    input_video = gr.PlayableVideo()
            with gr.Blocks():
                with gr.Column():
                    gr.HTML("""<h3 align="center"> Highlight Videos </h3>""")
                    playable_video = gr.PlayableVideo()
        # Bottom row: the query form on the left, score and moment selector on the right.
        with gr.Row():
            with gr.Column():
                retrieval_text = gr.Textbox(
                    label="Query text",
                    placeholder="What should be highlighted?",
                    visible=True,
                )
                submit = gr.Button("Submit")
            with gr.Column():
                display_score = gr.Markdown("### Moment Score: ")
                radio_button = gr.Radio(
                    choices=list(range(MAX_MOMENTS)),
                    label="Moments",
                    value=0,
                )

    def update_video_player(radio_value, output_videos, moment_scores):
        """Show the clip and score that belong to the selected moment index.

        Args:
            radio_value: Index chosen in the "Moments" radio group.
            output_videos: Clip paths stored by ``submit_video``.
            moment_scores: Scores stored by ``submit_video``, in the same order.

        Returns:
            Dict of updates for the highlight player and the score label.
        """
        available = min(len(output_videos), len(moment_scores))
        # The radio can change before any query was submitted (empty state) or
        # point past the clips actually produced; leave the player untouched
        # instead of failing with IndexError.
        if radio_value is None or not 0 <= radio_value < available:
            return {playable_video: gr.update(), display_score: gr.update()}
        return {
            playable_video: output_videos[radio_value],
            display_score: f'### Moment Score: {moment_scores[radio_value]}',
        }

    def submit_video(input_video, retrieval_text):
        """Run the predictor on the uploaded video and cut out the predicted moments.

        Args:
            input_video: Path of the uploaded video, or None if nothing was uploaded.
            retrieval_text: Query text; None is treated as an empty query.

        Returns:
            Dict of updates: the clip paths and scores for the two state slots,
            plus the first clip and its score for the player and score label.

        Raises:
            gr.Error: If no video was provided.
        """
        print(f'== video path: {input_video}')
        print(f'== retrieval_text: {retrieval_text}')
        if input_video is None:
            # Report a readable message in the UI rather than failing inside the predictor.
            raise gr.Error("Please upload a video before submitting.")
        if retrieval_text is None:
            retrieval_text = ''
        predictions, _ = moment_detr_predictor.localize_moment(
            video_path=input_video,
            query_list=[retrieval_text],
        )
        # Each predicted window is read as (start, end, ..., score).
        relevant_windows = predictions[0]['pred_relevant_windows']
        pred_windows = [[pred[0], pred[1]] for pred in relevant_windows]
        scores = [pred[-1] for pred in relevant_windows]

        print(f'== predict start end time: {pred_windows}')
        print(f'== prediction scores: {scores}')
        # Trim only the windows that exist, capped at what the selector can
        # address, instead of assuming exactly MAX_MOMENTS predictions.
        # NOTE(review): clips are written to the working directory under fixed
        # names, so concurrent sessions overwrite each other's files.
        output_files = [
            trim_video(
                video_path=input_video,
                start=start,
                end=end,
                output_file=f'{i}.mp4',
            )
            for i, (start, end) in enumerate(pred_windows[:MAX_MOMENTS])
        ]
        print(f'== output_files: {output_files}')
        if not output_files:
            # No moment was predicted: reset state and player rather than index
            # into empty lists.
            return {
                output_videos: [],
                moment_scores: [],
                playable_video: None,
                display_score: '### Moment Score: ',
            }
        return {
            output_videos: output_files,
            moment_scores: scores,
            playable_video: output_files[0],
            display_score: f'### Moment Score: {scores[0]}',
        }

    # Selecting another moment swaps the clip shown in the highlight player.
    radio_button.change(
        fn=update_video_player,
        inputs=[radio_button, output_videos, moment_scores],
        outputs=[playable_video, display_score],
    )

    # Submitting a query recomputes the highlights and refreshes state and player.
    submit.click(
        fn=submit_video,
        inputs=[input_video, retrieval_text],
        outputs=[output_videos, moment_scores, playable_video, display_score],
    )

demo.launch()