# NOTE(review): the three lines that were here ("Spaces" / "Runtime error" x2)
# are Hugging Face Space page chrome captured by the scrape, not program text.
import torch

import gradio as gr
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip

from run_on_video.run import MomentDETRPredictor

# Markdown blurb rendered at the top of the demo page.
DESCRIPTION = """
_This Space demonstrates model [QVHighlights: Detecting Moments and Highlights in Videos via Natural Language Queries](https://arxiv.org/abs/2107.09609), NeurIPS 2021, by [Jie Lei](http://www.cs.unc.edu/~jielei/), [Tamara L. Berg](http://tamaraberg.com/), [Mohit Bansal](http://www.cs.unc.edu/~mbansal/)_
"""

# Moment-DETR checkpoint and the CLIP backbone used to embed video frames / query text.
ckpt_path = "run_on_video/moment_detr_ckpt/model_best.ckpt"
clip_model_name_or_path = "ViT-B/32"

# Prefer GPU when available; the predictor runs on CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

moment_detr_predictor = MomentDETRPredictor(
    ckpt_path=ckpt_path,
    clip_model_name_or_path=clip_model_name_or_path,
    device=device,
)
def trim_video(video_path, start, end, output_file='result.mp4'):
    """Cut the [start, end] segment of *video_path* into *output_file*.

    Times are in seconds. Returns the path of the written clip.
    """
    clip_path = output_file
    ffmpeg_extract_subclip(video_path, start, end, targetname=clip_path)
    return clip_path
def display_prediction(result):
    """Format one predicted moment (start, end, score) as a Markdown heading."""
    start, end, score = result[0], result[1], result[2]
    return f'### Moment Start time: {start}, End time: {end}, Score: {score}'
with gr.Blocks(theme=gr.themes.Default()) as demo:
    # Per-session state: the trimmed clip paths and the raw model predictions.
    output_videos = gr.State(None)
    moment_prediction = gr.State(None)
    # NOTE(review): "๐๏ธ" below is mojibake of an emoji in the original
    # source; original bytes preserved — restore the intended emoji manually.
    gr.HTML("""<h2 align="center"> ๐๏ธ Highlight Detection with MomentDETR </h2>""")
    gr.Markdown(DESCRIPTION)
    with gr.Column():
        with gr.Row():
            # Fix: the original nested `gr.Blocks()` here, but a Blocks cannot
            # be nested inside another Blocks — `gr.Group` is the container
            # meant for visually grouping components.
            with gr.Group():
                with gr.Column():
                    gr.HTML("""<h3 align="center"> Input Video </h3>""")
                    input_video = gr.Video(label="Please input mp4", height=400)
            with gr.Group():
                with gr.Column():
                    gr.HTML("""<h3 align="center"> Highlight Videos </h3>""")
                    playable_video = gr.Video(height=400)
        with gr.Row():
            with gr.Column():
                retrieval_text = gr.Textbox(
                    label="Query text",
                    placeholder="What should be highlighted?",
                    visible=True
                )
                submit = gr.Button("Submit")
            with gr.Column():
                # One radio choice per ranked moment, 1 (best) through 10.
                radio_button = gr.Radio(
                    choices=[i + 1 for i in range(10)],
                    label="Moments",
                    value=1
                )
                display_score = gr.Markdown("### Moment Score: ")
def update_video_player(radio_value, output_videos, moment_prediction): | |
if output_videos is None or moment_prediction is None: | |
return [None, None] | |
return { | |
playable_video: output_videos[radio_value-1], | |
display_score: display_prediction(moment_prediction[radio_value-1]) | |
} | |
def submit_video(input_video, retrieval_text): | |
print(f'== video path: {input_video}') | |
print(f'== retrieval_text: {retrieval_text}') | |
if input_video is None: | |
return [None, None, None, None, 1] | |
if retrieval_text is None: | |
retrieval_text = '' | |
predictions, video_frames = moment_detr_predictor.localize_moment( | |
video_path=input_video, | |
query_list=[retrieval_text] | |
) | |
predictions = predictions[0]['pred_relevant_windows'] | |
pred_windows = [[pred[0], pred[1]]for pred in predictions] | |
output_files = [ trim_video( | |
video_path=input_video, | |
start=pred_windows[i][0], | |
end=pred_windows[i][1], | |
output_file=f'{i}.mp4' | |
) for i in range(10)] | |
return { | |
output_videos: output_files, | |
moment_prediction: predictions, | |
playable_video: output_files[0], | |
display_score: display_prediction(predictions[0]), | |
radio_button: 1 | |
} | |
radio_button.change( | |
fn=update_video_player, | |
inputs=[radio_button, output_videos, moment_prediction], | |
outputs=[playable_video, display_score] | |
) | |
submit.click( | |
fn=submit_video, | |
inputs=[input_video, retrieval_text], | |
outputs=[output_videos, moment_prediction, playable_video, display_score, radio_button] | |
) | |
demo.launch() |