import os
import gradio as gr
from utils import (
create_gif_from_video_file,
download_youtube_video,
get_num_total_frames,
)
from transformers import pipeline
from huggingface_hub import HfApi
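# default stride between sampled frames; predict() lowers it for short clips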
FRAME_SAMPLING_RATE = 4
DEFAULT_MODEL = "facebook/timesformer-base-finetuned-k400"
VALID_VIDEOCLASSIFICATION_MODELS = [
"MCG-NJU/videomae-large-finetuned-kinetics",
"facebook/timesformer-base-finetuned-k400",
"fcakyon/timesformer-large-finetuned-k400",
"MCG-NJU/videomae-base-finetuned-kinetics",
"facebook/timesformer-base-finetuned-k600",
"fcakyon/timesformer-large-finetuned-k600",
"facebook/timesformer-hr-finetuned-k400",
"facebook/timesformer-hr-finetuned-k600",
"facebook/timesformer-base-finetuned-ssv2",
"fcakyon/timesformer-large-finetuned-ssv2",
"facebook/timesformer-hr-finetuned-ssv2",
"MCG-NJU/videomae-base-finetuned-ssv2",
"MCG-NJU/videomae-base-short-finetuned-kinetics",
"MCG-NJU/videomae-base-short-ssv2",
"MCG-NJU/videomae-base-short-finetuned-ssv2",
"sayakpaul/videomae-base-finetuned-ucf101-subset",
"nateraw/videomae-base-finetuned-ucf101",
"MCG-NJU/videomae-base-ssv2",
"zahrav/videomae-base-finetuned-ucf101-subset",
]
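# load the default classification pipeline once at startup; select_model() swaps it at runtime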
pipe = pipeline(
task="video-classification",
model=DEFAULT_MODEL,
top_k=5,
frame_sampling_rate=FRAME_SAMPLING_RATE,
)
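# example YouTube clips offered in the interface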
examples = [
["https://www.youtube.com/watch?v=huAJ9dC5lmI"],
["https://www.youtube.com/watch?v=wvcWt6u5HTg"],
["https://www.youtube.com/watch?v=-3kZSi5qjRM"],
["https://www.youtube.com/watch?v=-6usjfP8hys"],
["https://www.youtube.com/watch?v=BDHub0gBGtc"],
["https://www.youtube.com/watch?v=B9ea7YyCP6E"],
["https://www.youtube.com/watch?v=BBkpaeJBKmk"],
["https://www.youtube.com/watch?v=BBqU8Apee_g"],
["https://www.youtube.com/watch?v=B8OdMwVwyXc"],
["https://www.youtube.com/watch?v=I7cwq6_4QtM"],
["https://www.youtube.com/watch?v=Z0mJDXpNhYA"],
["https://www.youtube.com/watch?v=QkQQjFGnZlg"],
["https://www.youtube.com/watch?v=IQaoRUQif14"],
]
def get_video_model_names():
    # query the Hub for Transformers video-classification checkpoints, most
    # downloaded first (ModelSearchArguments/ModelFilter were removed from
    # recent huggingface_hub releases, so plain string filters are used here)
    api = HfApi()
    video_models = api.list_models(
        task="video-classification",
        library="transformers",
        sort="downloads",
        direction=-1,
    )
    return [video_model.id for video_model in video_models]
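# rebuild the global pipeline whenever a different checkpoint is requested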
def select_model(model_name):
global pipe
pipe = pipeline(
task="video-classification",
model=model_name,
top_k=5,
frame_sampling_rate=FRAME_SAMPLING_RATE,
)
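# main entry point: classify a YouTube URL or an uploaded video file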
def predict(model_name, youtube_url, video_file_path):
    # reload the pipeline only if the user picked a different checkpoint
    if pipe.model.name_or_path != model_name:
        select_model(model_name)
    # prefer the YouTube URL; fall back to an uploaded file
    if youtube_url:
        video_path = download_youtube_video(youtube_url)
    elif video_file_path:
        video_path = video_file_path
    else:
        raise gr.Error("Provide a YouTube URL or upload a video file.")
    # lower the sampling rate for short clips so the model still receives enough frames
    num_total_frames = get_num_total_frames(video_path)
    num_model_input_frames = pipe.model.config.num_frames
    if num_total_frames < FRAME_SAMPLING_RATE * num_model_input_frames:
        frame_sampling_rate = max(num_total_frames // num_model_input_frames, 1)
    else:
        frame_sampling_rate = FRAME_SAMPLING_RATE
    gif_path = create_gif_from_video_file(
        video_path, frame_sampling_rate=frame_sampling_rate, save_path="video.gif"
    )
    # run inference and map each predicted label to its score
    results = pipe(videos=video_path, frame_sampling_rate=frame_sampling_rate)
    os.remove(video_path)
    label_to_score = {result["label"]: result["score"] for result in results}
    return label_to_score, gif_path
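# wire model selection, URL/file inputs, and prediction outputs into a single Interface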
app = gr.Interface(
    fn=predict,
    inputs=[
        gr.Dropdown(
            choices=VALID_VIDEOCLASSIFICATION_MODELS,
            label="Model:",
            show_label=True,
            value=DEFAULT_MODEL,
        ),
        gr.Textbox(label="Youtube URL:", show_label=True),
        gr.Video(label="Video File:", show_label=True),
    ],
    outputs=[
        gr.Label(label="Predictions:", show_label=True, num_top_classes=5),
        gr.Image(label="Input Clip", show_label=True),
    ],
    # pad each example row so it matches the three inputs defined above
    examples=[[DEFAULT_MODEL, url, None] for [url] in examples],
)
app.launch()