# -*- coding: utf-8 -*-
"""🎬 Keras Video Classification CNN-RNN model
Spaces for showing the model usage.
Author:
- Thomas Chaigneau @ChainYo
"""
import os
import cv2
import imageio
import gradio as gr
import numpy as np
from tensorflow import keras
from tensorflow_docs.vis import embed
from huggingface_hub import from_pretrained_keras
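
# Frame edge length fed to the CNN; NUM_FEATURES matches the size of
# InceptionV3's average-pooled output.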
IMG_SIZE = 224
NUM_FEATURES = 2048
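
# Load the pretrained CNN-RNN video classifier from the Hugging Face Hub.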
model = from_pretrained_keras("keras-io/video-classification-cnn-rnn")
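
# Collect the sample videos bundled with the Space. The tag parsing assumes
# UCF101-style file names such as "v_CricketShot_g04_c01.avi"; the tag is
# currently unused.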
samples = []
for file in os.listdir("samples"):
    tag = file.split("_")[1]
    samples.append([f"samples/{file}", 20])
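

# Crop the largest centered square out of a frame so that resizing to a square
# does not distort the aspect ratio.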
def crop_center_square(frame):
    y, x = frame.shape[0:2]
    min_dim = min(y, x)
    start_x = (x // 2) - (min_dim // 2)
    start_y = (y // 2) - (min_dim // 2)
    return frame[start_y : start_y + min_dim, start_x : start_x + min_dim]
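

# Decode a video with OpenCV: read frames until max_frames (0 = read all),
# center-crop, resize, and convert BGR to RGB.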
def load_video(path, max_frames=0, resize=(IMG_SIZE, IMG_SIZE)):
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = crop_center_square(frame)
            frame = cv2.resize(frame, resize)
            frame = frame[:, :, [2, 1, 0]]  # BGR -> RGB
            frames.append(frame)
            if len(frames) == max_frames:
                break
    finally:
        cap.release()
    return np.array(frames)
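

# Build a frozen per-frame feature extractor: ImageNet-pretrained InceptionV3
# without its classification head, with average pooling so each frame maps to
# a NUM_FEATURES-dimensional vector.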
def build_feature_extractor():
    feature_extractor = keras.applications.InceptionV3(
        weights="imagenet",
        include_top=False,
        pooling="avg",
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
    )
    preprocess_input = keras.applications.inception_v3.preprocess_input

    inputs = keras.Input((IMG_SIZE, IMG_SIZE, 3))
    preprocessed = preprocess_input(inputs)
    outputs = feature_extractor(preprocessed)
    return keras.Model(inputs, outputs, name="feature_extractor")
feature_extractor = build_feature_extractor()
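

# Pack a video's frames into the fixed-length (features, mask) pair the RNN
# head expects: one InceptionV3 feature vector per frame, zero-padded to
# max_seq_length, with a boolean mask marking the real frames.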
def prepare_video(frames, max_seq_length: int = 20):
    frames = frames[None, ...]
    frame_mask = np.zeros(shape=(1, max_seq_length), dtype="bool")
    frame_features = np.zeros(shape=(1, max_seq_length, NUM_FEATURES), dtype="float32")

    for i, batch in enumerate(frames):
        video_length = batch.shape[0]
        length = min(max_seq_length, video_length)
        for j in range(length):
            frame_features[i, j, :] = feature_extractor.predict(batch[None, j, :])
        frame_mask[i, :length] = 1  # 1 = not masked, 0 = masked

    return frame_features, frame_mask
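

# Full inference pipeline for one uploaded video: load frames, extract
# features, classify, and return per-class confidences plus a GIF preview.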
def sequence_prediction(path):
    class_vocab = ["CricketShot", "PlayingCello", "Punch", "ShavingBeard", "TennisSwing"]

    frames = load_video(path)
    frame_features, frame_mask = prepare_video(frames)
    probabilities = model.predict([frame_features, frame_mask])[0]

    preds = {}
    for i in np.argsort(probabilities)[::-1]:
        preds[class_vocab[i]] = float(probabilities[i])

    gif = to_gif(frames)
    return preds, gif
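

# Save the frames as an animated GIF at 10 fps and embed it for display.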
def to_gif(images):
    converted_images = images.astype(np.uint8)
    imageio.mimsave("animation.gif", converted_images, fps=10)
    # embed_file expects a file path, not the image array.
    return embed.embed_file("animation.gif")


article = "<div style='text-align: center;'><a href='https://github.com/ChainYo' target='_blank'>Space by Thomas Chaigneau</a><br><a href='https://keras.io/examples/vision/video_classification/' target='_blank'>Keras example by Sayak Paul</a></div>"
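
# Wire everything into a Gradio interface (legacy gr.inputs/gr.outputs API).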
app = gr.Interface(
    sequence_prediction,
    inputs=[gr.inputs.Video(label="Video", type="avi")],
    outputs=[
        gr.outputs.Label(label="Prediction", type="confidences"),
        gr.outputs.Image(label="GIF", type="gif"),
    ],
    title="Keras Video Classification CNN-RNN model",
    description="Keras Working Group",
    article=article,
    # examples=samples
).launch(enable_queue=True)