|
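# FILM frame-interpolation demo (Gradio app for a Hugging Face Space):
# turns a set of uploaded images (or the sharpest frames of uploaded mp4 clips)
# into a video by synthesizing in-between frames with FILM, with optional
# heuristic background removal.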
import os |
|
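# Clone the FILM reference implementation at startup so that `eval.interpolator`
# and `eval.util` can be imported from the checked-out repo below.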
os.system("git clone https://github.com/google-research/frame-interpolation") |
|
import sys |
|
sys.path.append("frame-interpolation") |
|
|
|
import math |
|
import cv2 |
|
import numpy as np |
|
import tensorflow as tf |
|
import mediapy |
|
from PIL import Image |
|
|
|
import gradio as gr |
|
|
|
from huggingface_hub import snapshot_download |
|
from image_tools.sizes import resize_and_crop |
|
|
|
|
|
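# Download the pretrained FILM checkpoint from the Hugging Face Hub and build
# the interpolator once at startup (the `interpolator` name below rebinds the
# imported module, which is not used again).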
model = snapshot_download(repo_id="akhaliq/frame-interpolation-film-style") |
|
from eval import interpolator, util |
|
interpolator = interpolator.Interpolator(model, None) |
|
|
|
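# Point mediapy at the ffmpeg binary located by the FILM utilities.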
ffmpeg_path = util.get_ffmpeg_path() |
|
mediapy.set_ffmpeg(ffmpeg_path) |
|
|
|
|
|
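# Interpolate recursively between two frame image files with FILM and write the
# resulting frames out as a short mp4 clip named after the pair index `n`.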
def do_interpolation(frame1, frame2, interpolation, n): |
|
print("tween frames: " + str(interpolation)) |
|
print(frame1, frame2) |
|
input_frames = [frame1, frame2] |
|
frames = list( |
|
util.interpolate_recursively_from_files( |
|
input_frames, int(interpolation), interpolator)) |
|
|
|
|
|
mediapy.write_video(f"{n}_to_{n+1}_out.mp4", frames, fps=25) |
|
return f"{n}_to_{n+1}_out.mp4" |
|
|
|
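# Split a video file into numbered PNG frames on disk; returns the list of
# frame filenames and the source fps.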
def get_frames(video_in, step, name, n): |
|
frames = [] |
|
cap = cv2.VideoCapture(video_in) |
|
cframes = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) |
|
cfps = int(cap.get(cv2.CAP_PROP_FPS)) |
|
print(f'frames: {cframes}, fps: {cfps}') |
|
    i = 0
|
    while cap.isOpened():

        ret, frame = cap.read()

        if not ret:

            break
|
cv2.imwrite(f"{str(n)}_{name}_{step}{str(i)}.png", frame) |
|
frames.append(f"{str(n)}_{name}_{step}{str(i)}.png") |
|
i+=1 |
|
|
|
cap.release() |
|
cv2.destroyAllWindows() |
|
print("broke the video into frames") |
|
|
|
    return frames, cfps
|
|
|
|
|
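# Reassemble a list of frame image files into an mp4 at the given fps.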
def create_video(frames, fps, name):

    print("building video result")

    imgs = []

    for img in frames:

        # OpenCV loads BGR; mediapy expects RGB
        imgs.append(cv2.cvtColor(cv2.imread(img).astype(np.uint8), cv2.COLOR_BGR2RGB))

    mediapy.write_video(name + "_result.mp4", imgs, fps=fps)

    return name + "_result.mp4"
|
|
|
|
|
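# Main pipeline: for every pair of consecutive input frames, synthesize the
# in-between frames with FILM, then join everything into one output video at
# the requested fps.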
def infer(f_in, interpolation, fps_output): |
|
|
|
fps_output = logscale(fps_output) |
|
|
|
|
|
    frames_list = f_in

    n_frame = 300  # hard cap, matching the "300 uploaded frames" limit in the UI text
|
|
|
|
|
if n_frame >= len(frames_list): |
|
print("video is shorter than the cut value") |
|
n_frame = len(frames_list) |
|
|
|
|
|
result_frames = [] |
|
print("set stop frames to: " + str(n_frame)) |
|
|
|
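    # walk consecutive frame pairs; each pair yields one interpolated clip
    # that is split back into frames and appended to the result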
    for idx, frame in enumerate(frames_list[:n_frame]):
|
if idx < len(frames_list) - 1: |
|
next_frame = frames_list[idx+1] |
|
|
|
interpolated_frames = do_interpolation(frame, next_frame, interpolation, idx) |
|
break_interpolated_video = get_frames(interpolated_frames, "interpol", f"{idx}_", -1) |
|
print(break_interpolated_video[0]) |
|
            # drop the clip's last frame: it equals the next pair's first frame
            for j, img in enumerate(break_interpolated_video[0][:-1]):
|
print(f"IMG:{img}") |
|
os.rename(img, f"{idx}_to_{idx+1}_{j}.png") |
|
result_frames.append(f"{idx}_to_{idx+1}_{j}.png") |
|
|
|
print("frames " + str(idx) + " & " + str(idx+1) + "/" + str(n_frame) + ": done;") |
|
|
|
result_frames.append(f"{frames_list[n_frame-1]}") |
|
final_vid = create_video(result_frames, fps_output, "interpolated") |
|
|
|
    print(f"interpolated frames: {len(frames_list)} -> {len(result_frames)}")

    cv2.destroyAllWindows()

    return final_vid, final_vid
|
|
|
|
|
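# Heuristic background removal: estimate the background with a large median
# blur, keep flat shadow regions via saturation/lightness/detail thresholds,
# posterize with k-means, and flood-fill from the border to build a 3-level
# matte that whitens the background and darkens the detected shadows.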
def remove_bg(fl, s, l, v, l_t): |
|
frame = cv2.imread(fl).astype(np.uint8) |
|
|
|
b = 5 |
|
|
|
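    # estimate the background with a very large median blur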
bg = cv2.medianBlur(frame, 255) |
|
|
|
frame_ = ((bg.astype(np.int16)-frame.astype(np.int16))+127).astype(np.uint8) |
|
frame_ = cv2.bilateralFilter(frame_, b*4, b*8, b*2) |
|
frame_ = cv2.medianBlur(frame_, b) |
|
|
|
element = cv2.getStructuringElement(cv2.MORPH_RECT, (2*b+1, 2*b+1), (b,b)) |
|
frame_ = cv2.erode(cv2.dilate(frame_, element), element) |
|
|
|
|
|
|
|
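    # remove the background's colour cast from the original frame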
bg_gray = cv2.cvtColor(cv2.cvtColor(bg, cv2.COLOR_BGR2GRAY), cv2.COLOR_GRAY2BGR) |
|
bg_diff = (bg-bg_gray).astype(np.int16) |
|
frame_c = (frame.astype(np.int16)-bg_diff).astype(np.uint8) |
|
|
|
|
|
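    # HSV channels drive the shadow thresholds; local Laplacian variance
    # serves as a per-pixel detail map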
hsv_ = cv2.cvtColor(frame_c, cv2.COLOR_BGR2HSV) |
|
edges = cv2.Laplacian(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), cv2.CV_64F) |
|
blur_s = np.zeros_like(edges) |
|
    for i in range(2, frame.shape[0]-2):

        for j in range(2, frame.shape[1]-2):

            # variance of the Laplacian over a 5x5 window as a local detail measure
            d = edges[i-2:i+3, j-2:j+3].var()

            blur_s[i,j] = d.astype(np.uint8)
|
|
|
    print(fl)

    print(f"detail: avg {np.average(blur_s):.1f}, median {np.median(blur_s):.1f}")

    print(f"saturation: avg {np.average(hsv_[:,:,1]):.1f}, median {np.median(hsv_[:,:,1]):.1f}")

    print(f"lightness: avg {np.average(hsv_[:,:,2]):.1f}, median {np.median(hsv_[:,:,2]):.1f}")
|
|
|
|
|
if l_t == "slider": |
|
m = cv2.inRange(hsv_, np.array([0,0,0]), np.array([180,s,l])) |
|
mask = cv2.inRange(blur_s, 0, v) |
|
elif l_t == "average": |
|
m = cv2.inRange(hsv_, np.array([0,0,0]), np.array([180, int(np.average(hsv_[:,:,1])), int(np.average(hsv_[:,:,2]))])) |
|
mask = cv2.inRange(blur_s, 0, int(np.average(blur_s))) |
|
elif l_t == "median": |
|
m = cv2.inRange(hsv_, np.array([0,0,0]), np.array([180, int(np.median(hsv_[:,:,1])), int(np.median(hsv_[:,:,2]))])) |
|
mask = cv2.inRange(blur_s, 0, int(np.median(blur_s))) |
|
|
|
masks = np.bitwise_and(m, mask) |
|
frame_[masks==0] = (0,0,0) |
|
|
|
|
|
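    # posterize the masked difference image into K tones with k-means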
m_ = frame_.reshape((-1,3)) |
|
|
|
m_ = np.float32(m_) |
|
|
|
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 16, 1.0) |
|
K = 3 |
|
ret,label,center=cv2.kmeans(m_, K, None, criteria, 16, cv2.KMEANS_PP_CENTERS) |
|
|
|
center = np.uint8(center) |
|
res = center[label.flatten()] |
|
frame_ = res.reshape((frame_.shape)) |
|
|
|
|
|
|
|
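    # lift the bright cluster to pure white, then flood-fill from the border
    # to mark everything connected to the frame edge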
m_ = cv2.inRange(frame_, np.array([128,128,128]), np.array([255,255,255])) |
|
frame_[m_>0] = (255,255,255) |
|
cv2.rectangle(frame_,(0,0),(frame_.shape[1]-1,frame_.shape[0]-1),(255,255,255),7) |
|
mask = cv2.floodFill(frame_, None, (0, 0), 255, 0, 0, (4 | cv2.FLOODFILL_FIXED_RANGE))[2] |
|
|
|
mask = mask[1:mask.shape[0]-1, 1:mask.shape[1]-1] |
|
frame_[mask>0] = (127,127,127) |
|
m_ = cv2.inRange(frame_, np.array([1,1,1]), np.array([127,127,127])) |
|
frame_[m_>0] = (127,127,127) |
|
|
|
|
|
|
|
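    # fold the posterized image into a 3-level grayscale matte (0 / 127 / 255)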
frame_ = 255 - cv2.cvtColor(frame_, cv2.COLOR_BGR2GRAY) |
|
m_ = cv2.inRange(frame_, 255, 255) |
|
frame_[m_>0] = 127 |
|
m_ = cv2.inRange(frame_, 128, 128) |
|
frame_[m_>0] = 255 |
|
|
|
|
|
|
|
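    # composite: clamp the frame's extremes, darken pixels where the matte is 0
    # and white out pixels where the matte is 255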
m = cv2.inRange(frame, np.array([240,240,240]), np.array([255,255,255])) |
|
frame[m>0] = (239,239,239) |
|
m = cv2.inRange(frame, np.array([0,0,0]), np.array([15,15,15])) |
|
frame[m>0] = (16,16,16) |
|
frame[frame_==0] = (frame[frame_==0] / 17).astype(np.uint8) |
|
frame[frame_==255] = (255,255,255) |
|
|
|
cv2.imwrite(fl, frame) |
|
return fl |
|
|
|
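# map a linear slider position to a power of two (1, 2, 4, ...)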
def logscale(linear): |
|
return int(math.pow(2, linear)) |
|
|
|
def linscale(power):

    return int(math.log2(power))
|
|
|
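# Pick the sharpest frame of a clip (highest Laplacian variance) and denoise it
# temporally against its neighbouring frames; returns the chosen frame's file.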
def sharpest(fl, i): |
|
break_vid = get_frames(fl, "vid_input_frame", "origin", i) |
|
|
|
frames = [] |
|
blur_s = [] |
|
for jdx, fr in enumerate(break_vid[0]): |
|
frames.append(cv2.imread(fr).astype(np.uint8)) |
|
blur_s.append(cv2.Laplacian(cv2.cvtColor(frames[len(frames)-1], cv2.COLOR_BGR2GRAY), cv2.CV_64F).var()) |
|
print(str(int(blur_s[jdx]))) |
|
|
|
indx = np.argmax(blur_s) |
|
fl = break_vid[0][indx] |
|
|
|
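    # temporal denoising window, clamped (and kept odd) near the clip's ends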
n = 25 |
|
    half = n // 2
|
if indx-half < 0: |
|
n = indx*2+1 |
|
elif indx+half >= len(frames): |
|
n = (len(frames)-1-indx)*2+1 |
|
|
|
|
|
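    # non-local means denoising of the chosen frame across its n neighbours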
    frame = cv2.fastNlMeansDenoisingColoredMulti(

        srcImgs=frames,

        imgToDenoiseIndex=indx,

        temporalWindowSize=n,

        hColor=5,

        templateWindowSize=21,

        searchWindowSize=21)
|
|
|
cv2.imwrite(fl, frame) |
|
    print(f"{i}th file, sharpest frame: {indx}, name: {fl}")
|
return fl |
|
|
|
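# sort key: order uploaded files by their basename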
def sortFiles(e):

    return e.split('/')[-1]
|
|
|
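# Upload handler: sort the inputs, reduce any mp4 to its sharpest frame, and
# optionally remove backgrounds; feeds the hidden file list and the gallery.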
def loadf(f, s, l, v, l_t, r_bg): |
|
    if f is not None and f[0] is not None:
|
f.sort(key=sortFiles) |
|
fnew = [] |
|
|
|
for i, fl in enumerate(f): |
|
            ftype = fl.split('/')

            if ftype[-1].split('.')[-1] == 'mp4':
|
fl = sharpest(fl, i) |
|
|
|
            if r_bg:
|
fl = remove_bg(fl, s, l, v, l_t) |
|
|
|
fnew.append(fl) |
|
|
|
return fnew, fnew |
|
else: |
|
return f, f |
|
|
|
|
|
title=""" |
|
<div style="text-align: center; max-width: 500px; margin: 0 auto;"> |
|
<div |
|
style=" |
|
display: inline-flex; |
|
align-items: center; |
|
gap: 0.8rem; |
|
font-size: 1.75rem; |
|
margin-bottom: 10px; |
|
" |
|
> |
|
<h1 style="font-weight: 600; margin-bottom: 7px;"> |
|
Video interpolation from images with FILM |
|
</h1> |
|
|
|
</div> |
|
  <p> This space uses FILM to generate interpolation frames between a set of image files and turn them into a video.
|
Limited to 300 uploaded frames, from the beginning of your input.<br /> |
|
<a style="display:inline-block" href="https://huggingface.co/spaces/freealise/video_frame_interpolation?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a> |
|
</p> |
|
</div> |
|
""" |
|
|
|
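# Gradio UI wiring.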
with gr.Blocks() as demo: |
|
with gr.Column(): |
|
gr.HTML(title) |
|
with gr.Row(): |
|
with gr.Column(): |
|
with gr.Accordion(label="Upload files here", open=True): |
|
files_orig = gr.File(file_count="multiple", file_types=['image', '.mp4']) |
|
files_input = gr.File(file_count="multiple", visible=False) |
|
gallery_input = gr.Gallery(label="Slideshow", preview=True, columns=8192, interactive=False) |
|
with gr.Accordion(label="Background removal settings", open=False): |
|
with gr.Tab(label="Shadow maximums"): |
|
max_s = gr.Slider(minimum=0, maximum=255, step=1, value=32, label="Saturation") |
|
max_l = gr.Slider(minimum=0, maximum=255, step=1, value=64, label="Lightness") |
|
max_v = gr.Slider(minimum=0, maximum=255, step=1, value=16, label="Detail") |
|
lt = gr.Radio(label="Maximum is", choices=["average", "median", "slider"], value="slider") |
|
rbg = gr.Checkbox(label="Remove background", value=True) |
|
files_orig.upload(fn=loadf, inputs=[files_orig, max_s, max_l, max_v, lt, rbg], outputs=[files_input, gallery_input]) |
|
max_s.input(fn=loadf, inputs=[files_orig, max_s, max_l, max_v, lt, rbg], outputs=[files_input, gallery_input]) |
|
max_l.input(fn=loadf, inputs=[files_orig, max_s, max_l, max_v, lt, rbg], outputs=[files_input, gallery_input]) |
|
max_v.input(fn=loadf, inputs=[files_orig, max_s, max_l, max_v, lt, rbg], outputs=[files_input, gallery_input]) |
|
|
|
with gr.Row(): |
|
interpolation_slider = gr.Slider(minimum=1, maximum=5, step=1, value=1, label="Interpolation Steps: ") |
|
interpolation = gr.Number(value=1, show_label=False, interactive=False) |
|
interpolation_slider.change(fn=logscale, inputs=[interpolation_slider], outputs=[interpolation]) |
|
with gr.Row(): |
|
fps_output_slider = gr.Slider(minimum=0, maximum=5, step=1, value=0, label="FPS output: ") |
|
fps_output = gr.Number(value=1, show_label=False, interactive=False) |
|
fps_output_slider.change(fn=logscale, inputs=[fps_output_slider], outputs=[fps_output]) |
|
submit_btn = gr.Button("Submit") |
|
|
|
with gr.Column(): |
|
video_output = gr.Video() |
|
file_output = gr.File() |
|
|
|
gr.Examples( |
|
examples=[[ |
|
["./examples/0.png", "./examples/1.png", "./examples/2.png", "./examples/3.png", "./examples/4.png"], |
|
32, 64, 16, "slider", True |
|
]], |
|
fn=loadf, |
|
inputs=[files_orig, max_s, max_l, max_v, lt, rbg], |
|
outputs=[files_input, gallery_input], |
|
cache_examples=True |
|
) |
|
|
|
submit_btn.click(fn=infer, inputs=[files_input, interpolation_slider, fps_output_slider], outputs=[video_output, file_output]) |
|
|
|
demo.launch() |