# (Hugging Face file-viewer residue captured with the source; not part of app.py)
# freealise's picture
# Update app.py
# 66e74e2 verified
# raw
# history blame
# 14.3 kB
import os
# Fetch the FILM (frame-interpolation) sources at import time. The exit status
# of the command is not checked here.
os.system("git clone https://github.com/google-research/frame-interpolation")
import sys
# Put the cloned checkout on the import path; presumably this is what makes the
# `from eval import ...` below resolve - verify against the cloned repo layout.
sys.path.append("frame-interpolation")
import math
import cv2
import numpy as np
import tensorflow as tf
import mediapy
from PIL import Image
import gradio as gr
from huggingface_hub import snapshot_download
# NOTE(review): resize_and_crop is not referenced anywhere in the visible code.
from image_tools.sizes import resize_and_crop
# Download the FILM model snapshot from the Hugging Face Hub; `model` is passed
# straight to the Interpolator constructor below.
model = snapshot_download(repo_id="akhaliq/frame-interpolation-film-style")
from eval import interpolator, util
# Rebinds the name `interpolator` from the imported module to an Interpolator
# instance; from here on the module itself is no longer reachable by that name.
interpolator = interpolator.Interpolator(model, None)
# Point mediapy at the ffmpeg binary located by the FILM utilities.
ffmpeg_path = util.get_ffmpeg_path()
mediapy.set_ffmpeg(ffmpeg_path)
def do_interpolation(frame1, frame2, interpolation, n):
    """Interpolate between two frames with FILM and save the result as a clip.

    Args:
        frame1: First input frame (presumably an image file path, since it is
            handed to ``util.interpolate_recursively_from_files``).
        frame2: Second input frame, same kind as ``frame1``.
        interpolation: Interpolation setting; converted with ``int()`` and
            forwarded to ``util.interpolate_recursively_from_files``.
        n: Index of ``frame1``; only used to build the output file name.

    Returns:
        The name of the written video, ``"{n}_to_{n+1}_out.mp4"`` (25 fps).
    """
    print("tween frames: " + str(interpolation))
    print(frame1, frame2)

    tween_frames = list(
        util.interpolate_recursively_from_files(
            [frame1, frame2], int(interpolation), interpolator))

    out_path = f"{n}_to_{n+1}_out.mp4"
    mediapy.write_video(out_path, tween_frames, fps=25)
    return out_path
def get_frames(video_in, step, name, n):
    """Split a video into PNG files, one per frame, in the working directory.

    Frame number ``i`` (starting at 0) is written to
    ``"{n}_{name}_{step}{i}.png"``.

    Args:
        video_in: Video source passed to ``cv2.VideoCapture``.
        step: Text placed directly before the frame number in the file name.
        name: Text placed between ``n`` and ``step`` in the file name.
        n: Prefix of the file name (converted with ``str()``).

    Returns:
        A tuple ``(frames, fps)``: the list of written PNG paths in frame
        order, and the frame rate reported by OpenCV for the source.

    Raises:
        IOError: If OpenCV fails to write one of the frames.
    """
    frames = []
    cap = cv2.VideoCapture(video_in)
    try:
        cframes = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        print(f'frames: {cframes}, fps: {int(fps)}')
        print("video fps: " + str(fps))

        i = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            path = f"{str(n)}_{name}_{step}{str(i)}.png"
            # imwrite signals failure through its return value; surfacing it
            # here beats a confusing crash later when the file is read back.
            if not cv2.imwrite(path, frame):
                raise IOError(f"could not write frame to {path}")
            frames.append(path)
            i += 1
    finally:
        # Release the capture even if reading or writing a frame failed.
        cap.release()

    # No GUI windows are created in this function, so there is nothing to
    # destroy; the former cv2.destroyAllWindows() call was dropped.
    print("broke the video into frames")
    return frames, fps
def create_video(frames, fps, type):
    """Assemble image files into ``"<type>_result.mp4"``.

    Args:
        frames: Image file paths, in playback order.
        fps: Frame rate passed to ``mediapy.write_video``.
        type: Prefix of the output file name. (The name shadows the builtin
            but is kept because it is part of the function's interface.)

    Returns:
        The name of the written video file.

    Raises:
        FileNotFoundError: If one of the frame files cannot be read.
    """
    print("building video result")
    imgs = []
    for path in frames:
        bgr = cv2.imread(path)
        if bgr is None:
            # cv2.imread returns None instead of raising for unreadable files.
            raise FileNotFoundError(f"could not read frame image: {path}")
        # OpenCV loads BGR; convert to RGB before handing frames to mediapy.
        imgs.append(cv2.cvtColor(bgr.astype(np.uint8), cv2.COLOR_BGR2RGB))

    out_path = type + "_result.mp4"
    mediapy.write_video(out_path, imgs, fps=fps)
    return out_path
def infer(f_in, interpolation, fps_output):
    """Interpolate between consecutive input frames and build the final video.

    Only the first 300 frames of ``f_in`` are used. For every consecutive pair
    of kept frames an interpolated clip is produced, split back into PNGs, and
    all of its frames except the last (which is the next input frame) are
    collected; the last kept input frame closes the sequence.

    Args:
        f_in: List of frame image paths, in playback order.
        interpolation: Interpolation setting forwarded to ``do_interpolation``.
        fps_output: Slider position; the output frame rate is
            ``logscale(fps_output)``.

    Returns:
        A tuple ``(video_path, video_path)``: the same file name twice, one
        for the video player and one for the download component.

    Raises:
        gr.Error: If no input frames were provided.
    """
    if not f_in:
        raise gr.Error("Upload at least one image or video before submitting.")

    fps_output = logscale(fps_output)

    frames_list = f_in
    fps = 1
    print(f"ORIGIN FPS: {fps}")

    n_frame = 300  # limited to 300 frames
    if n_frame >= len(frames_list):
        print("video is shorter than the cut value")
        n_frame = len(frames_list)
    kept = frames_list[:n_frame]

    result_frames = []
    print("set stop frames to: " + str(n_frame))

    # Pair each kept frame with its successor *within the kept range*. The
    # previous bound (len(frames_list) - 1) also interpolated from the last
    # kept frame towards frame 300 when the input was truncated, after which
    # the last kept frame was appended again and the video jumped backwards.
    for idx, (frame, next_frame) in enumerate(zip(kept, kept[1:])):
        clip = do_interpolation(frame, next_frame, interpolation, idx)
        tween_paths, _ = get_frames(clip, "interpol", f"{idx}_", -1)
        print(tween_paths)
        # Drop the clip's final frame: it duplicates the next input frame.
        for j, img in enumerate(tween_paths[:-1]):
            print(f"IMG:{img}")
            target = f"{idx}_to_{idx+1}_{j}.png"
            os.rename(img, target)
            result_frames.append(target)
        print("frames " + str(idx) + " & " + str(idx+1) + "/" + str(n_frame) + ": done;")

    result_frames.append(f"{kept[-1]}")
    final_vid = create_video(result_frames, fps_output, "interpolated")

    print("interpolated frames: " + str(len(frames_list)) + " -> " + str(len(result_frames)))
    return final_vid, final_vid
def remove_bg(fl, s, l, v, l_t):
    """Whiten the background and darken the shadow of an image, in place.

    The image at ``fl`` is read, a three-level segmentation (shadow /
    background / foreground, per the inline comments) is derived from it, and
    the image is written back to the same path.

    NOTE(review): because the input file is overwritten, calling this again on
    the same path processes the already-processed image.

    Args:
        fl: Path of the image file; read and then overwritten.
        s: Upper bound for the HSV saturation channel (used when
            ``l_t == "slider"``).
        l: Upper bound for the HSV value channel, labelled "lightness" in the
            printed statistics (used when ``l_t == "slider"``).
        v: Upper bound for the local-detail map (used when
            ``l_t == "slider"``).
        l_t: Where the upper bounds come from: ``"slider"`` uses ``s``/``l``/
            ``v``, ``"average"`` and ``"median"`` use the corresponding
            statistic of each channel. Any other value leaves the masks
            undefined and the function fails when they are combined.

    Returns:
        ``fl`` unchanged.
    """
    frame = cv2.imread(fl).astype(np.uint8)
    k = 5

    # subtract background (get scene with shadow)
    bg = cv2.medianBlur(frame, 255)
    seg = ((bg.astype(np.int16) - frame.astype(np.int16)) + 127).astype(np.uint8)
    seg = cv2.bilateralFilter(seg, k*4, k*8, k*2)
    seg = cv2.medianBlur(seg, k)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2*k+1, 2*k+1), (k, k))
    seg = cv2.erode(cv2.dilate(seg, kernel), kernel)

    # correct hue against light
    bg_gray = cv2.cvtColor(cv2.cvtColor(bg, cv2.COLOR_BGR2GRAY), cv2.COLOR_GRAY2BGR)
    # NOTE(review): this subtraction happens in uint8 before the int16 cast.
    bg_diff = (bg - bg_gray).astype(np.int16)
    frame_c = (frame.astype(np.int16) - bg_diff).astype(np.uint8)
    hsv_ = cv2.cvtColor(frame_c, cv2.COLOR_BGR2HSV)
    saturation = hsv_[:, :, 1]
    lightness = hsv_[:, :, 2]

    # Local detail: variance of the Laplacian over a 4x4 neighbourhood, stored
    # after a uint8 cast; a 2-pixel border is left at zero.
    edges = cv2.Laplacian(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), cv2.CV_64F)
    blur_s = np.zeros_like(edges)
    height, width = frame.shape[0], frame.shape[1]
    for row in range(2, height - 2):
        for col in range(2, width - 2):
            local_var = edges[row-2:row+2, col-2:col+2].var()
            blur_s[row, col] = local_var.astype(np.uint8)

    # Log the statistics that the "average"/"median" modes are based on.
    print(fl)
    for heading, channel in (("detail", blur_s),
                             ("saturation", saturation),
                             ("lightness", lightness)):
        print(heading)
        print(np.average(channel))
        print(np.median(channel))

    # remove regions of low saturation, lightness and detail (get scene without shadow)
    if l_t == "slider":
        m = cv2.inRange(hsv_, np.array([0,0,0]), np.array([180,s,l]))
        mask = cv2.inRange(blur_s, 0, v)
    elif l_t == "average":
        m = cv2.inRange(hsv_, np.array([0,0,0]), np.array([180, int(np.average(saturation)), int(np.average(lightness))]))
        mask = cv2.inRange(blur_s, 0, int(np.average(blur_s)))
    elif l_t == "median":
        m = cv2.inRange(hsv_, np.array([0,0,0]), np.array([180, int(np.median(saturation)), int(np.median(lightness))]))
        mask = cv2.inRange(blur_s, 0, int(np.median(blur_s)))

    combined = np.bitwise_and(m, mask)
    seg[combined == 0] = (0,0,0)

    # Quantise the remaining colours to K clusters with k-means.
    samples = np.float32(seg.reshape((-1,3)))
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 16, 1.0)
    K = 3
    _, label, center = cv2.kmeans(samples, K, None, criteria, 16, cv2.KMEANS_PP_CENTERS)
    center = np.uint8(center)
    seg = center[label.flatten()].reshape(seg.shape)

    # remove shadows at edges
    bright = cv2.inRange(seg, np.array([128,128,128]), np.array([255,255,255]))
    seg[bright > 0] = (255,255,255)
    cv2.rectangle(seg, (0,0), (seg.shape[1]-1, seg.shape[0]-1), (255,255,255), 7)
    # Flood-fill from the top-left corner; element [2] of the result is the
    # fill mask, which is 2 pixels larger than the image in each dimension.
    fill_mask = cv2.floodFill(seg, None, (0, 0), 255, 0, 0, (4 | cv2.FLOODFILL_FIXED_RANGE))[2]
    fill_mask = fill_mask[1:-1, 1:-1]
    seg[fill_mask > 0] = (127,127,127)
    mid = cv2.inRange(seg, np.array([1,1,1]), np.array([127,127,127]))
    seg[mid > 0] = (127,127,127)

    # shadow is black, bg is white, fg is gray
    seg = 255 - cv2.cvtColor(seg, cv2.COLOR_BGR2GRAY)
    seg[cv2.inRange(seg, 255, 255) > 0] = 127
    seg[cv2.inRange(seg, 128, 128) > 0] = 255

    # apply mask to output
    # Clamp near-white and near-black pixels first (presumably so the values
    # written for masked regions below stay distinguishable - confirm).
    near_white = cv2.inRange(frame, np.array([240,240,240]), np.array([255,255,255]))
    frame[near_white > 0] = (239,239,239)
    near_black = cv2.inRange(frame, np.array([0,0,0]), np.array([15,15,15]))
    frame[near_black > 0] = (16,16,16)
    shadow = seg == 0
    frame[shadow] = (frame[shadow] / 17).astype(np.uint8)
    frame[seg == 255] = (255,255,255)

    cv2.imwrite(fl, frame)
    return fl
def logscale(linear):
    """Return ``2 ** linear`` truncated to an ``int``.

    Used to turn a linear slider position into a power-of-two value.
    """
    power_of_two = math.pow(2, linear)
    return int(power_of_two)
def linscale(linear):
    """Return the base-2 logarithm of ``linear`` truncated to an ``int``.

    This is the inverse direction of ``logscale`` for exact powers of two.
    ``math.log2`` raises ``ValueError`` for non-positive input.
    """
    exponent = math.log2(linear)
    return int(exponent)
def sharpest(fl, i):
    """Extract the sharpest frame of a video and denoise it.

    The video is split into PNG frames, each frame is scored by the variance
    of its Laplacian, and the highest-scoring frame is denoised using its
    temporal neighbours. The result overwrites that frame's PNG file.

    Args:
        fl: Path of the video file.
        i: Index of the video among the uploaded files; used as the prefix of
            the extracted frame file names and in the log output.

    Returns:
        Path of the PNG file holding the denoised sharpest frame.

    Raises:
        ValueError: If no frame could be read from the video.
    """
    paths, _ = get_frames(fl, "vid_input_frame", "origin", i)
    if not paths:
        raise ValueError(f"no frames could be read from video: {fl}")

    frames = []
    blur_s = []
    for path in paths:
        img = cv2.imread(path).astype(np.uint8)
        frames.append(img)
        score = cv2.Laplacian(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), cv2.CV_64F).var()
        blur_s.append(score)
        print(str(int(score)))

    indx = int(np.argmax(blur_s))
    fl = paths[indx]

    # The denoiser uses frames indx - n//2 .. indx + n//2, and n must be odd.
    # Clamp the half-width against BOTH ends of the clip: the previous
    # if/elif only clamped against one end, so for short clips with the
    # sharpest frame near the end the window ran past the last frame.
    half = min(25 // 2, indx, len(frames) - 1 - indx)
    n = 2 * half + 1

    # denoise
    frame = cv2.fastNlMeansDenoisingColoredMulti(
        srcImgs=frames,
        imgToDenoiseIndex=indx,
        temporalWindowSize=n,
        hColor=5,
        templateWindowSize=21,
        searchWindowSize=21)

    cv2.imwrite(fl, frame)
    print(str(i) + 'th file, sharpest frame: ' + str(indx) + ', name: ' + fl)
    return fl
def sortFiles(e):
    """Sort key for file paths: the text after the last ``/``.

    A string without ``/`` is returned unchanged.
    """
    _, _, tail = e.rpartition('/')
    return tail
def loadf(f, s, l, v, l_t, r_bg):
    """Prepare uploaded files for interpolation.

    Files are processed in order of their base name. Each ``.mp4`` is replaced
    by its sharpest frame (see ``sharpest``); when ``r_bg`` is set, every
    resulting image additionally goes through ``remove_bg``.

    Args:
        f: List of uploaded file paths, or ``None``.
        s: Saturation bound forwarded to ``remove_bg``.
        l: Lightness bound forwarded to ``remove_bg``.
        v: Detail bound forwarded to ``remove_bg``.
        l_t: Threshold mode forwarded to ``remove_bg``.
        r_bg: Whether to run background removal.

    Returns:
        A tuple with the list of prepared image paths twice (one copy for the
        hidden file list, one for the gallery). When ``f`` is ``None``, empty,
        or starts with ``None``, ``(f, f)`` is returned untouched.
    """
    # `not f` also covers the empty list, which previously raised IndexError
    # on f[0].
    if not f or f[0] is None:
        return f, f

    fnew = []
    # sorted() leaves the caller's list unmodified.
    for i, fl in enumerate(sorted(f, key=sortFiles)):
        # Compare the real extension, case-insensitively. The previous
        # split('.')[1] raised on names without a dot and looked at the wrong
        # segment for names containing several dots.
        if os.path.splitext(fl)[1].lower() == '.mp4':
            fl = sharpest(fl, i)
        if r_bg:
            fl = remove_bg(fl, s, l, v, l_t)
        fnew.append(fl)
    return fnew, fnew
# HTML header shown at the top of the page (rendered through gr.HTML(title)
# in the UI definition). The 300-frame limit it mentions is the cut-off
# hard-coded in infer().
title="""
<div style="text-align: center; max-width: 500px; margin: 0 auto;">
<div
style="
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
margin-bottom: 10px;
"
>
<h1 style="font-weight: 600; margin-bottom: 7px;">
Video interpolation from images with FILM
</h1>
</div>
<p> This space uses FILM to generate interpolation frames in a set of image files you need to turn into a video.
Limited to 300 uploaded frames, from the beginning of your input.<br />
<a style="display:inline-block" href="https://huggingface.co/spaces/freealise/video_frame_interpolation?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a>
</p>
</div>
"""
# Gradio UI definition.
# NOTE(review): the leading indentation of this block is missing in this copy
# of the file, so the nesting of the layout containers (Column / Row /
# Accordion / Tab) cannot be read off the source - restore it before running.
with gr.Blocks() as demo:
with gr.Column():
gr.HTML(title)
with gr.Row():
with gr.Column():
with gr.Accordion(label="Upload files here", open=True):
# files_orig: the user's upload (images and .mp4 files).
files_orig = gr.File(file_count="multiple", file_types=['image', '.mp4'])
# files_input: hidden list filled by loadf(); this, not files_orig, is what
# infer() receives when "Submit" is clicked.
files_input = gr.File(file_count="multiple", visible=False)
gallery_input = gr.Gallery(label="Slideshow", preview=True, columns=8192, interactive=False)
with gr.Accordion(label="Background removal settings", open=False):
with gr.Tab(label="Shadow maximums"):
# Upper bounds passed to loadf() as s, l and v.
max_s = gr.Slider(minimum=0, maximum=255, step=1, value=32, label="Saturation")
max_l = gr.Slider(minimum=0, maximum=255, step=1, value=64, label="Lightness")
max_v = gr.Slider(minimum=0, maximum=255, step=1, value=16, label="Detail")
# Threshold mode (l_t) and on/off switch (r_bg) passed to loadf().
# NOTE(review): no event listener is attached to lt or rbg in the visible
# code, so changing them only takes effect on the next upload or slider move.
lt = gr.Radio(label="Maximum is", choices=["average", "median", "slider"], value="slider")
rbg = gr.Checkbox(label="Remove background", value=True)
# Re-run loadf() on upload and whenever one of the three sliders is moved;
# its two return values fill the hidden file list and the gallery.
files_orig.upload(fn=loadf, inputs=[files_orig, max_s, max_l, max_v, lt, rbg], outputs=[files_input, gallery_input])
max_s.input(fn=loadf, inputs=[files_orig, max_s, max_l, max_v, lt, rbg], outputs=[files_input, gallery_input])
max_l.input(fn=loadf, inputs=[files_orig, max_s, max_l, max_v, lt, rbg], outputs=[files_input, gallery_input])
max_v.input(fn=loadf, inputs=[files_orig, max_s, max_l, max_v, lt, rbg], outputs=[files_input, gallery_input])
with gr.Row():
# The read-only Number next to each slider displays logscale(slider), i.e.
# 2 ** position. It is display-only: infer() is wired to the sliders below.
interpolation_slider = gr.Slider(minimum=1, maximum=5, step=1, value=1, label="Interpolation Steps: ")
interpolation = gr.Number(value=1, show_label=False, interactive=False)
interpolation_slider.change(fn=logscale, inputs=[interpolation_slider], outputs=[interpolation])
with gr.Row():
fps_output_slider = gr.Slider(minimum=0, maximum=5, step=1, value=0, label="FPS output: ")
fps_output = gr.Number(value=1, show_label=False, interactive=False)
fps_output_slider.change(fn=logscale, inputs=[fps_output_slider], outputs=[fps_output])
submit_btn = gr.Button("Submit")
with gr.Column():
# Both outputs receive the path returned by infer().
video_output = gr.Video()
file_output = gr.File()
# Example input: five PNG files with the default settings, run through loadf().
# NOTE(review): cache_examples=True presumably makes Gradio run loadf() on the
# example ahead of time - confirm against the Gradio version in use.
gr.Examples(
examples=[[
["./examples/0.png", "./examples/1.png", "./examples/2.png", "./examples/3.png", "./examples/4.png"],
32, 64, 16, "slider", True
]],
fn=loadf,
inputs=[files_orig, max_s, max_l, max_v, lt, rbg],
outputs=[files_input, gallery_input],
cache_examples=True
)
# infer() gets the raw slider positions (not the Number fields); it applies
# logscale() to the FPS position itself.
submit_btn.click(fn=infer, inputs=[files_input, interpolation_slider, fps_output_slider], outputs=[video_output, file_output])
demo.launch()