Spaces: Build error
Commit 28b1a6e
Parent(s): a58eee5
:beers: cheers
Files changed:
- app.py +77 -0
- modeling.py +89 -0
- requirements.txt +5 -0
app.py
ADDED
@@ -0,0 +1,77 @@
import gc

import gradio as gr
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from PIL.Image import Resampling
from pytorchvideo.data.encoded_video import EncodedVideo
from pytorchvideo.transforms.functional import uniform_temporal_subsample
from torchvision.io import write_video
from torchvision.transforms.functional import resize

from modeling import Generator

MAX_DURATION = 4
OUT_FPS = 18
DEVICE = "cpu" if not torch.cuda.is_available() else "cuda"

# Reupload of model found here: https://huggingface.co/spaces/awacke1/Image2LineDrawing
model = Generator(3, 1, 3)
weights_path = hf_hub_download("nateraw/image-2-line-drawing", "pytorch_model.bin")
model.load_state_dict(torch.load(weights_path, map_location=DEVICE))
model.eval()


def process_one_second(vid, start_sec, out_fps):
    """Process one second of a video at a given fps.

    Args:
        vid (EncodedVideo): A pytorchvideo.EncodedVideo instance containing the video to process
        start_sec (int): The second to start processing at
        out_fps (int): The fps to output the video at

    Returns:
        torch.Tensor: The processed video as a (T, H, W, C) tensor with values in [0, 255]
    """
    # C, T, H, W
    video_arr = vid.get_clip(start_sec, start_sec + 1)["video"]
    # C, T, H, W where T == frames per second
    x = uniform_temporal_subsample(video_arr, out_fps)
    # C, T, H, W where the shorter edge has been scaled to 256
    # (for vertical videos that's the width, not the height)
    x = resize(x, 256, Resampling.BICUBIC)
    # C, T, H, W -> T, C, H, W (basically T acts as batch size now)
    x = x.permute(1, 0, 2, 3)

    with torch.no_grad():
        # T, 1, H, W
        out = model(x)

    # T, C, H, W -> T, H, W, C, rescaled to 0-255
    out = out.permute(0, 2, 3, 1).clip(0, 1) * 255
    # Greyscale -> RGB
    out = out.repeat(1, 1, 1, 3)
    return out


def fn(fpath):
    start_sec = 0
    vid = EncodedVideo.from_path(fpath)
    duration = min(MAX_DURATION, int(vid.duration))
    for i in range(duration):
        print(f"🖼️ Processing step {i + 1}/{duration}...")
        video = process_one_second(vid, start_sec=i + start_sec, out_fps=OUT_FPS)
        gc.collect()
        if i == 0:
            video_all = video
        else:
            video_all = np.concatenate((video_all, video))

    write_video("out.mp4", video_all, fps=OUT_FPS)
    return "out.mp4"


webcam_interface = gr.Interface(
    fn, gr.Video(source="webcam"), gr.Video(type="file", format="mp4")
)
webcam_interface.launch()
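For quick local testing it can help to exercise the model outside the Gradio UI (running or importing app.py calls launch() at the end of the script). A minimal sketch, assuming a short local clip named sample.mp4 (hypothetical filename) and the same checkpoint as above, that mirrors the preprocessing in process_one_second:

import torch
from huggingface_hub import hf_hub_download
from pytorchvideo.data.encoded_video import EncodedVideo
from pytorchvideo.transforms.functional import uniform_temporal_subsample
from torchvision.transforms.functional import InterpolationMode, resize

from modeling import Generator

# Same setup as app.py, kept on CPU for simplicity.
model = Generator(3, 1, 3)
weights_path = hf_hub_download("nateraw/image-2-line-drawing", "pytorch_model.bin")
model.load_state_dict(torch.load(weights_path, map_location="cpu"))
model.eval()

# Decode one second of the clip and preprocess it the same way app.py does.
vid = EncodedVideo.from_path("sample.mp4")  # hypothetical local file
x = vid.get_clip(0, 1)["video"]                 # C, T, H, W
x = uniform_temporal_subsample(x, 18)           # keep 18 frames (OUT_FPS)
x = resize(x, 256, InterpolationMode.BICUBIC)   # shorter edge -> 256
x = x.permute(1, 0, 2, 3)                       # T, C, H, W

with torch.no_grad():
    out = model(x)

print(out.shape)  # torch.Size([18, 1, H, W]); values in [0, 1] from the Sigmoid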
modeling.py
ADDED
@@ -0,0 +1,89 @@
# Taken from here: https://huggingface.co/spaces/awacke1/Image2LineDrawing
from torch import nn

norm_layer = nn.InstanceNorm2d


class ResidualBlock(nn.Module):
    def __init__(self, in_features):
        super(ResidualBlock, self).__init__()

        conv_block = [
            nn.ReflectionPad2d(1),
            nn.Conv2d(in_features, in_features, 3),
            norm_layer(in_features),
            nn.ReLU(inplace=True),
            nn.ReflectionPad2d(1),
            nn.Conv2d(in_features, in_features, 3),
            norm_layer(in_features),
        ]

        self.conv_block = nn.Sequential(*conv_block)

    def forward(self, x):
        return x + self.conv_block(x)


class Generator(nn.Module):
    def __init__(self, input_nc, output_nc, n_residual_blocks=9, sigmoid=True):
        super(Generator, self).__init__()

        # Initial convolution block
        model0 = [
            nn.ReflectionPad2d(3),
            nn.Conv2d(input_nc, 64, 7),
            norm_layer(64),
            nn.ReLU(inplace=True),
        ]
        self.model0 = nn.Sequential(*model0)

        # Downsampling
        model1 = []
        in_features = 64
        out_features = in_features * 2
        for _ in range(2):
            model1 += [
                nn.Conv2d(in_features, out_features, 3, stride=2, padding=1),
                norm_layer(out_features),
                nn.ReLU(inplace=True),
            ]
            in_features = out_features
            out_features = in_features * 2
        self.model1 = nn.Sequential(*model1)

        # Residual blocks
        model2 = []
        for _ in range(n_residual_blocks):
            model2 += [ResidualBlock(in_features)]
        self.model2 = nn.Sequential(*model2)

        # Upsampling
        model3 = []
        out_features = in_features // 2
        for _ in range(2):
            model3 += [
                nn.ConvTranspose2d(
                    in_features, out_features, 3, stride=2, padding=1, output_padding=1
                ),
                norm_layer(out_features),
                nn.ReLU(inplace=True),
            ]
            in_features = out_features
            out_features = in_features // 2
        self.model3 = nn.Sequential(*model3)

        # Output layer
        model4 = [nn.ReflectionPad2d(3), nn.Conv2d(64, output_nc, 7)]
        if sigmoid:
            model4 += [nn.Sigmoid()]

        self.model4 = nn.Sequential(*model4)

    def forward(self, x, cond=None):
        out = self.model0(x)
        out = self.model1(out)
        out = self.model2(out)
        out = self.model3(out)
        out = self.model4(out)

        return out
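As a sanity check on the architecture: the two stride-2 downsampling convs halve the spatial size twice, and the two ConvTranspose2d layers double it back, so a 256x256 input comes out at 256x256 with output_nc channels. A minimal shape check with randomly initialized weights:

import torch

from modeling import Generator

# The configuration app.py uses: 3 input channels (RGB), 1 output channel
# (the greyscale line drawing), 3 residual blocks.
model = Generator(3, 1, 3)
model.eval()

x = torch.rand(2, 3, 256, 256)  # dummy batch of two RGB frames
with torch.no_grad():
    y = model(x)

print(y.shape)  # torch.Size([2, 1, 256, 256])
print(y.min().item(), y.max().item())  # bounded to (0, 1) by the final Sigmoid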
requirements.txt
ADDED
@@ -0,0 +1,5 @@
gradio
huggingface_hub
torch==1.11.0
torchvision==0.12.0
pytorchvideo==0.1.5
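The torch, torchvision, and pytorchvideo pins are mutually compatible (torchvision 0.12.0 is the release matched to torch 1.11.0), while gradio and huggingface_hub are left floating. One way to confirm what actually resolved in the environment, as a minimal sketch:

from importlib.metadata import version

# Print the installed version of each dependency from requirements.txt.
for pkg in ("gradio", "huggingface_hub", "torch", "torchvision", "pytorchvideo"):
    print(pkg, version(pkg))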