Commit e2a98a8 · "new demos"
Parent(s): 7efa9d0

Files changed:
- app.py (+70, -11)
- assets/gradio_example_images/1.png (+0, -0)
- assets/mask1024.jpg (+0, -0)
- assets/mask512.jpg (+0, -0)
- model/__init__.py (+0, -0)
- model/losses.py (+0, -70)
- model/models.py (+0, -99)
- requirements.txt (+2, -0)
- scripts/__init__.py (+0, -0)
- scripts/test_functions.py (+0, -141)
- utils/__init__.py (+0, -0)
- utils/dataloader.py (+0, -63)

app.py
CHANGED
@@ -1,12 +1,25 @@
 import gradio as gr
 import torch
-import argparse
 import git
+import os, shutil

 git.Repo.clone_from("https://huggingface.co/timroelofs123/face_re-aging", "./hf")

+git.Repo.clone_from("https://github.com/timroelofs123/face_reaging", "./fr")
+
+shutil.move('./fr/assets', '.')
+shutil.move('./fr/models', '.')
+shutil.move('./fr/scripts', '.')
+shutil.move('./fr/utils', '.')
+
 from model.models import UNet
-from scripts.test_functions import process_image
+from scripts.test_functions import process_image, process_video
+
+# default settings
+window_size = 512
+stride = 256
+steps = 18
+frame_count = 100


 model_path = "hf/best_unet_model.pth"
@@ -15,12 +28,20 @@ unet_model = UNet().to(device)
 unet_model.load_state_dict(torch.load(model_path, map_location=device))
 unet_model.eval()

-def …
+def block_img(image, source_age, target_age):
     return process_image(unet_model, image, video=False, source_age=source_age,
-…
+                         target_age=target_age, window_size=window_size, stride=stride)
+
+def block_img_vid(image, source_age):
+    return process_image(unet_model, image, video=True, source_age=source_age,
+                         target_age=0, window_size=window_size, stride=stride, steps=steps)
+
+def block_vid(video_path, source_age, target_age):
+    return process_video(unet_model, video_path, source_age, target_age,
+                         window_size=window_size, stride=stride, frame_count=frame_count)

-…
-    fn=…
+demo_img = gr.Interface(
+    fn=block_img,
     inputs=[
         gr.Image(type="pil"),
         gr.Slider(10, 90, value=20, step=1, label="Current age", info="Choose your current age"),
@@ -29,12 +50,50 @@ demo = gr.Interface(
     outputs="image",
     examples=[
         ['assets/gradio_example_images/1.png', 20, 80],
-…
-…
-…
-…
-…
+        ['assets/gradio_example_images/2.png', 75, 40],
+        ['assets/gradio_example_images/3.png', 30, 70],
+        ['assets/gradio_example_images/4.png', 22, 60],
+        ['assets/gradio_example_images/5.png', 28, 75],
+        ['assets/gradio_example_images/6.png', 35, 15]
+    ],
+    description="Input an image of a person and age them from the source age to the target age."
+)
+
+demo_img_vid = gr.Interface(
+    fn=block_img_vid,
+    inputs=[
+        gr.Image(type="pil"),
+        gr.Slider(10, 90, value=20, step=1, label="Current age", info="Choose your current age"),
+    ],
+    outputs=gr.Video(),
+    examples=[
+        ['assets/gradio_example_images/1.png', 20],
+        ['assets/gradio_example_images/2.png', 75],
+        ['assets/gradio_example_images/3.png', 30],
+        ['assets/gradio_example_images/4.png', 22],
+        ['assets/gradio_example_images/5.png', 28],
+        ['assets/gradio_example_images/6.png', 35]
+    ],
+    description="Input an image of a person and a video will be returned of the person at different ages."
+)
+
+demo_vid = gr.Interface(
+    fn=block_vid,
+    inputs=[
+        gr.Video(),
+        gr.Slider(10, 90, value=20, step=1, label="Current age", info="Choose your current age"),
+        gr.Slider(10, 90, value=80, step=1, label="Target age", info="Choose the age you want to become")
     ],
+    outputs=gr.Video(),
+    # examples=[
+    #     ['assets/gradio_example_images/orig.mp4', 35, 60],
+    # ],
+    description="Input a video of a person, and it will be aged frame-by-frame."
 )

+demo = gr.TabbedInterface([demo_img, demo_img_vid, demo_vid],
+                          tab_names=['Image inference demo', 'Image animation demo', 'Video inference demo'],
+                          title="Face Re-Aging Demo",
+                          )
+
 demo.launch()

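The new startup block re-clones both repositories and moves four folders into the working directory on every launch. That is fine in a fresh Space container, but git.Repo.clone_from fails on a non-empty target directory and shutil.move raises when the destination folder already exists, so a re-run-safe variant may be convenient for local development. The sketch below is illustrative only and not part of the commit; the clone_once helper is a hypothetical name.

# Sketch: a re-run-safe variant of app.py's startup block (assumption, not in the commit).
import os
import shutil
import git


def clone_once(url, path):
    # Only clone when the target directory is missing; clone_from errors otherwise.
    if not os.path.exists(path):
        git.Repo.clone_from(url, path)


clone_once("https://huggingface.co/timroelofs123/face_re-aging", "./hf")
clone_once("https://github.com/timroelofs123/face_reaging", "./fr")

for folder in ("assets", "models", "scripts", "utils"):
    # shutil.move into "." fails if ./<folder> already exists, so skip in that case.
    if not os.path.exists(folder):
        shutil.move(os.path.join("./fr", folder), ".")
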
assets/gradio_example_images/1.png
DELETED
Binary file (987 kB)

assets/mask1024.jpg
DELETED
Binary file (207 kB)

assets/mask512.jpg
DELETED
Binary file (10.5 kB)

model/__init__.py
DELETED
File without changes

model/losses.py
DELETED
@@ -1,70 +0,0 @@
-import torch
-import torch.nn as nn
-import lpips  # LPIPS library for perceptual loss
-
-class GeneratorLoss(nn.Module):
-    def __init__(self, discriminator_model, l1_weight=1.0, perceptual_weight=1.0, adversarial_weight=0.05,
-                 device="cpu"):
-        super(GeneratorLoss, self).__init__()
-        self.discriminator_model = discriminator_model
-        self.l1_weight = l1_weight
-        self.perceptual_weight = perceptual_weight
-        self.adversarial_weight = adversarial_weight
-        self.criterion_l1 = nn.L1Loss()
-        self.criterion_adversarial = nn.BCEWithLogitsLoss()
-        self.criterion_perceptual = lpips.LPIPS(net='vgg').to(device)
-
-    def forward(self, output, target, source):
-        # L1 loss
-
-        l1_loss = self.criterion_l1(output, target)
-
-        # Perceptual loss
-        perceptual_loss = torch.mean(self.criterion_perceptual(output, target))
-
-        # Adversarial loss
-        fake_input = torch.cat([output, source[:, 4:5, :, :]], dim=1)
-        fake_prediction = self.discriminator_model(fake_input)
-
-        adversarial_loss = self.criterion_adversarial(fake_prediction, torch.ones_like(fake_prediction))
-
-        # Combine losses
-        generator_loss = self.l1_weight * l1_loss + self.perceptual_weight * perceptual_loss + \
-                         self.adversarial_weight * adversarial_loss
-
-        return generator_loss, l1_loss, perceptual_loss, adversarial_loss
-
-class DiscriminatorLoss(nn.Module):
-    def __init__(self, discriminator_model, fake_weight=1.0, real_weight=2.0, mock_weight=.5):
-        super(DiscriminatorLoss, self).__init__()
-        self.discriminator_model = discriminator_model
-        self.criterion_adversarial = nn.BCEWithLogitsLoss()
-        self.fake_weight = fake_weight
-        self.real_weight = real_weight
-        self.mock_weight = mock_weight
-
-    def forward(self, output, target, source):
-        # Adversarial loss
-        fake_input = torch.cat([output, source[:, 4:5, :, :]], dim=1)  # prediction img with target age
-        real_input = torch.cat([target, source[:, 4:5, :, :]], dim=1)  # target img with target age
-
-        mock_input1 = torch.cat([source[:, :3, :, :], source[:, 4:5, :, :]], dim=1)  # source img with target age
-        mock_input2 = torch.cat([target, source[:, 3:4, :, :]], dim=1)  # target img with source age
-        mock_input3 = torch.cat([output, source[:, 3:4, :, :]], dim=1)  # prediction img with source age
-        mock_input4 = torch.cat([target, source[:, 3:4, :, :]], dim=1)  # target img with target age
-
-        fake_pred, real_pred = self.discriminator_model(fake_input), self.discriminator_model(real_input)
-        mock_pred1, mock_pred2, mock_pred3, mock_pred4 = (self.discriminator_model(mock_input1),
-                                                          self.discriminator_model(mock_input2),
-                                                          self.discriminator_model(mock_input3),
-                                                          self.discriminator_model(mock_input4))
-
-        discriminator_loss = (self.fake_weight * self.criterion_adversarial(fake_pred, torch.zeros_like(fake_pred)) +
-                              self.real_weight * self.criterion_adversarial(real_pred, torch.ones_like(real_pred)) +
-                              self.mock_weight * self.criterion_adversarial(mock_pred1, torch.zeros_like(mock_pred1)) +
-                              self.mock_weight * self.criterion_adversarial(mock_pred2, torch.zeros_like(mock_pred2)) +
-                              self.mock_weight * self.criterion_adversarial(mock_pred3, torch.zeros_like(mock_pred3)) +
-                              self.mock_weight * self.criterion_adversarial(mock_pred4, torch.zeros_like(mock_pred4))
-                              )
-
-        return discriminator_loss

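For reference, a sketch of how these deleted loss modules could be wired into a training step. The discriminator channel count (three RGB channels plus one age plane, matching the torch.cat calls above) and the 5-channel source layout are inferred from the code; the batch size, random tensors, and the assumption that model.losses remains importable from the cloned GitHub repo are placeholders.

# Illustrative wiring of GeneratorLoss / DiscriminatorLoss (a sketch, not the author's training script).
import torch
from model.models import UNet, PatchGANDiscriminator
from model.losses import GeneratorLoss, DiscriminatorLoss

device = "cuda" if torch.cuda.is_available() else "cpu"
generator = UNet().to(device)
# fake_input = cat([3-channel output, 1 age plane]) -> 4 discriminator input channels
discriminator = PatchGANDiscriminator(input_channels=4).to(device)

gen_loss_fn = GeneratorLoss(discriminator, device=device)
disc_loss_fn = DiscriminatorLoss(discriminator)

# source: RGB + source-age plane + target-age plane; target: plain RGB (placeholder tensors)
source = torch.rand(2, 5, 512, 512, device=device)
target = torch.rand(2, 3, 512, 512, device=device)

output = generator(source)
gen_total, l1, perceptual, adversarial = gen_loss_fn(output, target, source)
disc_total = disc_loss_fn(output.detach(), target, source)
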
model/models.py
DELETED
@@ -1,99 +0,0 @@
-import torch
-import torch.nn as nn
-import antialiased_cnns
-
-
-class DownLayer(nn.Module):
-    def __init__(self, in_channels, out_channels):
-        super(DownLayer, self).__init__()
-        self.layer = nn.Sequential(
-            nn.MaxPool2d(kernel_size=2, stride=1),
-            antialiased_cnns.BlurPool(in_channels, stride=2),
-            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
-            nn.LeakyReLU(inplace=True),
-            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
-            nn.LeakyReLU(inplace=True)
-        )
-
-    def forward(self, x):
-        return self.layer(x)
-
-
-class UpLayer(nn.Module):
-    def __init__(self, in_channels, out_channels):
-        super(UpLayer, self).__init__()
-        # Conv transpose upsampling
-
-        self.blur_upsample = nn.Sequential(
-            nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2, padding=0),
-            antialiased_cnns.BlurPool(out_channels, stride=1)
-        )
-
-        self.layer = nn.Sequential(
-            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
-            nn.LeakyReLU(inplace=True),
-            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
-            nn.LeakyReLU(inplace=True)
-        )
-
-    def forward(self, x, skip):
-        x = self.blur_upsample(x)
-        x = torch.cat([x, skip], dim=1)  # Concatenate with skip connection
-        return self.layer(x)
-
-
-class UNet(nn.Module):
-    def __init__(self):
-        super(UNet, self).__init__()
-        self.init_conv = nn.Sequential(
-            nn.Conv2d(5, 64, kernel_size=3, padding=1),  # output: 512 x 512 x 64
-            nn.LeakyReLU(inplace=True),
-            nn.Conv2d(64, 64, kernel_size=3, padding=1),  # output: 512 x 512 x 64
-            nn.LeakyReLU(inplace=True)
-        )
-
-        self.down1 = DownLayer(64, 128)  # output: 256 x 256 x 128
-        self.down2 = DownLayer(128, 256)  # output: 128 x 128 x 256
-        self.down3 = DownLayer(256, 512)  # output: 64 x 64 x 512
-        self.down4 = DownLayer(512, 1024)  # output: 32 x 32 x 1024
-        self.up1 = UpLayer(1024, 512)  # output: 64 x 64 x 512
-        self.up2 = UpLayer(512, 256)  # output: 128 x 128 x 256
-        self.up3 = UpLayer(256, 128)  # output: 256 x 256 x 128
-        self.up4 = UpLayer(128, 64)  # output: 512 x 512 x 64
-        self.final_conv = nn.Conv2d(64, 3, kernel_size=1)  # output: 512 x 512 x 3
-
-    def forward(self, x):
-        x0 = self.init_conv(x)
-        x1 = self.down1(x0)
-        x2 = self.down2(x1)
-        x3 = self.down3(x2)
-        x4 = self.down4(x3)
-        x = self.up1(x4, x3)
-        x = self.up2(x, x2)
-        x = self.up3(x, x1)
-        x = self.up4(x, x0)
-        x = self.final_conv(x)
-        return x
-
-
-class PatchGANDiscriminator(nn.Module):
-    def __init__(self, input_channels=3):
-        super(PatchGANDiscriminator, self).__init__()
-        self.model = nn.Sequential(
-            nn.Conv2d(input_channels, 64, kernel_size=4, stride=2, padding=1),
-            nn.LeakyReLU(0.2, inplace=True),
-
-            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
-            nn.BatchNorm2d(128),
-            nn.LeakyReLU(0.2, inplace=True),
-
-            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),
-            nn.BatchNorm2d(256),
-            nn.LeakyReLU(0.2, inplace=True),
-
-            nn.Conv2d(256, 1, kernel_size=4, stride=1, padding=1)
-            # Output layer with 1 channel for binary classification
-        )
-
-    def forward(self, x):
-        return self.model(x)

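A quick shape check of the deleted UNet, as a sketch: it takes a 5-channel 512x512 input (RGB plus source-age and target-age planes, as built in scripts/test_functions.py) and returns a 3-channel output of the same size, which the test functions add back onto the original image. It assumes model.models is importable after app.py's startup clone/move step.

# Shape sanity check for the UNet (illustrative only).
import torch
from model.models import UNet

net = UNet().eval()
dummy = torch.rand(1, 5, 512, 512)  # RGB + source-age plane + target-age plane
with torch.no_grad():
    out = net(dummy)
print(out.shape)  # expected: torch.Size([1, 3, 512, 512])
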
requirements.txt
CHANGED
@@ -2,4 +2,6 @@ torch
 torchvision
 antialiased_cnns
 face_recognition
+ffmpy
+av
 gitpython

scripts/__init__.py
DELETED
File without changes

scripts/test_functions.py
DELETED
@@ -1,141 +0,0 @@
-import face_recognition
-import numpy as np
-from PIL import Image
-import torch
-from torch.autograd import Variable
-from torchvision import transforms
-from torchvision.io import write_video
-import tempfile
-
-mask_file = torch.from_numpy(np.array(Image.open('assets/mask1024.jpg').convert('L'))) / 255
-small_mask_file = torch.from_numpy(np.array(Image.open('assets/mask512.jpg').convert('L'))) / 255
-
-def sliding_window_tensor(input_tensor, window_size, stride, your_model, mask=mask_file, small_mask=small_mask_file):
-    """
-    Apply aging operation on input tensor using a sliding-window method. This operation is done on the GPU, if available.
-    """
-
-    input_tensor = input_tensor.to(next(your_model.parameters()).device)
-    mask = mask.to(next(your_model.parameters()).device)
-    small_mask = small_mask.to(next(your_model.parameters()).device)
-
-    n, c, h, w = input_tensor.size()
-    output_tensor = torch.zeros((n, 3, h, w), dtype=input_tensor.dtype, device=input_tensor.device)
-
-    count_tensor = torch.zeros((n, 3, h, w), dtype=torch.float32, device=input_tensor.device)
-
-    add = 2 if window_size % stride != 0 else 1
-
-    for y in range(0, h - window_size + add, stride):
-        for x in range(0, w - window_size + add, stride):
-            window = input_tensor[:, :, y:y + window_size, x:x + window_size]
-
-            # Apply the same preprocessing as during training
-            input_variable = Variable(window, requires_grad=False)  # Assuming GPU is available
-
-            # Forward pass
-            with torch.no_grad():
-                output = your_model(input_variable)
-
-            output_tensor[:, :, y:y + window_size, x:x + window_size] += output * small_mask
-            count_tensor[:, :, y:y + window_size, x:x + window_size] += small_mask
-
-    count_tensor = torch.clamp(count_tensor, min=1.0)
-
-    # Average the overlapping regions
-    output_tensor /= count_tensor
-
-    # Apply mask
-    output_tensor *= mask
-
-    return output_tensor.cpu()
-
-
-def process_image(your_model, image, video, source_age, target_age=0,
-                  window_size=512, stride=256, steps=18):
-    """
-    Aging the person in the image.
-    If video=False, we age as from source_age to target_age, and return an image.
-    If video=True, we age from source_age to a range of target ages, and return this as the path to a video.
-    """
-    if video:
-        target_age = 0
-    input_size = (1024, 1024)
-
-    # image = face_recognition.load_image_file(filename)
-    image = np.array(image)
-    if video:  # h264 codec requires frame size to be divisible by 2.
-        width, height, depth = image.shape
-        new_width = width if width % 2 == 0 else width - 1
-        new_height = height if height % 2 == 0 else height - 1
-        image.resize((new_width, new_height, depth))
-
-    fl = face_recognition.face_locations(image)[0]
-
-    # calculate margins
-    margin_y_t = int((fl[2] - fl[0]) * .63 * .85)  # larger as the forehead is often cut off
-    margin_y_b = int((fl[2] - fl[0]) * .37 * .85)
-    margin_x = int((fl[1] - fl[3]) // (2 / .85))
-    margin_y_t += 2 * margin_x - margin_y_t - margin_y_b  # make sure square is preserved
-
-    l_y = max([fl[0] - margin_y_t, 0])
-    r_y = min([fl[2] + margin_y_b, image.shape[0]])
-    l_x = max([fl[3] - margin_x, 0])
-    r_x = min([fl[1] + margin_x, image.shape[1]])
-
-    # crop image
-    cropped_image = image[l_y:r_y, l_x:r_x, :]
-
-    # Resizing
-    orig_size = cropped_image.shape[:2]
-
-    cropped_image = transforms.ToTensor()(cropped_image)
-
-    cropped_image_resized = transforms.Resize(input_size, interpolation=Image.BILINEAR, antialias=True)(cropped_image)
-
-    source_age_channel = torch.full_like(cropped_image_resized[:1, :, :], source_age / 100)
-    target_age_channel = torch.full_like(cropped_image_resized[:1, :, :], target_age / 100)
-    input_tensor = torch.cat([cropped_image_resized, source_age_channel, target_age_channel], dim=0).unsqueeze(0)
-
-    image = transforms.ToTensor()(image)
-
-    if video:
-        # aging in steps
-        interval = .8 / steps
-        aged_cropped_images = torch.zeros((steps, 3, input_size[1], input_size[0]))
-        for i in range(0, steps):
-            input_tensor[:, -1, :, :] += interval
-
-            # performing actions on image
-            aged_cropped_images[i, ...] = sliding_window_tensor(input_tensor, window_size, stride, your_model)
-
-        # resize back to original size
-        aged_cropped_images_resized = transforms.Resize(orig_size, interpolation=Image.BILINEAR, antialias=True)(
-            aged_cropped_images)
-
-        # re-apply
-        image = image.repeat(steps, 1, 1, 1)
-
-        image[:, :, l_y:r_y, l_x:r_x] += aged_cropped_images_resized
-        image = torch.clamp(image, 0, 1)
-        image = (image * 255).to(torch.uint8)
-
-        output_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
-
-        write_video(output_file.name, image.permute(0, 2, 3, 1), 2)
-
-        return output_file.name
-
-    else:
-        # performing actions on image
-        aged_cropped_image = sliding_window_tensor(input_tensor, window_size, stride, your_model)
-
-        # resize back to original size
-        aged_cropped_image_resized = transforms.Resize(orig_size, interpolation=Image.BILINEAR, antialias=True)(
-            aged_cropped_image)
-
-        # re-apply
-        image[:, l_y:r_y, l_x:r_x] += aged_cropped_image_resized.squeeze(0)
-        image = torch.clamp(image, 0, 1)
-
-        return transforms.functional.to_pil_image(image)

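These functions now come from the GitHub repo that app.py clones at startup, and the Gradio blocks call them exactly as shown in the diff above. Calling process_image directly, outside Gradio, would presumably look like the sketch below; the weights path, the example image, and the mask files under assets/ are assumed to be present locally, and the output filename is a placeholder.

# Sketch: direct use of process_image, mirroring app.py's block_img and block_img_vid.
import torch
from PIL import Image
from model.models import UNet
from scripts.test_functions import process_image

device = "cuda" if torch.cuda.is_available() else "cpu"
model = UNet().to(device)
model.load_state_dict(torch.load("hf/best_unet_model.pth", map_location=device))
model.eval()

img = Image.open("assets/gradio_example_images/1.png")

# Image mode: returns a PIL image aged from 20 to 80.
aged = process_image(model, img, video=False, source_age=20, target_age=80,
                     window_size=512, stride=256)
aged.save("aged.png")  # hypothetical output path

# Video mode: returns the path of a temporary mp4 stepping through target ages.
video_path = process_image(model, img, video=True, source_age=20,
                           window_size=512, stride=256, steps=18)
print(video_path)
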
utils/__init__.py
DELETED
File without changes

utils/dataloader.py
DELETED
@@ -1,63 +0,0 @@
-import torch
-from torch.utils.data import Dataset, DataLoader
-from torchvision import transforms
-from PIL import Image
-import os
-import random
-from pathlib import Path
-
-
-# Define the transformations
-transform = transforms.Compose([
-    transforms.RandomRotation(degrees=10),
-    transforms.RandomCrop(512),
-    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
-    transforms.ToTensor(),
-])
-
-class CustomDataset(Dataset):
-    def __init__(self, root_dir, transform=None):
-        self.root_dir = root_dir
-        self.transform = transform
-        self.image_folders = [folder for folder in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, folder))]
-
-    def __len__(self):
-        return len(self.image_folders)
-
-    def __getitem__(self, idx):
-        folder_name = self.image_folders[idx]
-        folder_path = os.path.join(self.root_dir, folder_name)
-
-        # # Get the list of image filenames in the folder
-        # image_filenames = [f"{i}.jpg" for i in range(0, 101, 10)]
-        image_filenames = os.listdir(folder_path)
-
-        # Pick two random assets from the folder
-        source_image_name, target_image_name = random.sample(image_filenames, 2)
-        # source_image_name, target_image_name = '20.jpg', '80.jpg'
-
-        source_age = int(Path(source_image_name).stem) / 100
-        target_age = int(Path(target_image_name).stem) / 100
-
-        # Randomly select two assets from the folder
-        source_image_path = os.path.join(folder_path, source_image_name)
-        target_image_path = os.path.join(folder_path, target_image_name)
-
-        source_image = Image.open(source_image_path).convert('RGB')
-        target_image = Image.open(target_image_path).convert('RGB')
-
-        # Apply the same random crop and augmentations to both assets
-        if self.transform:
-            seed = torch.randint(0, 2 ** 32 - 1, (1,)).item()
-            torch.manual_seed(seed)
-            source_image = self.transform(source_image)
-            torch.manual_seed(seed)
-            target_image = self.transform(target_image)
-
-        source_age_channel = torch.full_like(source_image[:1, :, :], source_age)
-        target_age_channel = torch.full_like(source_image[:1, :, :], target_age)
-
-        # Concatenate the age channels with the source_image
-        source_image = torch.cat([source_image, source_age_channel, target_age_channel], dim=0)
-
-        return source_image, target_image

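For completeness, a sketch of how the deleted CustomDataset would be consumed for training. The expected layout (one sub-folder per identity, with images named after the age they depict, e.g. 20.jpg) is inferred from __getitem__; the root path and loader settings below are placeholders, and the module is assumed to remain importable from the cloned GitHub repo.

# Sketch: building a training loader around CustomDataset (illustrative assumptions only).
from torch.utils.data import DataLoader
from utils.dataloader import CustomDataset, transform

dataset = CustomDataset("path/to/training_faces", transform=transform)  # hypothetical root folder
loader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2)

source, target = next(iter(loader))
# source: RGB + source-age plane + target-age plane; target: plain RGB crop
print(source.shape, target.shape)  # e.g. torch.Size([4, 5, 512, 512]) torch.Size([4, 3, 512, 512])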