Spaces: Runtime error

baixintech_zhangyiming_prod committed on
Commit · 7dd7207
1 Parent(s): 90e2119
init
Browse files
- .gitignore +6 -0
- README.md +1 -1
- app.py +24 -0
- images/clean/3.png +0 -0
- images/watermark/1.png +0 -0
- images/watermark/2.png +0 -0
- requirements.txt +9 -0
- wmdetection/__init__.py +0 -0
- wmdetection/dataset/__init__.py +0 -0
- wmdetection/dataset/synthetic_wm.py +211 -0
- wmdetection/models/__init__.py +86 -0
- wmdetection/models/convnext.py +200 -0
- wmdetection/pipelines/__init__.py +0 -0
- wmdetection/pipelines/metrics.py +9 -0
- wmdetection/pipelines/predictor.py +73 -0
- wmdetection/utils/__init__.py +2 -0
- wmdetection/utils/files.py +26 -0
- wmdetection/utils/fp16module.py +64 -0
.gitignore
ADDED
@@ -0,0 +1,6 @@
+__pycache__/
+.ipynb_checkpoints/
+dataset/*.csv
+dataset/watermarks-validation/
+weights/
+model_files/
README.md
CHANGED
@@ -10,4 +10,4 @@ pinned: false
 license: apache-2.0
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,24 @@
+import gradio as gr
+from wmdetection.models import get_watermarks_detection_model
+from wmdetection.pipelines.predictor import WatermarksPredictor
+import os, glob
+
+
+model, transforms = get_watermarks_detection_model(
+    'convnext-tiny',
+    fp16=False,
+    cache_dir='model_files'
+)
+predictor = WatermarksPredictor(model, transforms, 'cuda:0')
+
+
+def predict(image):
+    result = predictor.predict_image(image)
+    return 'watermarked' if result else 'clean'  # class index 1 means "watermarked"
+
+
+examples = glob.glob(os.path.join('images', 'clean', '*'))
+examples.extend(glob.glob(os.path.join('images', 'watermark', '*')))
+iface = gr.Interface(fn=predict, inputs=[gr.inputs.Image(type="pil")],
+                     examples=examples, outputs="text")
+iface.launch()
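Note: the demo above pins the predictor to 'cuda:0' while get_watermarks_detection_model loads the weights with its default device='cpu', so on a CPU-only Space the .to('cuda:0') call inside predict_image would raise, and even with a GPU the input and the model end up on different devices. A minimal sketch of a device-consistent variant, using the same wmdetection API:

import torch
from wmdetection.models import get_watermarks_detection_model
from wmdetection.pipelines.predictor import WatermarksPredictor

# Pick a device that actually exists and pass it to both the model and the predictor.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
model, transforms = get_watermarks_detection_model(
    'convnext-tiny', fp16=False, device=device, cache_dir='model_files'
)
predictor = WatermarksPredictor(model, transforms, device)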
images/clean/3.png
ADDED
images/watermark/1.png
ADDED
images/watermark/2.png
ADDED
requirements.txt
ADDED
@@ -0,0 +1,9 @@
+torch
+torchvision
+pillow
+numpy
+matplotlib
+tqdm
+huggingface-hub
+opencv-python
+timm>=0.6.12
wmdetection/__init__.py
ADDED
File without changes
wmdetection/dataset/__init__.py
ADDED
File without changes
wmdetection/dataset/synthetic_wm.py
ADDED
@@ -0,0 +1,211 @@
+import os
+from PIL import Image
+import numpy as np
+import pandas as pd
+import cv2
+import string
+import random
+
+CV2_FONTS = [
+    #cv2.FONT_HERSHEY_COMPLEX,
+    cv2.FONT_HERSHEY_COMPLEX_SMALL,
+    cv2.FONT_HERSHEY_DUPLEX,
+    cv2.FONT_HERSHEY_PLAIN,
+    cv2.FONT_HERSHEY_SIMPLEX,
+    cv2.FONT_HERSHEY_TRIPLEX,
+    cv2.FONT_ITALIC,
+    cv2.QT_FONT_BLACK,
+    cv2.QT_FONT_NORMAL
+]
+
+# random float between x and y
+def random_float(x, y):
+    return random.random()*(y-x)+x
+
+# computes the text size in pixels for cv2.putText
+def get_text_size(text, font, font_scale, thickness):
+    (w, h), baseline = cv2.getTextSize(text, font, font_scale, thickness)
+    return w, h+baseline
+
+# computes the font_scale needed for a given text height
+def get_font_scale(needed_height, text, font, thickness):
+    w, h = get_text_size(text, font, 1, thickness)
+    return needed_height/h
+
+# draws text onto an image
+def place_text(image, text, color=(255,255,255), alpha=1, position=(0, 0), angle=0,
+               font=cv2.FONT_HERSHEY_SIMPLEX, font_scale=1.0, thickness=3):
+    image = np.array(image)
+    overlay = np.zeros_like(image)
+    output = image.copy()
+
+    cv2.putText(overlay, text, position, font, font_scale, color, thickness)
+
+    if angle != 0:
+        text_w, text_h = get_text_size(text, font, font_scale, thickness)
+        rotate_M = cv2.getRotationMatrix2D((position[0]+text_w//2, position[1]-text_h//2), angle, 1)
+        overlay = cv2.warpAffine(overlay, rotate_M, (overlay.shape[1], overlay.shape[0]))
+
+    overlay[overlay==0] = image[overlay==0]
+    cv2.addWeighted(overlay, alpha, output, 1-alpha, 0, output)
+
+    return Image.fromarray(output)
+
+def get_random_font_params(text, text_height, fonts, font_thickness_range):
+    font = random.choice(fonts)
+    font_thickness_range_scaled = [int(font_thickness_range[0]*(text_height/35)),
+                                   int(font_thickness_range[1]*(text_height/85))]
+    try:
+        font_thickness = min(random.randint(*font_thickness_range_scaled), 2)
+    except ValueError:
+        font_thickness = 2
+    font_scale = get_font_scale(text_height, text, font, font_thickness)
+    return font, font_scale, font_thickness
+
+# places a watermark at the center of the image with random parameters
+def place_random_centered_watermark(
+    pil_image,
+    text,
+    center_point_range_shift=(-0.025, 0.025),
+    random_angle=(0,0),
+    text_height_in_percent_range=(0.15, 0.18),
+    text_alpha_range=(0.23, 0.5),
+    fonts=CV2_FONTS,
+    font_thickness_range=(2, 7),
+    colors=[(255,255,255)]
+):
+    w, h = pil_image.size
+
+    position_shift_x = random_float(*center_point_range_shift)
+    offset_x = int(w*position_shift_x)
+    position_shift_y = random_float(*center_point_range_shift)
+    offset_y = int(w*position_shift_y)
+
+    text_height = int(h*random_float(*text_height_in_percent_range))
+
+    font, font_scale, font_thickness = get_random_font_params(text, text_height, fonts, font_thickness_range)
+
+    text_width, _ = get_text_size(text, font, font_scale, font_thickness)
+
+    position_x = int((w/2)-text_width/2+offset_x)
+    position_y = int((h/2)+text_height/2+offset_y)
+
+    return place_text(
+        pil_image,
+        text,
+        color=random.choice(colors),
+        alpha=random_float(*text_alpha_range),
+        position=(position_x, position_y),
+        angle=random.randint(*random_angle),
+        thickness=font_thickness,
+        font=font,
+        font_scale=font_scale
+    )
+
+def place_random_watermark(
+    pil_image,
+    text,
+    random_angle=(0,0),
+    text_height_in_percent_range=(0.10, 0.18),
+    text_alpha_range=(0.18, 0.4),
+    fonts=CV2_FONTS,
+    font_thickness_range=(2, 6),
+    colors=[(255,255,255)]
+):
+    w, h = pil_image.size
+
+    text_height = int(h*random_float(*text_height_in_percent_range))
+
+    font, font_scale, font_thickness = get_random_font_params(text, text_height, fonts, font_thickness_range)
+
+    text_width, _ = get_text_size(text, font, font_scale, font_thickness)
+
+    position_x = random.randint(0, max(w-text_width, 10))
+    position_y = random.randint(text_height, h)
+
+    return place_text(
+        pil_image,
+        text,
+        color=random.choice(colors),
+        alpha=random_float(*text_alpha_range),
+        position=(position_x, position_y),
+        angle=random.randint(*random_angle),
+        thickness=font_thickness,
+        font=font,
+        font_scale=font_scale
+    )
+
+def center_crop(image, w, h):
+    center = image.shape
+    x = center[1]/2 - w/2
+    y = center[0]/2 - h/2
+    return image[int(y):int(y+h), int(x):int(x+w)]
+
+# draws the text in a checkerboard pattern over the image
+def place_text_checkerboard(image, text, color=(255,255,255), alpha=1, step_x=0.1, step_y=0.1, angle=0,
+                            font=cv2.FONT_HERSHEY_SIMPLEX, font_scale=1.0, thickness=3):
+    image_size = image.size
+
+    image = np.array(image.convert('RGB'))
+    if angle != 0:
+        border_scale = 0.4
+        overlay_size = [int(i*(1+border_scale)) for i in list(image_size)]
+    else:
+        overlay_size = image_size
+
+    w, h = overlay_size
+    overlay = np.zeros((overlay_size[1], overlay_size[0], 3)) # change dimensions
+    output = image.copy()
+
+    text_w, text_h = get_text_size(text, font, font_scale, thickness)
+
+    c = 0
+    for rel_pos_x in np.arange(0, 1, step_x):
+        c += 1
+        for rel_pos_y in np.arange(text_h/h+(c%2)*step_y/2, 1, step_y):
+            position = (int(w*rel_pos_x), int(h*rel_pos_y))
+            cv2.putText(overlay, text, position, font, font_scale, color, thickness)
+
+    if angle != 0:
+        rotate_M = cv2.getRotationMatrix2D((w//2, h//2), angle, 1)
+        overlay = cv2.warpAffine(overlay, rotate_M, (overlay.shape[1], overlay.shape[0]))
+
+    overlay = center_crop(overlay, image_size[0], image_size[1])
+    overlay[overlay==0] = image[overlay==0]
+    overlay = overlay.astype(np.uint8)
+    cv2.addWeighted(overlay, alpha, output, 1-alpha, 0, output)
+
+    return Image.fromarray(output)
+
+def place_random_diagonal_watermark(
+    pil_image,
+    text,
+    random_step_x=(0.25, 0.4),
+    random_step_y=(0.25, 0.4),
+    random_angle=(-60,60),
+    text_height_in_percent_range=(0.10, 0.18),
+    text_alpha_range=(0.18, 0.4),
+    fonts=CV2_FONTS,
+    font_thickness_range=(2, 6),
+    colors=[(255,255,255)]
+):
+    w, h = pil_image.size
+
+    text_height = int(h*random_float(*text_height_in_percent_range))
+
+    font, font_scale, font_thickness = get_random_font_params(text, text_height, fonts, font_thickness_range)
+
+    text_width, _ = get_text_size(text, font, font_scale, font_thickness)
+
+    return place_text_checkerboard(
+        pil_image,
+        text,
+        color=random.choice(colors),
+        alpha=random_float(*text_alpha_range),
+        step_x=random_float(*random_step_x),
+        step_y=random_float(*random_step_y),
+        angle=random.randint(*random_angle),
+        thickness=font_thickness,
+        font=font,
+        font_scale=font_scale
+    )
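For context, a short usage sketch for the synthetic watermark helpers above; the input path comes from this commit's images/ folder and the watermark text is an arbitrary example:

from PIL import Image
from wmdetection.dataset.synthetic_wm import (
    place_random_centered_watermark,
    place_random_diagonal_watermark,
)

img = Image.open('images/clean/3.png').convert('RGB')

# Single semi-transparent watermark near the image center, random font/thickness.
centered = place_random_centered_watermark(img, 'example.com')

# Tiled checkerboard pattern, rotated by a random angle.
diagonal = place_random_diagonal_watermark(img, 'example.com', random_angle=(-45, 45))

centered.save('centered_wm.png')
diagonal.save('diagonal_wm.png')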
wmdetection/models/__init__.py
ADDED
@@ -0,0 +1,86 @@
+import os
+import torch
+import torch.nn as nn
+from torchvision import models, transforms
+from huggingface_hub import hf_hub_url, hf_hub_download
+
+from .convnext import ConvNeXt
+from wmdetection.utils import FP16Module
+
+
+def get_convnext_model(name):
+    if name == 'convnext-tiny':
+        model_ft = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768])
+        model_ft.head = nn.Sequential(
+            nn.Linear(in_features=768, out_features=512),
+            nn.GELU(),
+            nn.Linear(in_features=512, out_features=256),
+            nn.GELU(),
+            nn.Linear(in_features=256, out_features=2),
+        )
+
+    detector_transforms = transforms.Compose([
+        transforms.Resize((256, 256)),
+        transforms.ToTensor(),
+        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+    ])
+    return model_ft, detector_transforms
+
+
+def get_resnext_model(name):
+    if name == 'resnext50_32x4d-small':
+        model_ft = models.resnext50_32x4d(pretrained=False)
+    elif name == 'resnext101_32x8d-large':
+        model_ft = models.resnext101_32x8d(pretrained=False)
+
+    num_ftrs = model_ft.fc.in_features
+    model_ft.fc = nn.Linear(num_ftrs, 2)
+
+    detector_transforms = transforms.Compose([
+        transforms.Resize((320, 320)),
+        transforms.ToTensor(),
+        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+    ])
+
+    return model_ft, detector_transforms
+
+
+def get_watermarks_detection_model(name, device='cpu', fp16=True, pretrained=True, cache_dir='/tmp/watermark-detection'):
+    assert name in MODELS, f"Unknown model name: {name}"
+    assert not (fp16 and name.startswith('convnext')), "Can`t use fp16 mode with convnext models"
+    config = MODELS[name]
+
+    model_ft, detector_transforms = config['constructor'](name)
+
+    if pretrained:
+        hf_hub_download(repo_id=config['repo_id'], filename=config['filename'],
+                        cache_dir=cache_dir, force_filename=config['filename'])
+        weights = torch.load(os.path.join(cache_dir, config['filename']), device)
+        model_ft.load_state_dict(weights)
+
+    if fp16:
+        model_ft = FP16Module(model_ft)
+
+    model_ft.eval()
+    model_ft = model_ft.to(device)
+
+    return model_ft, detector_transforms
+
+
+MODELS = {
+    'convnext-tiny': dict(
+        constructor=get_convnext_model,
+        repo_id='boomb0om/watermark-detectors',
+        filename='convnext-tiny_watermarks_detector.pth',
+    ),
+    'resnext101_32x8d-large': dict(
+        constructor=get_resnext_model,
+        repo_id='boomb0om/watermark-detectors',
+        filename='watermark_classifier-resnext101_32x8d-input_size320-4epochs_c097_w082.pth',
+    ),
+    'resnext50_32x4d-small': dict(
+        constructor=get_resnext_model,
+        repo_id='boomb0om/watermark-detectors',
+        filename='watermark_classifier-resnext50_32x4d-input_size320-4epochs_c082_w078.pth',
+    )
+}
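A hedged sketch of how the registry above can be used outside the Gradio app, assuming the checkpoint download from boomb0om/watermark-detectors succeeds and that class index 1 means "watermarked" (as app.py assumes):

import torch
from PIL import Image
from wmdetection.models import get_watermarks_detection_model

# Load the smaller ResNeXt variant on CPU; fp16 is disabled because half precision
# is only practical on GPU (and is rejected outright for the convnext models).
model, transforms = get_watermarks_detection_model(
    'resnext50_32x4d-small', device='cpu', fp16=False, cache_dir='model_files'
)

img = Image.open('images/watermark/1.png').convert('RGB')
with torch.no_grad():
    logits = model(transforms(img).unsqueeze(0))
print('watermarked' if logits.argmax(1).item() == 1 else 'clean')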
wmdetection/models/convnext.py
ADDED
@@ -0,0 +1,200 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from timm.models.layers import trunc_normal_, DropPath
+from timm.models.registry import register_model
+
+
+class Block(nn.Module):
+    r""" ConvNeXt Block. There are two equivalent implementations:
+    (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
+    (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
+    We use (2) as we find it slightly faster in PyTorch
+
+    Args:
+        dim (int): Number of input channels.
+        drop_path (float): Stochastic depth rate. Default: 0.0
+        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
+    """
+    def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6):
+        super().__init__()
+        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim) # depthwise conv
+        self.norm = LayerNorm(dim, eps=1e-6)
+        self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers
+        self.act = nn.GELU()
+        self.pwconv2 = nn.Linear(4 * dim, dim)
+        self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)),
+                                  requires_grad=True) if layer_scale_init_value > 0 else None
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+
+    def forward(self, x):
+        input = x
+        x = self.dwconv(x)
+        x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C)
+        x = self.norm(x)
+        x = self.pwconv1(x)
+        x = self.act(x)
+        x = self.pwconv2(x)
+        if self.gamma is not None:
+            x = self.gamma * x
+        x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W)
+
+        x = input + self.drop_path(x)
+        return x
+
+class ConvNeXt(nn.Module):
+    r""" ConvNeXt
+        A PyTorch impl of : `A ConvNet for the 2020s` -
+        https://arxiv.org/pdf/2201.03545.pdf
+
+    Args:
+        in_chans (int): Number of input image channels. Default: 3
+        num_classes (int): Number of classes for classification head. Default: 1000
+        depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
+        dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]
+        drop_path_rate (float): Stochastic depth rate. Default: 0.
+        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
+        head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
+    """
+    def __init__(self, in_chans=3, num_classes=1000,
+                 depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0.,
+                 layer_scale_init_value=1e-6, head_init_scale=1.,
+                 ):
+        super().__init__()
+
+        self.dims = dims
+
+        self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers
+        stem = nn.Sequential(
+            nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),
+            LayerNorm(dims[0], eps=1e-6, data_format="channels_first")
+        )
+        self.downsample_layers.append(stem)
+        for i in range(3):
+            downsample_layer = nn.Sequential(
+                LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
+                nn.Conv2d(dims[i], dims[i+1], kernel_size=2, stride=2),
+            )
+            self.downsample_layers.append(downsample_layer)
+
+        self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks
+        dp_rates=[x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
+        cur = 0
+        for i in range(4):
+            stage = nn.Sequential(
+                *[Block(dim=dims[i], drop_path=dp_rates[cur + j],
+                        layer_scale_init_value=layer_scale_init_value) for j in range(depths[i])]
+            )
+            self.stages.append(stage)
+            cur += depths[i]
+
+        self.norm = nn.LayerNorm(dims[-1], eps=1e-6) # final norm layer
+        self.head = nn.Linear(dims[-1], num_classes)
+
+        self.apply(self._init_weights)
+        self.head.weight.data.mul_(head_init_scale)
+        self.head.bias.data.mul_(head_init_scale)
+
+    def _init_weights(self, m):
+        if isinstance(m, (nn.Conv2d, nn.Linear)):
+            trunc_normal_(m.weight, std=.02)
+            nn.init.constant_(m.bias, 0)
+
+    def forward_features(self, x):
+        for i in range(4):
+            x = self.downsample_layers[i](x)
+            x = self.stages[i](x)
+        return self.norm(x.mean([-2, -1])) # global average pooling, (N, C, H, W) -> (N, C)
+
+    def forward(self, x):
+        x = self.forward_features(x)
+        x = self.head(x)
+        return x
+
+class LayerNorm(nn.Module):
+    r""" LayerNorm that supports two data formats: channels_last (default) or channels_first.
+    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with
+    shape (batch_size, height, width, channels) while channels_first corresponds to inputs
+    with shape (batch_size, channels, height, width).
+    """
+    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
+        super().__init__()
+        self.weight = nn.Parameter(torch.ones(normalized_shape))
+        self.bias = nn.Parameter(torch.zeros(normalized_shape))
+        self.eps = eps
+        self.data_format = data_format
+        if self.data_format not in ["channels_last", "channels_first"]:
+            raise NotImplementedError
+        self.normalized_shape = (normalized_shape, )
+
+    def forward(self, x):
+        if self.data_format == "channels_last":
+            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
+        elif self.data_format == "channels_first":
+            u = x.mean(1, keepdim=True)
+            s = (x - u).pow(2).mean(1, keepdim=True)
+            x = (x - u) / torch.sqrt(s + self.eps)
+            x = self.weight[:, None, None] * x + self.bias[:, None, None]
+            return x
+
+
+model_urls = {
+    "convnext_tiny_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_tiny_1k_224_ema.pth",
+    "convnext_small_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_small_1k_224_ema.pth",
+    "convnext_base_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_224_ema.pth",
+    "convnext_large_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_large_1k_224_ema.pth",
+    "convnext_tiny_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_tiny_22k_224.pth",
+    "convnext_small_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_small_22k_224.pth",
+    "convnext_base_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_224.pth",
+    "convnext_large_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_224.pth",
+    "convnext_xlarge_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_xlarge_22k_224.pth",
+}
+
+def convnext_tiny(pretrained=False, in_22k=False, **kwargs):
+    model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs)
+    if pretrained:
+        url = model_urls['convnext_tiny_22k'] if in_22k else model_urls['convnext_tiny_1k']
+        checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu", check_hash=True)
+        model.load_state_dict(checkpoint["model"])
+    return model
+
+def convnext_small(pretrained=False, in_22k=False, **kwargs):
+    model = ConvNeXt(depths=[3, 3, 27, 3], dims=[96, 192, 384, 768], **kwargs)
+    if pretrained:
+        url = model_urls['convnext_small_22k'] if in_22k else model_urls['convnext_small_1k']
+        checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu")
+        model.load_state_dict(checkpoint["model"])
+    return model
+
+def convnext_base(pretrained=False, in_22k=False, **kwargs):
+    model = ConvNeXt(depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs)
+    if pretrained:
+        url = model_urls['convnext_base_22k'] if in_22k else model_urls['convnext_base_1k']
+        checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu")
+        model.load_state_dict(checkpoint["model"])
+    return model
+
+def convnext_large(pretrained=False, in_22k=False, **kwargs):
+    model = ConvNeXt(depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs)
+    if pretrained:
+        url = model_urls['convnext_large_22k'] if in_22k else model_urls['convnext_large_1k']
+        checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu")
+        model.load_state_dict(checkpoint["model"])
+    return model
+
+def convnext_xlarge(pretrained=False, in_22k=False, **kwargs):
+    model = ConvNeXt(depths=[3, 3, 27, 3], dims=[256, 512, 1024, 2048], **kwargs)
+    if pretrained:
+        assert in_22k, "only ImageNet-22K pre-trained ConvNeXt-XL is available; please set in_22k=True"
+        url = model_urls['convnext_xlarge_22k']
+        checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu")
+        model.load_state_dict(checkpoint["model"])
+    return model
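A quick sanity-check sketch for the backbone as configured in this repo (random weights, no download; the expected output shape is an assumption based on the code above):

import torch
from wmdetection.models.convnext import ConvNeXt

# 'tiny' configuration with a 2-class head, dummy forward pass at the 256x256
# resolution used by the detector transforms.
net = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], num_classes=2).eval()
with torch.no_grad():
    out = net(torch.randn(1, 3, 256, 256))
print(out.shape)  # expected: torch.Size([1, 2])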
wmdetection/pipelines/__init__.py
ADDED
File without changes
wmdetection/pipelines/metrics.py
ADDED
@@ -0,0 +1,9 @@
+import pandas as pd
+import numpy as np
+from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
+
+
+def plot_confusion_matrix(x: np.ndarray, y: np.ndarray):
+    cm = confusion_matrix(x, y)
+    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['clean', 'watermark'])
+    return disp.plot()
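Note that scikit-learn is imported here but not listed in requirements.txt, so it has to be installed separately. A small, hypothetical usage sketch (labels invented for illustration; 0 = clean, 1 = watermark, matching display_labels):

import numpy as np
from wmdetection.pipelines.metrics import plot_confusion_matrix

y_true = np.array([0, 0, 1, 1, 1])     # hypothetical ground-truth labels
y_pred = np.array([0, 1, 1, 1, 0])     # hypothetical predictions
plot_confusion_matrix(y_true, y_pred)  # first argument is treated as ground truth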
wmdetection/pipelines/predictor.py
ADDED
@@ -0,0 +1,73 @@
+import os
+from tqdm import tqdm
+from PIL import Image
+import numpy as np
+
+import torch
+import torchvision
+from torch.utils.data import Dataset
+from torch.utils.data import BatchSampler, DataLoader
+
+from wmdetection.utils import read_image_rgb
+
+
+class ImageDataset(Dataset):
+
+    def __init__(self, objects, classifier_transforms):
+        self.objects = objects
+        self.classifier_transforms = classifier_transforms
+
+    def __len__(self):
+        return len(self.objects)
+
+    def __getitem__(self, idx):
+        obj = self.objects[idx]
+        assert isinstance(obj, (str, np.ndarray, Image.Image))
+
+        if isinstance(obj, str):
+            pil_img = read_image_rgb(obj)
+        elif isinstance(obj, np.ndarray):
+            pil_img = Image.fromarray(obj)
+        elif isinstance(obj, Image.Image):
+            pil_img = obj
+
+        resnet_img = self.classifier_transforms(pil_img).float()
+
+        return resnet_img
+
+
+class WatermarksPredictor:
+
+    def __init__(self, wm_model, classifier_transforms, device):
+        self.wm_model = wm_model
+        self.wm_model.eval()
+        self.classifier_transforms = classifier_transforms
+
+        self.device = device
+
+    def predict_image(self, pil_image):
+        pil_image = pil_image.convert("RGB")
+        input_img = self.classifier_transforms(pil_image).float().unsqueeze(0)
+        outputs = self.wm_model(input_img.to(self.device))
+        result = torch.max(outputs, 1)[1].cpu().reshape(-1).tolist()[0]
+        return result
+
+    def run(self, files, num_workers=8, bs=8, pbar=True):
+        eval_dataset = ImageDataset(files, self.classifier_transforms)
+        loader = DataLoader(
+            eval_dataset,
+            sampler=torch.utils.data.SequentialSampler(eval_dataset),
+            batch_size=bs,
+            drop_last=False,
+            num_workers=num_workers
+        )
+        if pbar:
+            loader = tqdm(loader)
+
+        result = []
+        for batch in loader:
+            with torch.no_grad():
+                outputs = self.wm_model(batch.to(self.device))
+                result.extend(torch.max(outputs, 1)[1].cpu().reshape(-1).tolist())
+
+        return result
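A usage sketch for batch prediction over a folder, combining the predictor with the file helpers from wmdetection.utils (the CPU device and the small worker/batch sizes are arbitrary choices for the example):

from wmdetection.models import get_watermarks_detection_model
from wmdetection.pipelines.predictor import WatermarksPredictor
from wmdetection.utils import list_images

model, transforms = get_watermarks_detection_model('convnext-tiny', fp16=False,
                                                   cache_dir='model_files')
predictor = WatermarksPredictor(model, transforms, 'cpu')

files = list_images('images')                        # recursive .jpg/.jpeg/.png scan
labels = predictor.run(files, num_workers=0, bs=4)   # list of 0/1 class indices
for path, label in zip(files, labels):
    print(path, 'watermarked' if label else 'clean')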
wmdetection/utils/__init__.py
ADDED
@@ -0,0 +1,2 @@
+from .files import get_extenstion, listdir_rec, list_images, read_image_rgb
+from .fp16module import FP16Module
wmdetection/utils/files.py
ADDED
@@ -0,0 +1,26 @@
+import os
+from PIL import Image
+
+IMAGE_EXT = set(['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG'])
+
+def get_extenstion(filepath):
+    return os.path.splitext(filepath)[-1]
+
+def listdir_rec(folder_path):
+    filepaths = []
+    for root, dirname, files in os.walk(folder_path):
+        for file in files:
+            filepaths.append(os.path.join(root, file))
+    return filepaths
+
+def list_images(folder_path):
+    files = listdir_rec(folder_path)
+    return [f for f in files if get_extenstion(f) in IMAGE_EXT]
+
+def read_image_rgb(path):
+    pil_img = Image.open(path)
+    pil_img.load()
+    if pil_img.format == 'PNG' and pil_img.mode != 'RGBA':
+        pil_img = pil_img.convert('RGBA')
+    pil_img = pil_img.convert('RGB')
+    return pil_img
wmdetection/utils/fp16module.py
ADDED
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+import torch
+from torch import nn
+from torch.autograd import Variable
+from torch.nn.parameter import Parameter
+
+FLOAT_TYPES = (torch.FloatTensor, torch.cuda.FloatTensor)
+HALF_TYPES = (torch.HalfTensor, torch.cuda.HalfTensor)
+
+
+def conversion_helper(val, conversion):
+    """Apply conversion to val. Recursively apply conversion if `val` is a nested tuple/list structure."""
+    if not isinstance(val, (tuple, list)):
+        return conversion(val)
+    rtn = [conversion_helper(v, conversion) for v in val]
+    if isinstance(val, tuple):
+        rtn = tuple(rtn)
+    return rtn
+
+
+def fp32_to_fp16(val):
+    """Convert fp32 `val` to fp16"""
+    def half_conversion(val):
+        val_typecheck = val
+        if isinstance(val_typecheck, (Parameter, Variable)):
+            val_typecheck = val.data
+        if isinstance(val_typecheck, FLOAT_TYPES):
+            val = val.half()
+        return val
+    return conversion_helper(val, half_conversion)
+
+
+def fp16_to_fp32(val):
+    """Convert fp16 `val` to fp32"""
+    def float_conversion(val):
+        val_typecheck = val
+        if isinstance(val_typecheck, (Parameter, Variable)):
+            val_typecheck = val.data
+        if isinstance(val_typecheck, HALF_TYPES):
+            val = val.float()
+        return val
+    return conversion_helper(val, float_conversion)
+
+
+class FP16Module(nn.Module):
+    def __init__(self, module):
+        super(FP16Module, self).__init__()
+        self.add_module('module', module.half())
+
+    def forward(self, *inputs, **kwargs):
+        return fp16_to_fp32(self.module(*(fp32_to_fp16(inputs)), **kwargs))
+
+    def state_dict(self, destination=None, prefix='', keep_vars=False):
+        return self.module.state_dict(destination, prefix, keep_vars)
+
+    def load_state_dict(self, state_dict, strict=True):
+        self.module.load_state_dict(state_dict, strict=strict)
+
+    def get_param(self, item):
+        return self.module.get_param(item)
+
+    def to(self, device, *args, **kwargs):
+        self.module.to(device)
+        return super().to(device, *args, **kwargs)
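Finally, a usage sketch for FP16Module (assumption: a CUDA device is available, since half precision is the point of the wrapper and is only practical on GPU here):

import torch
from torchvision import models
from wmdetection.utils import FP16Module

# Wrap an fp32 network: parameters are converted with module.half(), inputs are
# cast to fp16 on the way in and outputs back to fp32 on the way out.
net = FP16Module(models.resnet18()).to('cuda:0').eval()
with torch.no_grad():
    out = net(torch.randn(1, 3, 224, 224, device='cuda:0'))
print(out.dtype)  # torch.float32, thanks to fp16_to_fp32 in forward()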