Spaces:

iamkb
/

zsic_unicl

Runtime error

App Files Files Community

Kaushik Bar commited on Jun 22, 2022

Commit

523248f

1 Parent(s): a7e927b

zsic_unicl

Browse files

Files changed (2) hide show

app.py +143 -0
requirements.txt +7 -0

app.py ADDED Viewed

	@@ -0,0 +1,143 @@

+import argparse
+import requests
+import gradio as gr
+import numpy as np
+import cv2
+import torch
+import torch.nn as nn
+from PIL import Image
+from torchvision import transforms
+from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
+from timm.data import create_transform
+from config import get_config
+from model import build_model
+# Download human-readable labels for ImageNet.
+response = requests.get("https://git.io/JJkYN")
+labels = response.text.split("\n")
+def parse_option():
+    parser = argparse.ArgumentParser('UniCL demo script', add_help=False)
+    parser.add_argument('--cfg', type=str, default="configs/unicl_swin_base.yaml", metavar="FILE", help='path to config file', )
+    args, unparsed = parser.parse_known_args()
+    config = get_config(args)
+    return args, config
+def build_transforms(img_size, center_crop=True):
+    t = [transforms.ToPILImage()]
+    if center_crop:
+        size = int((256 / 224) * img_size)
+        t.append(
+            transforms.Resize(size)
+        )
+        t.append(
+            transforms.CenterCrop(img_size)
+        )
+    else:
+        t.append(
+            transforms.Resize(img_size)
+        )
+    t.append(transforms.ToTensor())
+    t.append(transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD))
+    return transforms.Compose(t)
+def build_transforms4display(img_size, center_crop=True):
+    t = [transforms.ToPILImage()]
+    if center_crop:
+        size = int((256 / 224) * img_size)
+        t.append(
+            transforms.Resize(size)
+        )
+        t.append(
+            transforms.CenterCrop(img_size)
+        )
+    else:
+        t.append(
+            transforms.Resize(img_size)
+        )
+    t.append(transforms.ToTensor())
+    return transforms.Compose(t)
+args, config = parse_option()
+'''
+build model
+'''
+model = build_model(config)
+url = './in21k_yfcc14m_gcc15m_swin_base.pth'
+checkpoint = torch.load(url, map_location="cpu")
+model.load_state_dict(checkpoint["model"])
+model.eval()
+'''
+build data transform
+'''
+eval_transforms = build_transforms(224, center_crop=True)
+display_transforms = build_transforms4display(224, center_crop=True)
+'''
+build upsampler
+'''
+# upsampler = nn.Upsample(scale_factor=16, mode='bilinear')
+'''
+borrow code from here: https://github.com/jacobgil/pytorch-grad-cam/blob/master/pytorch_grad_cam/utils/image.py
+'''
+def show_cam_on_image(img: np.ndarray,
+                      mask: np.ndarray,
+                      use_rgb: bool = False,
+                      colormap: int = cv2.COLORMAP_JET) -> np.ndarray:
+    """ This function overlays the cam mask on the image as an heatmap.
+    By default the heatmap is in BGR format.
+    :param img: The base image in RGB or BGR format.
+    :param mask: The cam mask.
+    :param use_rgb: Whether to use an RGB or BGR heatmap, this should be set to True if 'img' is in RGB format.
+    :param colormap: The OpenCV colormap to be used.
+    :returns: The default image with the cam overlay.
+    """
+    heatmap = cv2.applyColorMap(np.uint8(255 * mask), colormap)
+    if use_rgb:
+        heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
+    heatmap = np.float32(heatmap) / 255
+    if np.max(img) > 1:
+        raise Exception(
+            "The input image should np.float32 in the range [0, 1]")
+    cam = 0.7*heatmap + 0.3*img
+    # cam = cam / np.max(cam)
+    return np.uint8(255 * cam)
+def recognize_image(image, texts):
+    img_t = eval_transforms(image)
+    img_d = display_transforms(image).permute(1, 2, 0).numpy()
+    text_embeddings = model.get_text_embeddings(texts.split(';'))
+    # compute output
+    feat_img = model.encode_image(img_t.unsqueeze(0))
+    output = model.logit_scale.exp() * feat_img @ text_embeddings.t()
+    prediction = output.softmax(-1).flatten()
+    return {texts.split(';')[i]: float(prediction[i]) for i in range(len(texts.split(';')))}
+image = gr.inputs.Image()
+label = gr.outputs.Label(num_top_classes=100)
+gr.Interface(
+    description="UniCL for Zero-shot Image Recognition Demo (https://github.com/microsoft/unicl)",
+    fn=recognize_image,
+    inputs=["image", "text"],
+    outputs=[
+        label,
+    ],
+    examples=[
+    ["./elephants.png", "an elephant; an elephant walking in the river; four elephants walking in the river"],
+    ["./apple_with_ipod.jpg", "an ipod; an apple with a write note 'ipod'; an apple"],
+    ["./crowd2.jpg", "a street; a street with a woman walking in the middle; a street with a man walking in the  middle"],
+    ],
+).launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+torch==1.10.1
+torchvision==0.11.2
+opencv-python-headless==4.5.3.56
+timm==0.4.12
+numpy
+yacs
+transformers