Spaces:

hysts
/

bizarre-pose-estimator-tagger

Running

File size: 3,033 Bytes

9bdd97c
 
 
 
 
 
f57e36f
9bdd97c
 
 
 
 
 
 
161720b
9bdd97c
 
 
161720b
9bdd97c
161720b
9bdd97c
 
f57e36f
161720b
f57e36f
161720b
 
f57e36f
fcd94e3
161720b
f57e36f
 
 
161720b
f57e36f
 
 
 
 
 
 
 
 
161720b
fcd94e3
 
9bdd97c
 
 
161720b
 
 
 
9bdd97c
 
 
 
 
 
fcd94e3
 
9bdd97c
 
 
 
 
 
ed7463d
 
 
161720b
ed7463d
 
 
7479a3a
 
fcd94e3
7479a3a
 
 
161720b
 
 
7479a3a
161720b
7479a3a
 
161720b

#!/usr/bin/env python

import functools
import os
import pathlib
import sys
import tarfile

import gradio as gr
import huggingface_hub
import PIL.Image
import torch
import torchvision

sys.path.insert(0, "bizarre-pose-estimator")

from _util.twodee_v0 import I as ImageWrapper

DESCRIPTION = "# [ShuhongChen/bizarre-pose-estimator (tagger)](https://github.com/ShuhongChen/bizarre-pose-estimator)"

MODEL_REPO = "public-data/bizarre-pose-estimator-models"


def load_sample_image_paths() -> list[pathlib.Path]:
    image_dir = pathlib.Path("images")
    if not image_dir.exists():
        dataset_repo = "hysts/sample-images-TADNE"
        path = huggingface_hub.hf_hub_download(dataset_repo, "images.tar.gz", repo_type="dataset")
        with tarfile.open(path) as f:
            f.extractall()  # noqa: S202
    return sorted(image_dir.glob("*"))


def load_model(device: torch.device) -> torch.nn.Module:
    path = huggingface_hub.hf_hub_download(MODEL_REPO, "tagger.pth")
    state_dict = torch.load(path)
    model = torchvision.models.resnet50(num_classes=1062)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()
    return model


def load_labels() -> list[str]:
    label_path = huggingface_hub.hf_hub_download(MODEL_REPO, "tags.txt")
    with pathlib.Path(label_path).open() as f:
        return [line.strip() for line in f]


@torch.inference_mode()
def predict(
    image: PIL.Image.Image, score_threshold: float, device: torch.device, model: torch.nn.Module, labels: list[str]
) -> dict[str, float]:
    data = ImageWrapper(image).resize_square(256).alpha_bg(c="w").convert("RGB").tensor()
    data = data.to(device).unsqueeze(0)

    preds = model(data)[0]
    preds = torch.sigmoid(preds)
    preds = preds.cpu().numpy().astype(float)

    res = {}
    for prob, label in zip(preds.tolist(), labels, strict=True):
        if prob < score_threshold:
            continue
        res[label] = prob
    return res


image_paths = load_sample_image_paths()
examples = [[path.as_posix(), 0.5] for path in image_paths]

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = load_model(device)
labels = load_labels()

fn = functools.partial(predict, device=device, model=model, labels=labels)

with gr.Blocks(css_paths="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        with gr.Column():
            image = gr.Image(label="Input", type="pil")
            threshold = gr.Slider(label="Score Threshold", minimum=0, maximum=1, step=0.05, value=0.5)
            run_button = gr.Button("Run")
        with gr.Column():
            result = gr.Label(label="Output")

    inputs = [image, threshold]
    gr.Examples(
        examples=examples,
        inputs=inputs,
        outputs=result,
        fn=fn,
        cache_examples=os.getenv("CACHE_EXAMPLES") == "1",
    )
    run_button.click(
        fn=fn,
        inputs=inputs,
        outputs=result,
        api_name="predict",
    )

if __name__ == "__main__":
    demo.queue(max_size=15).launch()