import cv2
import gradio as gr
import numpy as np
import torch
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation

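# Shared frame counter; segment() uses it to throttle how often inference runs.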
class Counter:
    def __init__(self):
        self.count = 0

    def increment(self):
        self.count += 1
        return self.count


counter = Counter()

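# Checkpoint from fine-tuning SegFormer (mit-b0) on a binary seal-vs-background
# task; 255 is the background/ignore index produced by reduce_labels (see below).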
weights2load = 'segformer_ep15_loss0.00.pth'
id2label = {0: 'seal', 255: 'bck'}
label2id = {'seal': 0, 'bck': 255}
model = SegformerForSemanticSegmentation.from_pretrained(
    "nvidia/mit-b0",
    num_labels=2,
    id2label=id2label,
    label2id=label2id,
)
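# reduce_labels=True makes the processor remap label 0 to the ignore index (255)
# and shift the remaining labels down by one, matching the id2label mapping above.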
image_processor = SegformerImageProcessor(reduce_labels=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.load_state_dict(torch.load(weights2load, weights_only=True, map_location=device))
model.to(device).eval()

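# Streaming callback: Gradio invokes this once per incoming webcam frame.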
def segment(im):
    """Overlay the model's 'seal' heatmap on the frame; return (frame, frame count)."""
    imout = im.copy()
    # Only run the model on every 3rd frame so the live stream stays responsive;
    # frames in between pass through unmodified.
    if counter.increment() % 3 == 0:
        pixel_values = image_processor(im, return_tensors="pt").pixel_values.to(device)
        with torch.no_grad():
            outputs = model(pixel_values=pixel_values)
        logits = outputs.logits.cpu().numpy() ** 2
        # Min-max normalise the 'seal' channel, then upsample it to the frame size.
        logits_n = (logits[0, 0] - logits[0, 0].min()) / (logits[0, 0].max() - logits[0, 0].min())
        logits_n = cv2.resize(logits_n, (im.shape[1], im.shape[0]))
        # Tint the green channel where the mask response is strong.
        imout[..., 1] = np.clip(imout[..., 1] + logits_n * 200, 0, 254)
    return imout, counter.count

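# Live interface: streams webcam frames through segment() and shows the
# overlaid frame alongside the running frame count.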
demo = gr.Interface(
    segment,
    [gr.Image(sources=["webcam"], streaming=True)],
    [gr.Image(), gr.Number()],
    css=".output-image, .input-image, .image-preview {height: 400px !important}",
    live=True,
)

if __name__ == "__main__":
    demo.launch()