|
from transformers import ViTFeatureExtractor, ViTForImageClassification |
|
from hugsvision.inference.VisionClassifierInference import VisionClassifierInference |
|
import gradio as gr |
|
import cv2 |
|
import numpy as np |
|
|
|
|
|
# Hub identifier of the checkpoint; both the feature extractor and the model
# weights are loaded from this same identifier.
path = "mrm8488/vit-base-patch16-224_finetuned-kvasirv2-colonoscopy"

feature_extractor = ViTFeatureExtractor.from_pretrained(path)
model = ViTForImageClassification.from_pretrained(path)

# Built once at import time; classify_image_with_overlay() calls its
# predict() method for each request.
classifier = VisionClassifierInference(feature_extractor=feature_extractor, model=model)
|
|
|
|
|
def classify_image_with_overlay(img):
    """Classify an image file and return it with the predicted label drawn on it.

    Args:
        img: Path to the image file. The same value is passed to
            ``cv2.imread`` and to ``classifier.predict(img_path=...)``.
            ``None`` is accepted and yields ``None``.

    Returns:
        The loaded image converted from BGR to RGB, with the label rendered in
        black on a filled white box near the top-left corner, or ``None`` when
        no input was supplied.

    Raises:
        ValueError: If ``cv2.imread`` cannot load ``img``.
    """
    # NOTE(review): presumably the UI can invoke the callback with no image
    # (e.g. after the input is cleared in live mode); return nothing to draw
    # instead of failing inside OpenCV -- confirm against the Gradio version.
    if img is None:
        return None

    # cv2.imread reports failure by returning None rather than raising, so
    # validate before running inference or drawing on the result.
    image = cv2.imread(img)
    if image is None:
        raise ValueError(f"Could not read image: {img!r}")

    label = classifier.predict(img_path=img)

    font = cv2.FONT_HERSHEY_SIMPLEX
    org = (10, 30)  # bottom-left corner of the text, in pixels
    font_scale = 1
    thickness = 2
    background = (255, 255, 255)
    foreground = (0, 0, 0)

    # getTextSize also returns the baseline offset; extending the box by it
    # keeps descenders (e.g. "p", "y") inside the white background.
    (text_width, text_height), baseline = cv2.getTextSize(
        label, font, font_scale, thickness
    )
    top_left = (org[0] - 10, org[1] - text_height - 10)
    bottom_right = (org[0] + text_width, org[1] + baseline)
    cv2.rectangle(image, top_left, bottom_right, background, cv2.FILLED)

    cv2.putText(
        image, label, org, font, font_scale, foreground, thickness, cv2.LINE_AA
    )

    # OpenCV loads images as BGR; convert to RGB before returning.
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
|
|
# Web UI: one image input, one image output, wired to the overlay classifier.
iface = gr.Interface(
    fn=classify_image_with_overlay,
    # classify_image_with_overlay passes its argument to cv2.imread and to
    # classifier.predict(img_path=...), so it must receive a file path rather
    # than the component's default numpy array.
    # NOTE(review): "filepath" requires a Gradio release that supports this
    # value for the Image component's `type` -- confirm the pinned version.
    inputs=gr.inputs.Image(type="filepath"),
    outputs=gr.outputs.Image(),
    live=True,
    title="ViT Image Classifier with Overlay",
    description="Upload an image for classification with label overlay.",
)


if __name__ == "__main__":
    iface.launch()
|
|