from transformers import ViTFeatureExtractor, ViTForImageClassification
from hugsvision.inference.VisionClassifierInference import VisionClassifierInference
import gradio as gr
import cv2
import numpy as np
from PIL import Image

# Load the pretrained ViT checkpoint and its matching feature extractor.
# NOTE: these statements run at module import time, so importing this file
# loads the model (presumably fetching it from the Hugging Face Hub on first
# use — confirm for the deployment environment).
path = "mrm8488/vit-base-patch16-224_finetuned-kvasirv2-colonoscopy"
feature_extractor = ViTFeatureExtractor.from_pretrained(path)
model = ViTForImageClassification.from_pretrained(path)

# Wrap the model and feature extractor in a HugsVision inference helper.
# `classifier` is the module-level object that classify_image_with_overlay
# calls to obtain a label.
classifier = VisionClassifierInference(
    feature_extractor=feature_extractor,
    model=model,
)

# Define a function to classify and overlay the label on the image
def classify_image_with_overlay(img):
    """Classify an image and return a copy with the predicted label drawn on it.

    Args:
        img: Image as a numpy array, as delivered by the Gradio image input
            (assumed to be RGB uint8 — TODO confirm against the input
            component), or ``None`` when no image is available.

    Returns:
        A 3-channel RGB numpy array with the label written in black on a
        white box in the top-left corner, or ``None`` if ``img`` is ``None``.
    """
    # The callback may be invoked without an image (e.g. a cleared input in
    # live mode); there is nothing to classify or annotate in that case.
    if img is None:
        return None

    # The classifier works on PIL images.
    img_pil = Image.fromarray(img)
    label = classifier.predict_image(img=img_pil)

    # Draw on a writable RGB copy. The image is deliberately NOT converted to
    # BGR: the result is handed back to Gradio, which interprets numpy images
    # as RGB, so returning BGR would swap the red and blue channels. The
    # overlay colours used below (white / black) are channel-order agnostic.
    # convert("RGB") also normalises greyscale / RGBA inputs to 3 channels.
    canvas = np.array(img_pil.convert("RGB"))

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    thickness = 2
    origin = (10, 30)  # bottom-left corner of the text, on its baseline
    padding = 10

    # getTextSize returns ((width, height), baseline); the baseline is how far
    # descenders reach below `origin`, so the box must extend by that much.
    (text_w, text_h), baseline = cv2.getTextSize(label, font, font_scale, thickness)
    top_left = (origin[0] - padding, origin[1] - text_h - padding)
    bottom_right = (origin[0] + text_w + padding, origin[1] + baseline)

    # White background box, then the label text in black on top of it.
    cv2.rectangle(canvas, top_left, bottom_right, (255, 255, 255), cv2.FILLED)
    cv2.putText(canvas, label, origin, font, font_scale, (0, 0, 0), thickness, cv2.LINE_AA)

    return canvas

# Gradio UI: one image input wired to classify_image_with_overlay, with the
# annotated image shown as the output.
# NOTE(review): `gr.inputs` / `gr.outputs` are the legacy component namespaces
# (deprecated in Gradio 3.x and, as far as I know, removed in 4.x) — confirm
# against the pinned Gradio version before upgrading.
# NOTE(review): `output_width` / `output_height` do not appear among the
# documented parameters of `gr.outputs.Image`; verify the installed version
# accepts them, otherwise this constructor call may raise TypeError.
iface = gr.Interface(
    fn=classify_image_with_overlay,
    inputs=gr.inputs.Image(),
    outputs=gr.outputs.Image(type="numpy", output_width=400, output_height=400),  # Adjust to the desired output size
    live=True,  # presumably re-runs the function on every input change instead of on submit — confirm
    title="ViT Image Classifier with Overlay",
    description="Upload an image for classification with label overlay.",
)


# Start the Gradio app only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    iface.launch()