import json
import gradio as gr
import torch
from transformers import EfficientFormerImageProcessor, EfficientFormerForImageClassificationWithTeacher
# Load the image preprocessor and the pretrained classifier from the same
# checkpoint so that preprocessing matches what the model expects.
model_name = "snap-research/efficientformer-l7-300"
processor = EfficientFormerImageProcessor.from_pretrained(model_name)
model = EfficientFormerForImageClassificationWithTeacher.from_pretrained(model_name)
# Load the ImageNet class-index -> label mapping. Keys are looked up as
# strings (see classify_image). The encoding is pinned so decoding does not
# depend on the platform's default locale.
with open("assets/imagenet_1000_idx2labels.json", encoding="utf-8") as f:
    idx_to_label = json.load(f)
def classify_image(img, top_k):
    """Classify an image and return its highest-scoring class labels.

    Args:
        img: Input image, passed as-is to the module-level ``processor``.
        top_k: Number of predictions to return. UI sliders may deliver this
            as a float, so it is truncated to an int and clamped to the
            range ``[1, number of classes]``.

    Returns:
        A dict mapping each selected class label (looked up in the
        module-level ``idx_to_label`` by stringified class index) to its
        softmax probability rounded to 4 decimal places.
    """
    # Preprocess the input image into PyTorch tensors.
    inputs = processor(images=img, return_tensors="pt")

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Class probabilities for the first (only) image in the batch.
    scores = torch.nn.functional.softmax(logits, dim=1)[0]

    # A float cannot be used as a slice/top-k size, and an empty result
    # cannot be rendered as a label, so coerce and clamp the request.
    k = max(1, min(int(top_k), scores.shape[0]))

    # topk selects the k best classes without sorting every class score.
    top_scores, top_indices = torch.topk(scores, k)
    return {
        idx_to_label[str(idx)]: round(score, 4)
        for idx, score in zip(top_indices.tolist(), top_scores.tolist())
    }
# User-facing blurb passed to gr.Interface as its `description` argument.
# This is a regular (non-raw) string, so the literal "\n\n" below is an
# escape sequence that yields two newline characters at runtime.
description = """
Gradio demo for EfficientFormer, 
introduced in EfficientFormer: Vision Transformers at MobileNet Speed. 
\n\nEfficientFormer is a mobile-friendly image classification model that achieves MobileNet inference speed with impressive performance gains. 
To use it, simply upload an image and print the top predictions.
"""
# Build the web UI: an image upload plus a "top-k" slider feed
# classify_image, whose {label: probability} dict is rendered by a Label
# component.
demo = gr.Interface(
    classify_image,
    inputs=[
        gr.Image(),
        # At least one prediction must be requested, and only whole numbers
        # make sense for a count, hence minimum=1 and step=1.
        gr.Slider(1, 1000, value=5, step=1, label="Number of top predictions"),
    ],
    # Top-level component API, consistent with gr.Image / gr.Slider above
    # (the legacy gr.outputs namespace is deprecated).
    outputs=gr.Label(),
    description=description,
    # The checkpoint loaded by this app is efficientformer-l7-300, i.e. L7.
    title="Image Classification with EfficientFormer-L7",
    examples=[
        ["assets/halloween-gaf8ad7ebc_1920.jpeg", 5],
        ["assets/IMG_4484.jpeg", 5],
        ["assets/IMG_4737.jpeg", 5],
        ["assets/IMG_4740.jpeg", 5],
    ],
)
demo.launch()