import gradio as gr
import os
import torch

from model import create_vit_model
from timeit import default_timer as timer

# Labels shown to the user; predict() pairs them by position with the model's
# output probabilities, so the order here matters.
class_names = ["pizza", "steak", "sushi"]

# Checkpoint file loaded into the model below (relative to the working directory).
_WEIGHTS_FILE = "09_pretrained_vit_feature_extractor_pizza_steak_sushi_20_percent.pth"

# Build the model together with the transforms applied to each input image.
vit, vit_transforms = create_vit_model()

# Restore the saved parameters, mapping every tensor onto the CPU so the app
# does not need the device the checkpoint was saved from.
vit.load_state_dict(
    torch.load(f=_WEIGHTS_FILE, map_location=torch.device("cpu"))
)

def predict(img):
  """Classify one image and time the model's forward pass.

  Args:
    img: Input image in whatever form ``vit_transforms`` accepts (the Gradio
      interface in this file supplies a PIL image).

  Returns:
    A ``(pred_dict, pred_time)`` tuple. ``pred_dict`` maps each label in
    ``class_names`` to its softmax probability as a plain Python ``float``;
    ``pred_time`` is the duration of the forward pass in seconds, rounded to
    four decimal places.
  """
  # Preprocess and add a leading dimension of size 1 (a batch of one image).
  batch = vit_transforms(img).unsqueeze(0)

  # Only switching to eval mode and the forward pass are timed; preprocessing
  # and the softmax below are excluded.
  start_time = timer()
  vit.eval()
  with torch.inference_mode():
    outputs = vit(batch)
  pred_time = round(timer() - start_time, 4)

  # Convert to plain Python floats: a label -> 0-dim tensor mapping is not
  # JSON-serializable and is not what gr.Label expects.
  probs = torch.softmax(outputs, dim=1)[0].tolist()
  pred_dict = {class_names[i]: prob for i, prob in enumerate(probs)}

  return pred_dict, pred_time


# Text shown in the demo's header, body and footer.
title = "FoodVision Mini 🍕🥩🍣"
description = "A VIT feature extractor computer vision model to classify images of food as pizza, steak or sushi."
article = "Created at [PyTorch Model Deployment]."

# Create examples list from "examples/" directory.
# os.listdir returns entries in arbitrary order and also lists subdirectories,
# so sort for a stable ordering and keep regular files only; a directory entry
# would otherwise be offered as an example "image".
examples_dir = "examples"
example_paths = sorted(
    os.path.join(examples_dir, name) for name in os.listdir(examples_dir)
)
# Gradio expects one inner list per example (one value per input component).
example_list = [[path] for path in example_paths if os.path.isfile(path)]

# Create the Gradio demo
demo = gr.Interface(
    fn=predict,  # mapping function from input to output
    inputs=gr.Image(type="pil"),  # predict receives the upload as a PIL image
    outputs=[
        gr.Label(num_top_classes=3, label="Predictions"),  # label -> probability dict
        gr.Number(label="Prediction time (s)"),  # second return value of predict
    ],
    examples=example_list,
    title=title,
    description=description,
    article=article,
)

# Launch the demo!
demo.launch()