# Spaces: Sleeping
import gradio as gr | |
import torch | |
import numpy as np | |
from transformers import ViTForImageClassification, ViTModel, ViTImageProcessor | |
from PIL import Image | |
import PIL | |
import io | |
from sklearn.preprocessing import LabelEncoder | |
import json | |
def greet(name):
    """Return a short greeting for ``name``.

    The value is interpolated with an f-string, so a non-string ``name`` is
    formatted via ``str()`` instead of raising ``TypeError``.
    """
    return f"Hello {name}!!"
async def test2(file, top_k: int = 5): | |
# extension = file.filename.split(".")[-1] in ("jpg", "jpeg", "png") | |
# if not extension: | |
# return "Image format must be jpg, jpeg, or png!" | |
# # Read image contents | |
# contents = await file.read() | |
# Preprocess image | |
# image_tensor = preprocess_image(contents) | |
image_tensor = preprocess_image(file) | |
# Make predictions | |
predictions = predict(image_tensor, top_k) | |
item = {"predictions": predictions} | |
return json.dumps(item) | |
# Label encoder used to turn predicted class indices back into label names.
encoder = LabelEncoder()
# SECURITY NOTE: allow_pickle=True lets np.load unpickle arbitrary Python
# objects, so 'encoder.npy' must only ever come from a trusted source.
encoder.classes_ = np.load('encoder.npy', allow_pickle=True)

# Image preprocessing applied before every forward pass.
feature_extractor = ViTImageProcessor(
    do_resize=True,
    # ViTImageProcessor takes the target resolution through `size`;
    # `image_size` is not one of its documented parameters.
    size={"height": 224, "width": 224},
    do_normalize=True,
    # NOTE(review): with do_rescale=False, normalisation is applied on whatever
    # scale the input pixels have (0-255 for uint8 images) — confirm this
    # matches the preprocessing used when the model was trained.
    do_rescale=False,
    image_mean=[0.5, 0.5, 0.5],
    image_std=[0.5, 0.5, 0.5],
)

# Load the classifier through from_pretrained so that any classification-head
# weights stored in the checkpoint are restored together with the backbone.
# Building ViTForImageClassification from a bare config and attaching the
# backbone afterwards leaves the head randomly initialised.
# ignore_mismatched_sizes=True keeps loading from failing if the stored head
# does not have 2112 outputs; the head is then freshly initialised and
# transformers logs a warning.
model = ViTForImageClassification.from_pretrained(
    'pillIdentifierAI/pillIdentifier',
    num_labels=2112,  # Change this to the appropriate number of classes
    ignore_mismatched_sizes=True,
)

# Kept as module-level names for any code that still refers to them.
pretrained_model = model.vit
config = model.config

model.eval()
# def preprocess_image(contents): | |
def preprocess_image(image):
    """Convert raw image data into a float32 tensor for the model.

    The input is cast to ``uint8`` and wrapped in a PIL image, converted to
    RGB when it is in any other mode, and passed through the module-level
    ``feature_extractor``. The single ``pixel_values`` entry that comes back
    is returned as a ``torch.float32`` tensor.
    """
    pil_image = Image.fromarray(np.uint8(image))
    rgb_image = pil_image if pil_image.mode == 'RGB' else pil_image.convert('RGB')

    # The extractor works on a batch: send a one-image batch and take its
    # only entry back out.
    pixel_values = feature_extractor(images=[rgb_image])['pixel_values'][0]

    return torch.tensor(pixel_values, dtype=torch.float32)
def predict(image_tensor, top_k=5):
    """Return the ``top_k`` most likely classes for one preprocessed image.

    Args:
        image_tensor: Tensor for a single image without a batch dimension;
            the batch dimension is added here before calling the model.
        top_k: Maximum number of classes to return. Values larger than the
            number of classes are clamped; values <= 0 give an empty result.

    Returns:
        Dict keyed by 1-based rank, ordered from most to least likely. Each
        value is ``{'label': str, 'probability': float}``, where the
        probability is the softmax of the model logits over all classes.
    """
    # Ensure the model is in evaluation mode.
    model.eval()

    with torch.no_grad():
        logits = model(pixel_values=image_tensor.unsqueeze(0)).logits
        # Logits are unbounded scores; softmax converts them into the
        # probabilities that the 'probability' field promises. Index 0 selects
        # the only item in the batch.
        probabilities = torch.softmax(logits, dim=-1)[0]

        # Clamp so that asking for more classes than exist cannot fail.
        k = min(top_k, probabilities.shape[0])
        if k <= 0:
            return {}

        # topk returns values and indices already sorted in descending order.
        top_probs, top_indices = torch.topk(probabilities, k)

    # Decode all indices in one call instead of once per prediction.
    labels = encoder.inverse_transform(top_indices.numpy())

    return {
        rank: {'label': str(label), 'probability': float(prob)}
        for rank, (label, prob) in enumerate(
            zip(labels, top_probs.tolist()), start=1
        )
    }
# Gradio UI around test2: one image input, one text output.
iface = gr.Interface(test2, "image", "text")

# Start the app; share=True is passed through to Gradio's launch().
iface.launch(share=True)