# pill_identifier / app.py
import gradio as gr
import torch
import numpy as np
from transformers import ViTForImageClassification, ViTModel, ViTImageProcessor
from PIL import Image
import PIL
import io
from sklearn.preprocessing import LabelEncoder
import json


def greet(name):
    # Simple demo function; not wired into the Gradio interface below.
    return "Hello " + name + "!!"


async def test2(file, top_k: int = 5):
    # Commented-out handling from an earlier file-upload version, kept for reference:
    # extension = file.filename.split(".")[-1] in ("jpg", "jpeg", "png")
    # if not extension:
    #     return "Image format must be jpg, jpeg, or png!"
    # # Read image contents
    # contents = await file.read()
    # image_tensor = preprocess_image(contents)

    # Preprocess the image (Gradio's "image" input passes a numpy array)
    image_tensor = preprocess_image(file)
    # Make predictions
    predictions = predict(image_tensor, top_k)
    item = {"predictions": predictions}
    return json.dumps(item)
encoder = LabelEncoder()
encoder.classes_ = np.load('encoder.npy', allow_pickle=True)
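
# For reference, a hedged sketch of how 'encoder.npy' (loaded above) could be
# regenerated at training time. The helper name and the `training_labels`
# argument are placeholders introduced here; they are not part of this Space.
def _export_label_encoder(training_labels, path='encoder.npy'):
    enc = LabelEncoder()
    enc.fit(training_labels)      # learns the sorted set of class names
    np.save(path, enc.classes_)   # saved array is what encoder.classes_ is loaded from
    return enc
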
pretrained_model = ViTModel.from_pretrained('pillIdentifierAI/pillIdentifier')
feature_extractor = ViTImageProcessor(
    size={"height": 224, "width": 224},  # ViTImageProcessor's resize target is `size`, not `image_size`
    do_resize=True,
    do_normalize=True,
    do_rescale=False,
    image_mean=[0.5, 0.5, 0.5],
    image_std=[0.5, 0.5, 0.5],
)
config = pretrained_model.config
config.num_labels = 2112  # Number of output classes; must match the label encoder
model = ViTForImageClassification(config)
model.vit = pretrained_model
model.eval()


# def preprocess_image(contents):
def preprocess_image(image):
    # Convert the numpy array from Gradio to a PIL Image
    # image = Image.open(io.BytesIO(contents))
    image = Image.fromarray(np.uint8(image))
    if image.mode != 'RGB':
        image = image.convert('RGB')
    # Use the feature extractor directly
    inputs = feature_extractor(images=[image])
    image_tensor = inputs['pixel_values'][0]
    # Convert to a float tensor
    image_tensor = torch.tensor(image_tensor, dtype=torch.float32)
    return image_tensor


def predict(image_tensor, top_k=5):
    # Ensure the model is in evaluation mode
    model.eval()
    # Make prediction
    with torch.no_grad():
        outputs = model(pixel_values=image_tensor.unsqueeze(0))  # Add batch dimension
        # Softmax converts raw logits into class probabilities
        probs = torch.softmax(outputs.logits, dim=-1).numpy()
    # Get top-k class indices and their probabilities (descending order)
    predictions = np.argsort(probs, axis=1)[:, ::-1][:, :top_k]
    probabilities = np.sort(probs, axis=1)[:, ::-1][:, :top_k]
    # Decode predictions using the label encoder and create the result dictionary
    result = {}
    for i in range(top_k):
        class_name = encoder.inverse_transform([predictions[0][i]])[0]
        probability = probabilities[0][i]
        result[i + 1] = {'label': str(class_name), 'probability': float(probability)}
    return result
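

# Optional local sanity check: a minimal sketch (not part of the original Space)
# showing how to exercise the pipeline without the Gradio UI. The PILL_SMOKE_TEST
# env flag and the 'example_pill.jpg' path are placeholders.
import os
if __name__ == "__main__" and os.environ.get("PILL_SMOKE_TEST"):
    import asyncio
    sample = np.array(Image.open("example_pill.jpg").convert("RGB"))
    print(asyncio.run(test2(sample, top_k=3)))
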
iface = gr.Interface(fn=test2, inputs="image", outputs="text")
iface.launch(share=True)