CXDJY committed on
Commit
fb98024
·
1 Parent(s): cedafcd

added application

Browse files
Files changed (3) hide show
  1. app.py +94 -0
  2. encoder.npy +3 -0
  3. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import numpy as np
4
+ from transformers import ViTForImageClassification, ViTModel, ViTImageProcessor
5
+ from PIL import Image
6
+ import PIL
7
+ import io
8
+ from sklearn.preprocessing import LabelEncoder
9
+ import json
10
+
11
def greet(name):
    """Return a greeting of the form ``Hello <name>!!``.

    ``name`` must be a string; anything else raises ``TypeError``.
    """
    pieces = ("Hello ", name, "!!")
    return "".join(pieces)
13
+
14
+
15
async def test2(file, top_k: int = 5):
    """Classify an uploaded image and return the result as a JSON string.

    Args:
        file: The image handed over by the Gradio ``"image"`` input; it is
            passed straight to ``preprocess_image``.
        top_k: How many of the best-scoring classes to include.

    Returns:
        A JSON document of the form ``{"predictions": {...}}`` where the
        inner mapping is whatever ``predict`` returns.
    """
    # Turn the raw image into the tensor the model expects, then score it.
    pixel_tensor = preprocess_image(file)
    top_predictions = predict(pixel_tensor, top_k)

    return json.dumps({"predictions": top_predictions})
32
+
33
# Label encoder used to map predicted class indices back to label names.
encoder = LabelEncoder()
# NOTE(security): allow_pickle=True unpickles arbitrary objects while loading.
# This is acceptable only because encoder.npy ships with the application;
# never point this at a file from an untrusted source.
encoder.classes_ = np.load('encoder.npy', allow_pickle=True)

# Image preprocessing: resize to 224x224 and normalise each channel with
# mean 0.5 / std 0.5. The processor's keyword is ``size``; the previously
# used ``image_size`` is not a ViTImageProcessor parameter and was ignored.
# NOTE(review): do_rescale=False means 0-255 pixel values are normalised
# without first being scaled to 0-1 -- confirm this matches how the model
# was trained.
feature_extractor = ViTImageProcessor(
    size={"height": 224, "width": 224},
    do_resize=True,
    do_normalize=True,
    do_rescale=False,
    image_mean=[0.5, 0.5, 0.5],
    image_std=[0.5, 0.5, 0.5],
)

# Load the full classification model so that the classifier head weights
# come from the checkpoint. Building ``ViTForImageClassification(config)``
# and attaching only the backbone leaves the head randomly initialised,
# which makes every prediction meaningless.
# NOTE(review): assumes the checkpoint was saved with a 2112-class head --
# confirm against the training run.
model = ViTForImageClassification.from_pretrained(
    'pillIdentifierAI/pillIdentifier',
    num_labels=2112,  # Change this to the appropriate number of classes
)

# Kept as module-level names for backward compatibility.
pretrained_model = model.vit
config = model.config

model.eval()
53
+
54
+ # def preprocess_image(contents):
55
def preprocess_image(image):
    """Convert an input image into the pixel tensor fed to the model.

    Args:
        image: Array-like pixel data (as delivered by the Gradio ``"image"``
            input). Values are cast to ``uint8`` before use.

    Returns:
        A ``torch.float32`` tensor holding the processed pixel values of the
        single image (no batch dimension).

    Raises:
        ValueError: If ``image`` is ``None``, e.g. the form was submitted
            without selecting an image.
    """
    if image is None:
        # Gradio passes None when nothing was uploaded; fail with a clear
        # message instead of an opaque TypeError from the uint8 cast.
        raise ValueError("No image provided.")

    pil_image = Image.fromarray(np.uint8(image))
    if pil_image.mode != 'RGB':
        # Drop alpha / expand greyscale so the processor sees 3 channels.
        pil_image = pil_image.convert('RGB')

    # The processor works on batches; wrap the image and take the only item.
    inputs = feature_extractor(images=[pil_image])
    pixel_values = inputs['pixel_values'][0]

    return torch.tensor(pixel_values, dtype=torch.float32)
70
+
71
def predict(image_tensor, top_k=5):
    """Return the ``top_k`` most likely classes for one preprocessed image.

    Args:
        image_tensor: Tensor for a single image as produced by
            ``preprocess_image``; the batch dimension is added here.
        top_k: Number of highest-scoring classes to report. Values larger
            than the number of model outputs are clamped; values <= 0 yield
            an empty result.

    Returns:
        A dict mapping rank (1 = most likely) to
        ``{'label': str, 'probability': float}``. Probabilities are softmax
        values over all classes, so each lies in [0, 1].
    """
    # Ensure the model is in evaluation mode
    model.eval()

    with torch.no_grad():
        outputs = model(pixel_values=image_tensor.unsqueeze(0))  # Add batch dimension
        # Convert raw logits to real probabilities; reporting logits under
        # the 'probability' key would be misleading (they can be negative
        # or greater than 1).
        probabilities = torch.softmax(outputs.logits, dim=-1)[0]

    # Never ask for more classes than the model produces.
    k = max(0, min(int(top_k), probabilities.shape[0]))
    if k == 0:
        return {}

    # topk returns values and indices already sorted best-first.
    top_probabilities, top_indices = torch.topk(probabilities, k)

    # Decode all indices in one call rather than once per rank.
    labels = encoder.inverse_transform(top_indices.numpy())

    return {
        rank: {'label': str(label), 'probability': float(probability)}
        for rank, (label, probability) in enumerate(
            zip(labels, top_probabilities.tolist()), start=1
        )
    }
92
+
93
# Gradio UI: a single image input whose classification result (the JSON
# string returned by test2) is shown as plain text.
iface = gr.Interface(
    fn=test2,
    inputs="image",
    outputs="text",
)

# Start the web server and request a public share link.
iface.launch(share=True)
encoder.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccc6049f9944c2b553cd74ff33bd35525f86e2dcb920ecd985f58c549830ea3b
3
+ size 130192
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ tensorflow
4
+ numpy
5
+ scikit-learn
6
+ pillow
7
+ python-multipart