Spaces:

yuanphon
/

NTHU-dogs-identification

Sleeping

App Files Community

yuanphon committed on Jan 5, 2024

Commit

5cc1c86

1 Parent(s): d444104

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -3

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import numpy as np
 from PIL import Image
 from transformers import ViTForImageClassification, AutoImageProcessor, AdamW, ViTImageProcessor, VisionEncoderDecoderModel, AutoTokenizer
 from torch.utils.data import DataLoader, TensorDataset
 model_path = '/home/user/app'
 train_pickle_path = 'train_data.pickle'
@@ -152,7 +153,7 @@ def train_model():
     model.save_pretrained("model")
-def predict():
     # Load the model
     model = ViTForImageClassification.from_pretrained(model_path, num_labels=num_classes)
@@ -162,7 +163,8 @@ def predict():
     # Load the test data
     # Load the image
-    img = cv2.imread(test_image_path)
     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
     # Resize the image to 224x224 pixels
@@ -214,16 +216,28 @@ def output(predict_class, caption):
     conj = ['are', 'is', 'dog']
     if predict_class == '不是校狗' or caption.find('dog') == -1:
         print(f'{caption} ({predict_class})')
     else:
         for c in conj:
             if caption.find(c) != -1:
                 print(f'{predict_class} is{caption[caption.find(c) + len(c):]}')
                 return
         print(f'{caption} ({predict_class})')
 if __name__ == '__main__':
     if not os.path.exists(model_path):
         train_model()
-    output(predict(), captioning())

 from PIL import Image
 from transformers import ViTForImageClassification, AutoImageProcessor, AdamW, ViTImageProcessor, VisionEncoderDecoderModel, AutoTokenizer
 from torch.utils.data import DataLoader, TensorDataset
+import gradio as gr
 model_path = '/home/user/app'
 train_pickle_path = 'train_data.pickle'
     model.save_pretrained("model")
+def predict(upload_image):
     # Load the model
     model = ViTForImageClassification.from_pretrained(model_path, num_labels=num_classes)
     # Load the test data
     # Load the image
+    # img = cv2.imread(test_image_path)
+    img = upload_image
     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
     # Resize the image to 224x224 pixels
     conj = ['are', 'is', 'dog']
     if predict_class == '不是校狗' or caption.find('dog') == -1:
         print(f'{caption} ({predict_class})')
+        return (f'{caption} ({predict_class})')
     else:
         for c in conj:
             if caption.find(c) != -1:
                 print(f'{predict_class} is{caption[caption.find(c) + len(c):]}')
                 return
         print(f'{caption} ({predict_class})')
 if __name__ == '__main__':
     if not os.path.exists(model_path):
         train_model()
+    # output(predict(), captioning())
+    # def greet(name):
+    #     return "Hello " + name + "!!"
+    def get_result(upload_image):
+        result = output(predict(upload_image), captioning())
+        return result
+    iface = gr.Interface(fn=get_result, inputs="image", outputs="text")
+    iface.launch()