Spaces:

mo-thecreator
/

detect-and-describe

Sleeping

Mohammed Abdeldayem committed on Oct 5, 2024

Commit

3fa08b4

verified ·

1 Parent(s): 9711b09

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -16,19 +16,23 @@ def init():
     # Step 1: Load the YOLOv5 model from Hugging Face
     try:
         object_detection_model = torch.hub.load('ultralytics/yolov5', 'custom', path='yolov5/weights/best14.pt', trust_repo=True)
         print("YOLOv5 model loaded successfully.")
     except Exception as e:
         print(f"Error loading YOLOv5 model: {e}")
     # Step 2: Load the ViT-GPT2 captioning model from Hugging Face
     try:
         captioning_model = VisionEncoderDecoderModel.from_pretrained("motheecreator/ViT-GPT2-Image-Captioning")
         tokenizer = AutoTokenizer.from_pretrained("motheecreator/ViT-GPT2-Image-Captioning")
         captioning_processor = AutoImageProcessor.from_pretrained("motheecreator/ViT-GPT2-Image-Captioning")
         print("ViT-GPT2 model loaded successfully.")
     except Exception as e:
         print(f"Error loading captioning model: {e}")
 # Utility function to crop objects from the image based on bounding boxes
 def crop_objects(image, boxes):
@@ -44,7 +48,7 @@ def process_image(image):
     # Ensure models are loaded
     if object_detection_model is None or captioning_model is None or tokenizer is None or captioning_processor is None:
-        init()  # Call init to load models
     try:
         # Step 1: Perform object detection with YOLOv5

     # Step 1: Load the YOLOv5 model from Hugging Face
     try:
+        print("Loading YOLOv5 model...")
         object_detection_model = torch.hub.load('ultralytics/yolov5', 'custom', path='yolov5/weights/best14.pt', trust_repo=True)
         print("YOLOv5 model loaded successfully.")
     except Exception as e:
         print(f"Error loading YOLOv5 model: {e}")
+        object_detection_model = None
     # Step 2: Load the ViT-GPT2 captioning model from Hugging Face
     try:
+        print("Loading ViT-GPT2 model...")
         captioning_model = VisionEncoderDecoderModel.from_pretrained("motheecreator/ViT-GPT2-Image-Captioning")
         tokenizer = AutoTokenizer.from_pretrained("motheecreator/ViT-GPT2-Image-Captioning")
         captioning_processor = AutoImageProcessor.from_pretrained("motheecreator/ViT-GPT2-Image-Captioning")
         print("ViT-GPT2 model loaded successfully.")
     except Exception as e:
         print(f"Error loading captioning model: {e}")
+        captioning_model, tokenizer, captioning_processor = None, None, None
 # Utility function to crop objects from the image based on bounding boxes
 def crop_objects(image, boxes):
     # Ensure models are loaded
     if object_detection_model is None or captioning_model is None or tokenizer is None or captioning_processor is None:
+        return None, {"error": "Models are not loaded properly"}, None
     try:
         # Step 1: Perform object detection with YOLOv5