Spaces:

syedfaisalabrar
/

License_Classification

Sleeping

App Files Community

syedfaisalabrar committed on Feb 26

Commit

4895b5f

verified ·

1 Parent(s): 479a272

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -23

app.py CHANGED Viewed

@@ -49,7 +49,7 @@ def preprocessing(image):
     image = ImageEnhance.Brightness(image).enhance(0.8) # Reduce brightness
     # Convert to tensor without resizing
-    image_tensor = torch.tensor(np.array(image)).permute(2, 0, 1).float() / 255.0  # Shape: [C, H, W]
     return image_tensor
@@ -64,21 +64,13 @@ def imageRotation(image):
 def detect_document(image):
     """Detects front and back of the document using YOLO."""
-    image = np.array(image)
     results = modelY(image, conf=0.85)
     detected_classes = set()
     labels = []
     bounding_boxes = []
-    if isinstance(image, np.ndarray):
-        if image.dtype != np.uint8:
-            image = (image * 255).clip(0, 255).astype(np.uint8)  # Convert float to uint8
-        # Ensure correct shape (H, W, C)
-        if image.shape[0] == 1 and image.shape[1] == 1:
-            image = np.squeeze(image)
     for result in results:
         for box in result.boxes:
             x1, y1, x2, y2 = map(int, box.xyxy[0])
@@ -89,8 +81,9 @@ def detect_document(image):
             detected_classes.add(class_name)
             label = f"{class_name} {conf:.2f}"
             labels.append(label)
-            bounding_boxes.append((x1, y1, x2, y2, class_name, conf))  # Store bounding box with class and confidence
             cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
             cv2.putText(image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
@@ -99,17 +92,21 @@ def detect_document(image):
     if missing_classes:
         labels.append(f"Missing: {', '.join(missing_classes)}")
-    return Image.fromarray(image), labels, bounding_boxes
 def crop_image(image, bounding_boxes):
-    """Crops detected bounding boxes from the image."""
     cropped_images = {}
-    image = np.array(image)
     for (x1, y1, x2, y2, class_name, conf) in bounding_boxes:
         cropped = image[y1:y2, x1:x2]
-        cropped_images[class_name] = Image.fromarray(cropped)
     return cropped_images
@@ -136,30 +133,34 @@ def ensure_numpy(image):
         # Convert grayscale to 3-channel image
         image = np.stack([image] * 3, axis=-1)
-    return image
 def predict(image):
     """Pipeline: Preprocess -> Detect -> Crop -> Vision AI API."""
-    processed_image = preprocessing(image)
-    rotated_image = ensure_numpy(processed_image)
     detected_image, labels, bounding_boxes = detect_document(rotated_image)
     cropped_images = crop_image(rotated_image, bounding_boxes)
     # Call Vision AI separately for front and back if detected
-    front_result, back_result = None, None
     if "front" in cropped_images:
         front_result = vision_ai_api(cropped_images["front"], "front")
     if "back" in cropped_images:
         back_result = vision_ai_api(cropped_images["back"], "back")
     api_results = {
         "front": front_result,
         "back": back_result
     }
-    single_image = cropped_images.get("front") or cropped_images.get("back") or detected_image
-    return single_image, labels, api_results
 iface = gr.Interface(

     image = ImageEnhance.Brightness(image).enhance(0.8) # Reduce brightness
     # Convert to tensor without resizing
+    # image_tensor = torch.tensor(np.array(image)).permute(2, 0, 1).float() / 255.0  # Shape: [C, H, W]
     return image_tensor
 def detect_document(image):
     """Detects front and back of the document using YOLO."""
+    image = ensure_numpy(image)  # Ensure valid format
     results = modelY(image, conf=0.85)
     detected_classes = set()
     labels = []
     bounding_boxes = []
     for result in results:
         for box in result.boxes:
             x1, y1, x2, y2 = map(int, box.xyxy[0])
             detected_classes.add(class_name)
             label = f"{class_name} {conf:.2f}"
             labels.append(label)
+            bounding_boxes.append((x1, y1, x2, y2, class_name, conf))
+            # Draw bounding box
             cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
             cv2.putText(image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
     if missing_classes:
         labels.append(f"Missing: {', '.join(missing_classes)}")
+    return Image.fromarray(image.astype(np.uint8)), labels, bounding_boxes
 def crop_image(image, bounding_boxes):
+    """Crops detected bounding boxes from the image safely."""
+    image = ensure_numpy(image)  # Ensure image is NumPy format
     cropped_images = {}
     for (x1, y1, x2, y2, class_name, conf) in bounding_boxes:
+        # Ensure the bounding box is within image bounds
+        x1, y1, x2, y2 = max(0, x1), max(0, y1), min(image.shape[1], x2), min(image.shape[0], y2)
         cropped = image[y1:y2, x1:x2]
+        if cropped.size > 0:  # Check if valid
+            cropped_images[class_name] = Image.fromarray(cropped)
     return cropped_images
         # Convert grayscale to 3-channel image
         image = np.stack([image] * 3, axis=-1)
+    # return image
+    return image.astype(np.uint8)
 def predict(image):
     """Pipeline: Preprocess -> Detect -> Crop -> Vision AI API."""
+    processed_image = preprocessing(image)  # Enhanced PIL image
+    rotated_image = ensure_numpy(processed_image)  # Convert to NumPy
     detected_image, labels, bounding_boxes = detect_document(rotated_image)
+    if not bounding_boxes:
+        return detected_image, labels, {"error": "No document detected!"}
     cropped_images = crop_image(rotated_image, bounding_boxes)
     # Call Vision AI separately for front and back if detected
+    front_result = back_result = None
     if "front" in cropped_images:
         front_result = vision_ai_api(cropped_images["front"], "front")
     if "back" in cropped_images:
         back_result = vision_ai_api(cropped_images["back"], "back")
     api_results = {
         "front": front_result,
         "back": back_result
     }
+    return detected_image, labels, api_results
 iface = gr.Interface(