RT-Detr-ArabicLayoutAnalysisR

Sleeping

App Files Files Community

omarelsayeed committed on Nov 30, 2024

Commit

418b440

verified ·

1 Parent(s): aecb855

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -8

app.py CHANGED Viewed

@@ -52,14 +52,56 @@ def parse_logits(logits: torch.Tensor, length: int) -> List[int]:
                 ret[idx] = orders[idx].pop()
     return ret
-def get_orders(image_path, boxes):
-    b = scale_and_normalize_boxes(boxes)
-    inputs = boxes2inputs(b)
-    inputs = {k: v.to(layout_model.device) for k, v in inputs.items()}  # Move inputs to model device
-    logits = layout_model(**inputs).logits.cpu().squeeze(0)  # Perform inference and get logits
-    orders = parse_logits(logits, len(b))
-    return orders
 model_dir = snapshot_download("omarelsayeed/DETR-ARABIC-DOCUMENT-LAYOUT-ANALYSIS") + "/rtdetr_1024_crops.pt"

                 ret[idx] = orders[idx].pop()
     return ret
def get_orders(_, bounding_boxes, tolerance=10):
    """
    Compute a right-to-left reading order for boxes given in xyxy format.

    Boxes are grouped into rows by their top edge (y1) and each row is then
    read from the largest x1 to the smallest, as in Arabic layouts.

    Args:
    - _: Unused; kept so the signature stays compatible with the earlier
         ``get_orders(image_path, boxes)`` call sites.
    - bounding_boxes: Sequence of (x1, y1, x2, y2), where (x1, y1) is the
                      top-left corner and (x2, y2) is the bottom-right corner.
    - tolerance: Maximum y1 difference (exclusive) for a box to join an
                 existing row. Defaults to 10.
    Returns:
    - A list of indices into ``bounding_boxes`` in reading order. The result
      is always a permutation of ``range(len(bounding_boxes))``, even when
      several boxes have identical coordinates. Empty input yields ``[]``.
    """
    boxes = np.asarray([tuple(b) for b in bounding_boxes], dtype=float)
    if boxes.size == 0:
        # An empty list produces a 1-D array; column indexing below would fail.
        return []

    # lexsort uses the LAST key as the primary one: y1 ascending, ties broken
    # by x1 ascending. The right-to-left ordering is applied per row later.
    visit_order = np.lexsort((boxes[:, 0], boxes[:, 1]))

    # Rows hold original indices rather than box values, so duplicate boxes
    # stay distinguishable and no value lookup is needed at the end.
    rows = []
    for idx in visit_order:
        idx = int(idx)
        y1 = boxes[idx, 1]
        for row in rows:
            # Compare against the most recently added member of the row, so a
            # row can drift vertically as long as consecutive boxes are close.
            if abs(boxes[row[-1], 1] - y1) < tolerance:
                row.append(idx)
                break
        else:
            rows.append([idx])

    reading_order = []
    for row in rows:
        # Stable sort: boxes sharing the same x1 keep their top-to-bottom order.
        row.sort(key=lambda i: -boxes[i, 0])
        reading_order.extend(row)
    return reading_order
+# def get_orders(image_path, boxes):
+#     b = scale_and_normalize_boxes(boxes)
+#     inputs = boxes2inputs(b)
+#     inputs = {k: v.to(layout_model.device) for k, v in inputs.items()}  # Move inputs to model device
+#     logits = layout_model(**inputs).logits.cpu().squeeze(0)  # Perform inference and get logits
+#     orders = parse_logits(logits, len(b))
+#     return orders
 model_dir = snapshot_download("omarelsayeed/DETR-ARABIC-DOCUMENT-LAYOUT-ANALYSIS") + "/rtdetr_1024_crops.pt"