Update app.py
Browse files
app.py
CHANGED
@@ -52,14 +52,56 @@ def parse_logits(logits: torch.Tensor, length: int) -> List[int]:
|
|
52 |
ret[idx] = orders[idx].pop()
|
53 |
|
54 |
return ret
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
|
65 |
# Build the checkpoint location by appending the weights file name to whatever
# snapshot_download(...) returns for this repo id. Despite its name, `model_dir`
# therefore ends in "rtdetr_1024_crops.pt" rather than in a directory.
# NOTE(review): snapshot_download is presumably huggingface_hub's helper that
# returns the local snapshot folder — confirm against the file's imports.
model_dir = (
    snapshot_download("omarelsayeed/DETR-ARABIC-DOCUMENT-LAYOUT-ANALYSIS")
    + "/rtdetr_1024_crops.pt"
)
|
|
|
52 |
ret[idx] = orders[idx].pop()
|
53 |
|
54 |
return ret
|
55 |
+
def get_orders(_, bounding_boxes, tolerance=10):
    """Return the right-to-left (Arabic) reading order of layout boxes.

    Boxes are scanned top-to-bottom and grouped greedily into rows: a box
    joins the first existing row whose most recently added box has a ``y1``
    within ``tolerance`` of its own, otherwise it starts a new row. Rows are
    emitted in creation order, and inside each row boxes are ordered by
    descending ``x1`` (rightmost first).

    Args:
        _: Unused placeholder. It keeps the two-argument call shape of the
            earlier model-based ``get_orders(image_path, boxes)``.
        bounding_boxes: Iterable of boxes in xyxy format ``(x1, y1, x2, y2)``,
            where ``(x1, y1)`` is the top-left corner and ``(x2, y2)`` the
            bottom-right corner.
        tolerance: Maximum ``y1`` difference (exclusive) for two boxes to be
            considered part of the same row. Defaults to 10, the value that
            was previously hard-coded.

    Returns:
        A list of integer indices into ``bounding_boxes`` in reading order.
        Every input index appears exactly once, even when several boxes have
        identical coordinates. An empty input yields an empty list.
    """
    boxes = np.array([tuple(b) for b in bounding_boxes])
    if boxes.size == 0:
        # Nothing to order; also avoids indexing columns of an empty array.
        return []

    # Scan order: ascending y1 (primary key), then ascending x1 (secondary).
    # np.lexsort treats the LAST key as the primary one.
    scan_order = np.lexsort((boxes[:, 0], boxes[:, 1]))

    # Rows hold box *indices* rather than coordinates, so duplicate boxes stay
    # distinguishable and no value-based lookup is needed afterwards.
    rows = []
    for idx in scan_order:
        idx = int(idx)
        y1 = boxes[idx, 1]
        for row in rows:
            # Compare against the row's most recently added box.
            if abs(boxes[row[-1], 1] - y1) < tolerance:
                row.append(idx)
                break
        else:
            rows.append([idx])

    reading_order = []
    for row in rows:
        # Stable sort on negated x1: rightmost box first, ties keep scan order.
        row.sort(key=lambda i: -boxes[i, 0])
        reading_order.extend(row)
    return reading_order
|
96 |
+
|
97 |
+
|
98 |
+
# def get_orders(image_path, boxes):
|
99 |
+
# b = scale_and_normalize_boxes(boxes)
|
100 |
+
# inputs = boxes2inputs(b)
|
101 |
+
# inputs = {k: v.to(layout_model.device) for k, v in inputs.items()} # Move inputs to model device
|
102 |
+
# logits = layout_model(**inputs).logits.cpu().squeeze(0) # Perform inference and get logits
|
103 |
+
# orders = parse_logits(logits, len(b))
|
104 |
+
# return orders
|
105 |
|
106 |
|
107 |
# Build the checkpoint location by appending the weights file name to whatever
# snapshot_download(...) returns for this repo id. Despite its name, `model_dir`
# therefore ends in "rtdetr_1024_crops.pt" rather than in a directory.
# NOTE(review): snapshot_download is presumably huggingface_hub's helper that
# returns the local snapshot folder — confirm against the file's imports.
model_dir = (
    snapshot_download("omarelsayeed/DETR-ARABIC-DOCUMENT-LAYOUT-ANALYSIS")
    + "/rtdetr_1024_crops.pt"
)
|