omarelsayeed committed
Commit 418b440 · verified · 1 Parent(s): aecb855

Update app.py

Files changed (1)
  1. app.py +50 -8
app.py CHANGED
@@ -52,14 +52,56 @@ def parse_logits(logits: torch.Tensor, length: int) -> List[int]:
         ret[idx] = orders[idx].pop()
 
     return ret
-
-def get_orders(image_path, boxes):
-    b = scale_and_normalize_boxes(boxes)
-    inputs = boxes2inputs(b)
-    inputs = {k: v.to(layout_model.device) for k, v in inputs.items()}  # Move inputs to model device
-    logits = layout_model(**inputs).logits.cpu().squeeze(0)  # Perform inference and get logits
-    orders = parse_logits(logits, len(b))
-    return orders
+def get_orders(_, bounding_boxes):
+    """
+    Detects reading order for Arabic text layout, given bounding boxes in xyxy format.
+
+    Args:
+    - bounding_boxes: List of tuples (x1, y1, x2, y2), where
+      (x1, y1) is the top-left corner and (x2, y2) is the bottom-right corner of the bounding box.
+
+    Returns:
+    - A list of indices representing the reading order.
+    """
+    # Convert to a numpy array for easier processing
+    bounding_boxes = [tuple(b) for b in bounding_boxes]
+    boxes = np.array(bounding_boxes)
+
+    # Boxes are (x1, y1, x2, y2) with (x1, y1) the top-left corner.
+    # Sort by vertical position first (y1), then by horizontal position (x1).
+    sorted_indices = np.lexsort((boxes[:, 0], boxes[:, 1]))  # primary key y1, secondary key x1
+
+    # Group boxes into rows: a box joins a row when its y1 is within tolerance of the row's last box
+    rows = []
+    tolerance = 10  # Tolerance for grouping elements into rows
+    for idx in sorted_indices:
+        placed = False
+        for row in rows:
+            # Check if the box belongs to an existing row (y1 overlap within tolerance)
+            if abs(row[-1][1] - boxes[idx][1]) < tolerance:
+                row.append(boxes[idx])
+                placed = True
+                break
+        if not placed:
+            rows.append([boxes[idx]])
+
+    # Within each row, sort by x1 (right-to-left)
+    reading_order = []
+    for row in rows:
+        row.sort(key=lambda b: -b[0])  # Sort by x1 descending (right-to-left)
+        reading_order.extend(row)
+
+    # Return the indices of the bounding boxes in the correct reading order
+    return [bounding_boxes.index(tuple(box)) for box in reading_order]
+
+
+# def get_orders(image_path, boxes):
+#     b = scale_and_normalize_boxes(boxes)
+#     inputs = boxes2inputs(b)
+#     inputs = {k: v.to(layout_model.device) for k, v in inputs.items()}  # Move inputs to model device
+#     logits = layout_model(**inputs).logits.cpu().squeeze(0)  # Perform inference and get logits
+#     orders = parse_logits(logits, len(b))
+#     return orders
 
 
 model_dir = snapshot_download("omarelsayeed/DETR-ARABIC-DOCUMENT-LAYOUT-ANALYSIS") + "/rtdetr_1024_crops.pt"
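
For reference, a minimal usage sketch of the new row-grouping heuristic. The `from app import get_orders` path and the sample coordinates are illustrative assumptions, not part of the commit; importing app.py will also run its model-loading code.

# Minimal driver for the reading-order heuristic (hypothetical import path).
from app import get_orders

# Three made-up xyxy boxes: two side by side on the first line, one on the next line.
boxes = [
    (100, 50, 300, 90),    # first line, left box
    (500, 50, 700, 90),    # first line, right box
    (100, 200, 700, 240),  # second line
]

# The first argument (the image) is unused by the heuristic.
print(get_orders(None, boxes))  # expected: [1, 0, 2] -- right box first, then left, then the lower line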