Apply suggestions from @ThomasDh-C code review
Browse files- handler.py +6 -16
handler.py
CHANGED
|
@@ -24,11 +24,9 @@ easyocr.Reader(["en"])
|
|
| 24 |
class EndpointHandler:
|
| 25 |
def __init__(self, model_dir: str = "/repository") -> None:
|
| 26 |
self.device = (
|
| 27 |
-
torch.device("cuda")
|
| 28 |
-
if torch.cuda.is_available()
|
| 29 |
-
else torch.device("mps")
|
| 30 |
-
if torch.backends.mps.is_available()
|
| 31 |
-
else torch.device("cpu")
|
| 32 |
)
|
| 33 |
|
| 34 |
# bounding box detection model
|
|
@@ -54,7 +52,7 @@ class EndpointHandler:
|
|
| 54 |
# data should contain the following:
|
| 55 |
# "inputs": {
|
| 56 |
# "image": url/base64,
|
| 57 |
-
# (optional) "image_size":
|
| 58 |
# (optional) "bbox_threshold": float,
|
| 59 |
# (optional) "iou_threshold": float,
|
| 60 |
# }
|
|
@@ -63,25 +61,17 @@ class EndpointHandler:
|
|
| 63 |
# read image from either url or base64 encoding
|
| 64 |
image = load_image(data["image"])
|
| 65 |
|
| 66 |
-
# box_overlay_ratio = image.size[0] / 3200
|
| 67 |
-
# bbox_config = {
|
| 68 |
-
# "text_scale": 0.8 * box_overlay_ratio,
|
| 69 |
-
# "text_thickness": max(int(2 * box_overlay_ratio), 1),
|
| 70 |
-
# "text_padding": max(int(3 * box_overlay_ratio), 1),
|
| 71 |
-
# "thickness": max(int(3 * box_overlay_ratio), 1),
|
| 72 |
-
# }
|
| 73 |
-
|
| 74 |
ocr_texts, ocr_bboxes = self.check_ocr_bboxes(
|
| 75 |
image,
|
| 76 |
out_format="xyxy",
|
| 77 |
-
ocr_kwargs={"text_threshold": ...},  # old value truncated in this diff view; key grounded by the + side
|
| 78 |
)
|
| 79 |
annotated_image, filtered_bboxes_out = self.get_som_labeled_img(
|
| 80 |
image,
|
| 81 |
image_size=data.get("image_size", None),
|
| 82 |
ocr_texts=ocr_texts,
|
| 83 |
ocr_bboxes=ocr_bboxes,
|
| 84 |
-
bbox_threshold=data.get("bbox_threshold", 0....),  # old default truncated in this diff view; new side sets 0.05
|
| 85 |
iou_threshold=data.get("iou_threshold", None),
|
| 86 |
)
|
| 87 |
return {
|
|
|
|
| 24 |
class EndpointHandler:
|
| 25 |
def __init__(self, model_dir: str = "/repository") -> None:
|
| 26 |
self.device = (
|
| 27 |
+
torch.device("cuda") if torch.cuda.is_available()
|
| 28 |
+
else (torch.device("mps") if torch.backends.mps.is_available()
|
| 29 |
+
else torch.device("cpu"))
|
|
|
|
|
|
|
| 30 |
)
|
| 31 |
|
| 32 |
# bounding box detection model
|
|
|
|
| 52 |
# data should contain the following:
|
| 53 |
# "inputs": {
|
| 54 |
# "image": url/base64,
|
| 55 |
+
# (optional) "image_size": {"w": int, "h": int},
|
| 56 |
# (optional) "bbox_threshold": float,
|
| 57 |
# (optional) "iou_threshold": float,
|
| 58 |
# }
|
|
|
|
| 61 |
# read image from either url or base64 encoding
|
| 62 |
image = load_image(data["image"])
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
ocr_texts, ocr_bboxes = self.check_ocr_bboxes(
|
| 65 |
image,
|
| 66 |
out_format="xyxy",
|
| 67 |
+
ocr_kwargs={"text_threshold": 0.8},
|
| 68 |
)
|
| 69 |
annotated_image, filtered_bboxes_out = self.get_som_labeled_img(
|
| 70 |
image,
|
| 71 |
image_size=data.get("image_size", None),
|
| 72 |
ocr_texts=ocr_texts,
|
| 73 |
ocr_bboxes=ocr_bboxes,
|
| 74 |
+
bbox_threshold=data.get("bbox_threshold", 0.05),
|
| 75 |
iou_threshold=data.get("iou_threshold", None),
|
| 76 |
)
|
| 77 |
return {
|