Spaces:

atlury
/

document-layout-comparison

Sleeping

atlury committed on Jun 12, 2024

Commit

4dee5e9

verified ·

1 Parent(s): 778225c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import os
 import requests
 import torch
-# Ensure the model file is in the correct location
 model_path = "yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
 if not os.path.exists(model_path):
     # Download the model file if it doesn't exist
@@ -17,21 +17,25 @@ if not os.path.exists(model_path):
 # Load the document segmentation model
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-docseg_model = YOLO(model_path).to(device)
 def process_image(image):
-    # Convert image to the format YOLO model expects
-    image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-    results = docseg_model(image)
-    # Extract annotated image from results
-    annotated_img = results[0].plot()
-    annotated_img = cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB)
-    # Prepare detected areas and labels as text output
-    detected_areas_labels = "\n".join(
-        [f"{box.label}: {box.conf:.2f}" for box in results[0].boxes]
-    )
     return annotated_img, detected_areas_labels
@@ -48,7 +52,5 @@ with gr.Blocks() as interface:
         outputs=[output_image, output_text]
     )
-interface.launch()
-if __name__ == "__main__":
-    interface.launch()

 import requests
 import torch
+# Load the model file
 model_path = "yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
 if not os.path.exists(model_path):
     # Download the model file if it doesn't exist
 # Load the document segmentation model
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+docseg_model = YOLO(model_path)  # Remove .to(device) to let ultralytics auto-detect
 def process_image(image):
     """Run the document-layout model on one image and summarise detections.

     Args:
         image: An RGB image that ``np.array`` can convert (presumably the
             PIL image supplied by the Gradio input -- confirm against the
             interface definition).

     Returns:
         A ``(annotated_img, detected_areas_labels)`` tuple. On success,
         ``annotated_img`` is an RGB array with the detections drawn on it
         and ``detected_areas_labels`` holds one ``"LABEL: 0.87"`` line per
         detected box (empty string when nothing was detected). On any
         failure the tuple is ``(None, "Error during processing: ...")``.
     """
     try:
         # The model is fed a BGR array, so flip the channel order first.
         bgr_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
         # A single input image yields a single-element result list.
         result = docseg_model.predict(bgr_image)[0]
         # plot() draws on a BGR array; convert back to RGB for display.
         annotated_img = cv2.cvtColor(result.plot(), cv2.COLOR_BGR2RGB)
         # Boxes objects expose class ids (`cls`) and confidences (`conf`) as
         # tensors, not a `label` attribute. Class names are looked up in
         # `result.names`, and tolist() yields plain floats that accept the
         # ":.2f" format spec.
         boxes = result.boxes
         detected_areas_labels = "\n".join(
             f"{result.names[int(class_id)].upper()}: {confidence:.2f}"
             for class_id, confidence in zip(
                 boxes.cls.tolist(), boxes.conf.tolist()
             )
         )
     except Exception as e:
         # UI boundary: surface the failure in the text output instead of
         # crashing the Gradio callback.
         return None, f"Error during processing: {e}"
     return annotated_img, detected_areas_labels
         outputs=[output_image, output_text]
     )
+# Launch the interface (remove the conditional launch)
+interface.launch(share=True)  # Allow sharing for easier debugging