atlury committed on
Commit
4dee5e9
·
verified ·
1 Parent(s): 778225c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -18
app.py CHANGED
@@ -6,7 +6,7 @@ import os
6
  import requests
7
  import torch
8
 
9
- # Ensure the model file is in the correct location
10
  model_path = "yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
11
  if not os.path.exists(model_path):
12
  # Download the model file if it doesn't exist
@@ -17,21 +17,25 @@ if not os.path.exists(model_path):
17
 
18
  # Load the document segmentation model
19
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
20
- docseg_model = YOLO(model_path).to(device)
21
 
22
  def process_image(image):
23
- # Convert image to the format YOLO model expects
24
- image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
25
- results = docseg_model(image)
26
-
27
- # Extract annotated image from results
28
- annotated_img = results[0].plot()
29
- annotated_img = cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB)
30
-
31
- # Prepare detected areas and labels as text output
32
- detected_areas_labels = "\n".join(
33
- [f"{box.label}: {box.conf:.2f}" for box in results[0].boxes]
34
- )
 
 
 
 
35
 
36
  return annotated_img, detected_areas_labels
37
 
@@ -48,7 +52,5 @@ with gr.Blocks() as interface:
48
  outputs=[output_image, output_text]
49
  )
50
 
51
- interface.launch()
52
-
53
- if __name__ == "__main__":
54
- interface.launch()
 
6
  import requests
7
  import torch
8
 
9
+ # Load the model file
10
  model_path = "yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
11
  if not os.path.exists(model_path):
12
  # Download the model file if it doesn't exist
 
17
 
18
  # Load the document segmentation model
19
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
20
+ docseg_model = YOLO(model_path) # Remove .to(device) to let ultralytics auto-detect
21
 
22
  def process_image(image):
23
+ try:
24
+ # Convert image to the format YOLO model expects
25
+ image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
26
+ results = docseg_model.predict(image) # Use predict for inference
27
+ result = results[0] # Get the first (and usually only) result
28
+
29
+ # Extract annotated image from results
30
+ annotated_img = result.plot() # Simplified plotting
31
+ annotated_img = cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB)
32
+
33
+ # Prepare detected areas and labels as text output
34
+ detected_areas_labels = "\n".join(
35
+ [f"{box.label.upper()}: {box.conf:.2f}" for box in result.boxes] # Uppercase labels
36
+ )
37
+ except Exception as e:
38
+ return None, f"Error during processing: {e}" # Error handling
39
 
40
  return annotated_img, detected_areas_labels
41
 
 
52
  outputs=[output_image, output_text]
53
  )
54
 
55
+ # Launch the interface (remove the conditional launch)
56
+ interface.launch(share=True) # Allow sharing for easier debugging