Spaces:

atlury
/

document-layout-comparison

Sleeping

App Files Files Community

document-layout-comparison / app.py

atlury

Update app.py

79b9485 verified 10 months ago

raw

history blame

2.18 kB

	import gradio as gr
	from ultralytics import YOLO
	import cv2
	import numpy as np
	import os
	import requests
	import torch
	import huggingface_hub
	from accelerate import Accelerator
	from huggingface_hub import notebook_login # Added this for HF login
	from huggingface_hub.utils import HfHubHTTPError # Added this to catch HF login errors
	# Authenticate with the Hugging Face Hub.
	# `notebook_login()` is a Jupyter/Colab widget helper and cannot be used from
	# a headless script such as a Space's app.py, so authenticate from the
	# HF_TOKEN environment variable instead (no-op when it is not configured).
	hf_token = os.environ.get("HF_TOKEN")
	if hf_token:
	    huggingface_hub.login(token=hf_token)

	# Initialize Accelerator
	accelerator = Accelerator()


	# Load the model file
	model_path = "yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
	if not os.path.exists(model_path):
	    # Download the model file if it doesn't exist
	    model_url = "https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet/resolve/main/yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
	    # Write to a temporary name and rename only after a complete, successful
	    # download, so an interrupted or failed request can never leave a
	    # truncated/HTML file behind that later passes the os.path.exists check.
	    partial_path = model_path + ".part"
	    request_headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}
	    try:
	        with requests.get(
	            model_url, headers=request_headers, stream=True, timeout=60
	        ) as response:
	            # requests does not raise on 4xx/5xx by itself; without this the
	            # error page body would be saved as the model checkpoint.
	            response.raise_for_status()
	            with open(partial_path, "wb") as f:
	                # Stream in 1 MiB chunks instead of buffering the whole
	                # checkpoint in memory.
	                for chunk in response.iter_content(chunk_size=1 << 20):
	                    f.write(chunk)
	        os.replace(partial_path, model_path)
	    except requests.HTTPError as e:
	        if e.response is not None and e.response.status_code == 401:
	            print("Authentication error. Please login to Hugging Face Hub.")
	        # Without the checkpoint the model cannot be loaded below, so surface
	        # the failure instead of continuing with a missing file.
	        raise
	    finally:
	        # Clean up a partial download on any failure path.
	        if os.path.exists(partial_path):
	            os.remove(partial_path)

	# Load the document segmentation model
	docseg_model = YOLO(model_path)


	# NOTE(review): Accelerator.prepare() is designed for training objects
	# (torch modules, optimizers, dataloaders); confirm it is actually needed for
	# an inference-only Ultralytics model.
	docseg_model = accelerator.prepare(docseg_model)

	def process_image(image):
	    """Run document-layout detection on one image and summarize the result.

	    Args:
	        image: The input picture in RGB channel order, in any form
	            ``np.array`` can convert (presumably a PIL image or an
	            HxWx3 array coming from the Gradio input -- confirm against the
	            interface definition).

	    Returns:
	        A ``(annotated_img, detected_areas_labels)`` tuple. ``annotated_img``
	        is the RGB image with the detections drawn on it and
	        ``detected_areas_labels`` holds one ``"LABEL: confidence"`` line per
	        detected box. On any failure the tuple is
	        ``(None, "Error during processing: <reason>")`` so the UI can show
	        the message instead of crashing.
	    """
	    if image is None:
	        return None, "Error during processing: no image provided"

	    try:
	        # Convert image to the format YOLO model expects (OpenCV-style BGR).
	        bgr_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

	        # Pass the numpy array directly: Ultralytics treats torch tensors as
	        # already preprocessed (BCHW, float, RGB in [0, 1]), so wrapping this
	        # HWC uint8 BGR array in a tensor makes prediction fail or misbehave.
	        # The predictor handles resizing, normalization and device placement.
	        results = docseg_model.predict(bgr_image)
	        result = results[0]  # Get the first (and usually only) result

	        # Extract annotated image from results and convert back to RGB.
	        annotated_img = cv2.cvtColor(result.plot(), cv2.COLOR_BGR2RGB)

	        # Prepare detected areas and labels as text output.
	        # Boxes expose class ids (`cls`) and scores (`conf`) as tensors; the
	        # human-readable name comes from the `names` mapping on the result.
	        class_names = result.names
	        class_ids = result.boxes.cls.tolist()
	        scores = result.boxes.conf.tolist()
	        detected_areas_labels = "\n".join(
	            f"{class_names[int(class_id)].upper()}: {score:.2f}"
	            for class_id, score in zip(class_ids, scores)
	        )
	    except Exception as e:
	        # UI boundary: report the failure as text rather than raising.
	        return None, f"Error during processing: {e}"  # Error handling

	    return annotated_img, detected_areas_labels

	# The rest of the code remains the same (Gradio interface)