import functools

import layoutparser as lp
import pytesseract
from pdf2image import convert_from_path
from PIL import Image

# Minimum confidence a detection must reach to be kept by the layout model.
_LAYOUT_SCORE_THRESHOLD = 0.6

# PubLayNet class names keyed by the ids the EfficientDet model emits.
# EfficientDet predictions are 1-based (0 is reserved for background), unlike
# the 0-based ids used by the Detectron2 PubLayNet models.
_PUBLAYNET_LABEL_MAP = {
    1: "Text",
    2: "Title",
    3: "List",
    4: "Table",
    5: "Figure",
}


def convert_pdf_to_images(pdf_path):
    """Rasterise a PDF into page images.

    Args:
        pdf_path: Location of the PDF file, passed straight through to
            ``pdf2image.convert_from_path``.

    Returns:
        Whatever ``pdf2image.convert_from_path`` returns for ``pdf_path``
        using its default settings (one image per page).
    """
    return convert_from_path(pdf_path)


@functools.lru_cache(maxsize=1)
def _load_layout_model():
    """Build the PubLayNet EfficientDet layout model once and reuse it.

    Constructing the model loads its weights, so the instance is cached for
    the lifetime of the process instead of being rebuilt for every page.
    """
    return lp.EfficientDetLayoutModel(
        "lp://efficientdet/PubLayNet",
        # EfficientDetLayoutModel takes a dict here; the Detectron2-style
        # ["MODEL.ROI_HEADS.SCORE_THRESH_TEST", value] list is not understood
        # by this backend, so the threshold would not be applied.
        extra_config={"output_confidence_threshold": _LAYOUT_SCORE_THRESHOLD},
        label_map=_PUBLAYNET_LABEL_MAP,
    )


def analyze_layout(image):
    """Detect layout regions (text, titles, lists, tables, figures) in an image.

    Args:
        image: A page image accepted by the layout model's ``detect`` method,
            e.g. one of the images produced by ``convert_pdf_to_images``.

    Returns:
        The detection result returned by the model's ``detect`` call, with
        region types named according to the PubLayNet label map.
    """
    return _load_layout_model().detect(image)