File size: 658 Bytes
77fbded
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
from pathlib import Path

from marker.converters.pdf import PdfConverter
from marker.models import create_model_dict
from marker.output import text_from_rendered

# Shared Marker converter, built once at import time so every call to
# convert_marker() reuses the same model artifacts.
# "debug_pdf_images" is switched on; convert_marker() then reads
# "debug_data_path" from the rendered metadata to find the image files.
# NOTE(review): presumably this flag is what makes Marker write the per-page
# debug images -- confirm against the Marker configuration docs.
marker_converter = PdfConverter(
    config={"debug_pdf_images": True},
    artifact_dict=create_model_dict(),
)


def _debug_page_order(image_path: Path) -> tuple[int, str]:
    """Sort key that orders debug images by a trailing numeric suffix."""
    # Compare a trailing "_<number>" numerically so that e.g. "..._10" sorts
    # after "..._2"; stems without such a suffix sort first, by file name.
    suffix = image_path.stem.rpartition("_")[2]
    page_number = int(suffix) if suffix.isdecimal() else -1
    return page_number, image_path.name


def convert_marker(path: str, file_name: str) -> tuple[str, list[Path]]:
    """Convert a PDF with Marker and collect its page debug images.

    Args:
        path: Location of the PDF, passed straight to the module-level
            ``marker_converter``.
        file_name: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        A ``(text, debug_image_paths)`` tuple. ``text`` is the first element
        returned by ``text_from_rendered``. ``debug_image_paths`` lists the
        entries of the directory named by the ``"debug_data_path"`` metadata
        key whose stem contains ``"pdf_page"``, in a deterministic order. It
        is empty when that key is missing or does not name an existing
        directory.
    """
    rendered = marker_converter(path)
    text, _, _ = text_from_rendered(rendered)

    # The metadata key may be absent; Path(None) would raise an opaque
    # TypeError, so treat "no debug directory" as "no debug images".
    debug_data_path = rendered.metadata.get("debug_data_path")
    if not debug_data_path:
        return text, []

    debug_image_dir = Path(debug_data_path)
    if not debug_image_dir.is_dir():
        return text, []

    # iterdir() yields entries in arbitrary order; sort so callers always get
    # the same sequence for the same directory contents.
    debug_image_paths = sorted(
        (entry for entry in debug_image_dir.iterdir() if "pdf_page" in entry.stem),
        key=_debug_page_order,
    )

    return text, debug_image_paths