# Spaces: ignaciaginting/extract_from_doc
# Build error
# File size: 700 Bytes

import gradio as gr
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from huggingface_hub import snapshot_download
import os

# Step 1: Download the model
MODEL_REPO_ID = "opendatalab/PDF-Extract-Kit-1.0"

# Fetch the repository snapshot from the Hugging Face Hub; the call returns
# the local folder that holds the downloaded files.
model_dir = snapshot_download(MODEL_REPO_ID)

# Step 2: Build the ModelScope pipeline on top of the downloaded files.
# NOTE(review): confirm that this repository is loadable by a ModelScope
# `document_segmentation` pipeline -- nothing in this file verifies it.
pipe = pipeline(task=Tasks.document_segmentation, model=model_dir)

# Step 3: Define inference function
def extract_info_from_pdf(pdf_file):
    """Run the document pipeline on an uploaded PDF and return the result as text.

    Args:
        pdf_file: The upload as delivered by the Gradio ``File`` input. It may
            be raw ``bytes`` (``type="binary"``), a filesystem path, or a
            file wrapper object exposing its path as ``.name``. ``None`` means
            that nothing was uploaded.

    Returns:
        ``str()`` of whatever the module-level ``pipe`` returns, or a short
        message when no file was supplied.
    """
    import tempfile

    if pdf_file is None:
        return "No PDF uploaded."

    if isinstance(pdf_file, (bytes, bytearray)):
        # The pipeline is given a file path, so raw bytes are spooled to a
        # temporary .pdf that is removed once the result has been rendered.
        fd, tmp_path = tempfile.mkstemp(suffix=".pdf")
        try:
            with os.fdopen(fd, "wb") as tmp:
                tmp.write(pdf_file)
            return str(pipe({'file': tmp_path}))
        finally:
            os.remove(tmp_path)

    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        # File wrapper objects carry the on-disk path in `.name`.
        pdf_path = pdf_file.name
    return str(pipe({'file': pdf_path}))

# Step 4: Gradio UI
# type="binary" makes Gradio hand the uploaded file's raw bytes to the callback.
pdf_upload = gr.File(label="Upload PDF", type="binary")

demo = gr.Interface(
    title="PDF Extractor (PDF-Extract-Kit)",
    fn=extract_info_from_pdf,
    inputs=pdf_upload,
    outputs="text",
)
demo.launch()