File size: 700 Bytes
e205139
b39b068
 
e205139
b39b068
 
 
 
e205139
b39b068
 
 
 
 
e205139
b39b068
 
 
 
e205139
b39b068
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import gradio as gr
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from huggingface_hub import snapshot_download
import os

# Fetch the PDF-Extract-Kit snapshot from the Hugging Face Hub; the call
# returns the local directory that holds the downloaded files.
# NOTE(review): whether this repository actually loads under ModelScope's
# document_segmentation task cannot be confirmed from this file -- verify.
model_dir = snapshot_download(repo_id="opendatalab/PDF-Extract-Kit-1.0")

# Build the ModelScope pipeline once at import time so that every request
# handled by the UI reuses the same pipeline object.
pipe = pipeline(task=Tasks.document_segmentation, model=model_dir)

# Step 3: Define inference function
def extract_info_from_pdf(pdf_file):
    """Run the module-level pipeline on an uploaded PDF and return text.

    The upload may arrive in any of the shapes Gradio's ``File`` component
    can deliver, so the function normalises it to a filesystem path first.

    Args:
        pdf_file: The uploaded PDF. Accepted forms:
            * ``None`` -- nothing was uploaded;
            * ``bytes`` / ``bytearray`` -- raw file content (what
              ``gr.File(type="binary")`` provides);
            * ``str`` / ``os.PathLike`` -- a path to the file;
            * any object exposing a ``.name`` attribute holding the path
              (e.g. a temporary-file wrapper).

    Returns:
        ``str(result)`` of the pipeline output, or a short message when no
        file was supplied.
    """
    import tempfile  # local import: the file's import block does not have it

    # Gradio passes None when the user submits without choosing a file.
    if pdf_file is None:
        return "No PDF file was uploaded."

    if isinstance(pdf_file, (bytes, bytearray)):
        # Raw bytes carry no path, so spool them to a temporary .pdf that the
        # pipeline can open. delete=False lets the file be reopened by name
        # after the handle is closed; it is removed explicitly below.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp.write(pdf_file)
            pdf_path = tmp.name
        try:
            # NOTE(review): the {'file': <path>} input shape is kept from the
            # original call; confirm it matches what the pipeline expects.
            return str(pipe({'file': pdf_path}))
        finally:
            os.remove(pdf_path)

    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        # File-like wrapper: its .name attribute is the on-disk path.
        pdf_path = pdf_file.name

    return str(pipe({'file': pdf_path}))

# Step 4: Gradio UI
# A single file-upload input wired to the extraction callback, with the
# result shown as plain text. type="binary" means the callback receives the
# uploaded file's content as bytes.
demo = gr.Interface(
    title="PDF Extractor (PDF-Extract-Kit)",
    fn=extract_info_from_pdf,
    inputs=gr.File(label="Upload PDF", type="binary"),
    outputs="text",
)

# Start the web server for the interface.
demo.launch()