File size: 679 Bytes
e205139
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import gradio as gr
import os
from huggingface_hub import snapshot_download
from pdf_extract_kit import extract_text  # Assuming this function exists in the toolkit

# Local directory that holds the PDF-Extract-Kit model snapshot.
model_dir = "./PDF-Extract-Kit-1.0"

# Fetch the snapshot from the Hugging Face Hub only when nothing exists at
# model_dir yet; any existing path is taken to mean the download already ran.
if not os.path.exists(model_dir):
    snapshot_download(
        repo_id="opendatalab/PDF-Extract-Kit-1.0",
        local_dir=model_dir,
        max_workers=20,
    )

def process_pdf(file):
    """Extract text from an uploaded PDF via ``extract_text``.

    The upload can arrive in several shapes depending on how the Gradio
    ``File`` input is configured, so all of them are accepted here.

    Args:
        file: The uploaded PDF. One of:
            * ``bytes`` / ``bytearray`` -- raw file content (what
              ``gr.File(type="binary")`` delivers);
            * ``str`` or ``os.PathLike`` -- a path to the file on disk;
            * any object with a ``.name`` attribute holding the path;
            * ``None`` -- nothing was uploaded.

    Returns:
        Whatever ``extract_text`` returns for the PDF, or an empty string
        when no file was supplied.
    """
    # Submitting the form without a file passes None; there is nothing to do.
    if file is None:
        return ""

    if isinstance(file, (bytes, bytearray)):
        # extract_text is called with a filesystem path, so raw bytes have to
        # be written to a temporary file first.
        import tempfile

        # delete=False so the file can be closed (and reopened by path on any
        # platform) before extraction; it is removed explicitly afterwards.
        tmp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
        try:
            with tmp:
                tmp.write(file)
            return extract_text(tmp.name, model_dir=model_dir)
        finally:
            os.unlink(tmp.name)

    if isinstance(file, (str, os.PathLike)):
        pdf_path = os.fspath(file)
    else:
        # File-like wrapper (e.g. a temp-file object) exposing its path.
        pdf_path = file.name

    return extract_text(pdf_path, model_dir=model_dir)

# Build the web UI: a single file-upload input wired to process_pdf, with the
# function's return value shown as plain text.
# NOTE(review): per the Gradio docs, type="binary" hands the callback the
# upload as raw bytes rather than a path -- confirm against the installed
# Gradio version.
iface = gr.Interface(
    fn=process_pdf,
    inputs=gr.File(type="binary"),
    outputs="text",
    title="PDF Extractor",
)

# Start the Gradio server at import/run time.
iface.launch()