# app.py — uploaded by ignaciaginting (revision e205139, 679 bytes)
import gradio as gr
import os
from huggingface_hub import snapshot_download
from pdf_extract_kit import extract_text # Assuming this function exists in the toolkit
# Local directory holding the PDF-Extract-Kit snapshot. The download is
# skipped whenever this path already exists, so only the first start-up
# needs to reach the Hugging Face Hub.
model_dir = "./PDF-Extract-Kit-1.0"
if os.path.exists(model_dir):
    pass  # snapshot directory already present; nothing to fetch
else:
    snapshot_download(
        repo_id="opendatalab/PDF-Extract-Kit-1.0",
        local_dir=model_dir,
        max_workers=20,
    )
def process_pdf(file):
    """Extract the text of an uploaded PDF using PDF-Extract-Kit.

    The upload is normalised to a filesystem path before ``extract_text`` is
    called, because the shape of ``file`` depends on how the Gradio ``File``
    input is configured: ``type="binary"`` delivers raw ``bytes`` (which have
    no ``.name`` attribute), while other modes deliver a path string or a
    temp-file object exposing ``.name``.

    Args:
        file: The uploaded PDF as ``bytes``/``bytearray``, a path (``str`` or
            ``os.PathLike``), an object with a ``.name`` path attribute, or
            ``None`` when the form was submitted without a file.

    Returns:
        Whatever ``extract_text`` returns for the PDF (presumably the
        extracted text -- confirm against the toolkit), or a short notice
        string when no file was provided.
    """
    import tempfile  # function-scope import keeps this block self-contained

    if file is None:
        # Submitting the form with no upload must not crash the handler.
        return "No PDF file was uploaded."

    if isinstance(file, (bytes, bytearray)):
        # extract_text is called with a path, so spill the bytes to disk.
        # delete=False lets the file be reopened by name once the handle is
        # closed; it is removed explicitly below.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp.write(file)
            temp_path = tmp.name
        try:
            return extract_text(temp_path, model_dir=model_dir)
        finally:
            os.remove(temp_path)

    if isinstance(file, (str, os.PathLike)):
        pdf_path = os.fspath(file)
    else:
        # Temp-file style wrapper: the path lives on ``.name``.
        pdf_path = file.name
    return extract_text(pdf_path, model_dir=model_dir)
# Assemble the web UI: one file-upload input wired to process_pdf, with the
# result rendered as plain text, then start serving it.
# NOTE(review): with type="binary" Gradio hands the handler the raw file
# bytes rather than a path or file object -- confirm process_pdf accepts that.
pdf_upload = gr.File(type="binary")
iface = gr.Interface(
    fn=process_pdf,
    inputs=pdf_upload,
    outputs="text",
    title="PDF Extractor",
)
iface.launch()