Spaces:
Sleeping
Sleeping
File size: 1,413 Bytes
c7f3bef b215e79 aeab1f2 c7f3bef 3e75990 c7f3bef f0cbdb2 c7f3bef aeab1f2 c7f3bef f0cbdb2 875cd79 f0cbdb2 a883718 bed03e6 f0cbdb2 875cd79 3e75990 f0cbdb2 3e75990 f0cbdb2 3e75990 b215e79 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import os
import tempfile
import zipfile

import fitz
import gradio as gr
from donut import DonutModel
def demo_process(input_pdf):
    """Run invoice extraction on the first page of an uploaded PDF.

    The first page is rasterized with PyMuPDF, written as a PNG into a
    private temporary directory, and handed to the module-level ``model``
    together with ``task_prompt``. The temporary directory is removed when
    the function exits, whether or not inference succeeds.

    Args:
        input_pdf: Path of the uploaded PDF, or an object exposing that
            path through a ``name`` attribute.

    Returns:
        The first entry of the ``"predictions"`` list returned by
        ``model.inference``.

    Raises:
        ValueError: If no file was provided.
    """
    if input_pdf is None:
        raise ValueError("No PDF file was provided.")

    # Accept either a plain path or a file wrapper carrying the path in
    # ``.name``; a plain string has no ``name`` attribute and is used as-is.
    pdf_path = getattr(input_pdf, "name", input_pdf)

    # Only the first page is rendered. The context manager closes the
    # document even if rendering raises.
    with fitz.open(pdf_path) as pdf:
        image_bytes = pdf[0].get_pixmap().tobytes("png")

    # splitext keeps dots inside the file name ("a.b.pdf" -> "a.b").
    stem = os.path.splitext(os.path.basename(pdf_path))[0]

    # A per-call temporary directory avoids collisions between concurrent
    # requests and is cleaned up automatically, including on errors.
    with tempfile.TemporaryDirectory() as temp_dir:
        # The bytes are PNG-encoded, so the file gets a matching extension.
        image_path = os.path.join(temp_dir, stem + ".png")
        with open(image_path, "wb") as f:
            f.write(image_bytes)

        # NOTE(review): the previous code passed the bare file name, which
        # did not resolve to the file written above; the full path is
        # passed instead. If ``DonutModel.inference`` requires an
        # already-loaded image object rather than a path, load it from
        # ``image_path`` here -- confirm against the donut API.
        result = model.inference(image=image_path, prompt=task_prompt)
        return result["predictions"][0]
# Task identifier and the prompt string passed to ``model.inference``.
task_name = "SGSInvoice"
task_prompt = f"<s_{task_name}>"

# Load the pretrained checkpoint once at import time so every request
# reuses the same model instance.
model = DonutModel.from_pretrained("uartimcs/donut-invoice-extract")
model.eval()

# ``demo_process`` returns the model prediction, not a ZIP archive, so the
# output component and the user-facing text describe the extraction result.
# NOTE(review): gr.JSON assumes the prediction is JSON-serializable -- confirm.
iface = gr.Interface(
    fn=demo_process,
    inputs=gr.File(label="Upload PDF File"),
    outputs=gr.JSON(label="Extracted Invoice Fields"),
    title="PDF Invoice Extractor",
    description=(
        "Upload a PDF invoice; the first page is converted to an image and "
        "the fields extracted by the Donut model are returned."
    ),
)
iface.launch()
|