File size: 1,413 Bytes
c7f3bef
 
 
 
 
 
b215e79
aeab1f2
c7f3bef
 
 
 
3e75990
c7f3bef
 
 
 
f0cbdb2
c7f3bef
aeab1f2
c7f3bef
 
f0cbdb2
 
875cd79
f0cbdb2
a883718
bed03e6
f0cbdb2
 
875cd79
3e75990
 
 
f0cbdb2
 
3e75990
f0cbdb2
 
 
 
3e75990
b215e79
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import gradio as gr
import fitz
import os
import zipfile
from donut import DonutModel

def demo_process(input_pdf):
    """Run Donut invoice extraction on the first page of an uploaded PDF.

    Args:
        input_pdf: Path of the uploaded PDF, or a file-like wrapper that
            exposes the path as ``.name`` (some Gradio versions hand the
            callback a temp-file object instead of a plain string).

    Returns:
        The first entry of the ``"predictions"`` list returned by
        ``model.inference`` for the rendered page.

    Raises:
        ValueError: If no file was supplied.
    """
    if input_pdf is None or input_pdf == "":
        raise ValueError("No PDF file was provided.")

    # Normalise the upload to a filesystem path regardless of how Gradio
    # delivered it.
    if isinstance(input_pdf, (str, os.PathLike)):
        pdf_path = os.fspath(input_pdf)
    else:
        pdf_path = input_pdf.name

    # Only the first page is rendered. The context manager guarantees the
    # document is closed even if rendering fails.
    with fitz.open(pdf_path) as pdf:
        # DonutModel.inference takes a PIL image rather than a file path, so
        # the pixmap is converted in memory; no temp file or directory has to
        # be created and cleaned up.
        # NOTE(review): Pixmap.pil_image() needs a recent PyMuPDF release with
        # Pillow installed -- confirm the deployed version provides it.
        page_image = pdf[0].get_pixmap().pil_image()

    return model.inference(image=page_image, prompt=task_prompt)["predictions"][0]
    

# Task name used to build the prompt passed to every model.inference call.
# NOTE(review): presumably this must match the task token the checkpoint was
# fine-tuned with -- confirm against the model card.
task_name = "SGSInvoice"
task_prompt = f"<s_{task_name}>"

# Load the checkpoint once at import time so every request reuses it, then
# call eval() before serving requests.
model = DonutModel.from_pretrained("uartimcs/donut-invoice-extract")
model.eval()

# demo_process returns the model's prediction rather than a file path, so the
# result is shown with a JSON component instead of a file download.
iface = gr.Interface(
    fn=demo_process,
    inputs=gr.File(label="Upload PDF File"),
    outputs=gr.JSON(label="Extracted Invoice Fields"),
    title="PDF Invoice Extractor",
    description="Upload a PDF invoice and view the fields extracted from its first page by a Donut model.",
)
iface.launch()