uartimcs committed on
Commit
013f423
·
verified ·
1 Parent(s): 624d1e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -24
app.py CHANGED
@@ -1,25 +1,51 @@
1
  import gradio as gr
2
- import argparse
3
- import torch
4
- from donut import DonutModel
5
- from PIL import Image
6
- from poppler import load_from_file, PageRenderer
7
-
8
- def demo_process(pdf_file):
9
- pdf_document = load_from_file(pdf_file.name)
10
- page_1 = pdf_document.create_page(0)
11
-
12
- renderer = PageRenderer()
13
- image = renderer.render_page(page_1)
14
- image_data = image.data
15
- input_img = Image.fromarray(image_data)
16
- output = model.inference(image=input_img, prompt=task_prompt)["predictions"][0]
17
- return output
18
-
19
- model = DonutModel.from_pretrained("uartimcs/donut-invoice-extract")
20
- model.eval()
21
- task_name = "SGSInvoice"
22
- task_prompt = f"<s_{task_name}>"
23
-
24
- demo = gr.Interface(fn=demo_process,inputs=gr.File(file_count='single', file_types=['.pdf']),outputs="json", title=f"Donut 🍩 demonstration for `{task_name}` task",)
25
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import fitz
3
+ import os
4
+ import zipfile
5
+
6
def pdf_to_images(pdf_file):
    """Render every page of a PDF as a PNG and bundle the pages into a ZIP.

    Pages are stored in the archive as ``1.png``, ``2.png``, ... in document
    order.

    Args:
        pdf_file: Path of the PDF to convert, or an object that exposes the
            path through a ``name`` attribute.

    Returns:
        Path of the generated archive. The file is always called
        ``images.zip`` and lives in a freshly created temporary directory,
        so simultaneous conversions cannot overwrite each other's output.

    Raises:
        Exception: Whatever ``fitz`` / ``zipfile`` raise while opening or
            rendering the document; the partial output is removed first.
    """
    # Local import keeps this function self-contained; the module only
    # imports gradio, fitz, os and zipfile at the top.
    import tempfile

    # Accept either a plain path or a wrapper carrying the path in ``.name``
    # (some Gradio file-input modes hand over such an object -- TODO confirm
    # which one the deployed Gradio version uses).
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # A private directory per call replaces the shared "temp_images" /
    # "images.zip" paths, which raced between concurrent requests and could
    # pick up stale files left behind by an earlier failed run.
    out_dir = tempfile.mkdtemp(prefix="pdf_images_")
    zip_path = os.path.join(out_dir, "images.zip")

    try:
        doc = fitz.open(pdf_path)
        try:
            with zipfile.ZipFile(zip_path, "w") as zipf:
                for page_id in range(doc.page_count):
                    png_bytes = doc[page_id].get_pixmap().tobytes("png")
                    # Write straight into the archive: no intermediate PNG
                    # files on disk and only one page held in memory.
                    zipf.writestr(f"{page_id + 1}.png", png_bytes)
        finally:
            # Release the document even when rendering a page fails.
            doc.close()
    except Exception:
        # Do not leave a half-written archive or an empty directory behind.
        if os.path.exists(zip_path):
            os.remove(zip_path)
        os.rmdir(out_dir)
        raise

    return zip_path
42
+
43
def main():
    """Build the PDF-to-images Gradio interface and start serving it.

    The interface takes one uploaded file, passes it to ``pdf_to_images``
    and offers the resulting ZIP archive for download.
    """
    iface = gr.Interface(
        fn=pdf_to_images,
        inputs=gr.File(label="Upload PDF File"),
        outputs=gr.File(label="Download ZIP File"),
        title="PDF to Images Converter",
        description="Upload a PDF file and download a ZIP file containing all the pages as images.",
    )
    iface.launch()


# ``main`` was defined but never invoked, so running the file did nothing.
# Start the app when the module is executed as a script.
if __name__ == "__main__":
    main()