Spaces:
Runtime error
Runtime error
File size: 1,441 Bytes
6581de9 5279e45 6581de9 df73b43 6581de9 4bf6412 df73b43 4bf6412 64da888 1f662e3 f7fe7ff df73b43 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
import torch
import streamlit as st
from PIL import Image
from transformers import VisionEncoderDecoderModel, VisionEncoderDecoderConfig # , DonutProcessor
def demo_process(input_img):
    """Run the module-level model on one image and return its first prediction.

    Args:
        input_img: The image handed unchanged to the model's ``inference``
            call (the caller in this file passes a PIL image).

    Returns:
        The first element of the ``"predictions"`` entry in the model output.
    """
    # `pretrained_model` and `task_prompt` are module-level names that are
    # only read here, so no `global` declaration is required.
    # NOTE(review): the model is loaded elsewhere in this file as a
    # transformers `VisionEncoderDecoderModel`; confirm that class really
    # exposes `inference(image=..., prompt=...)` -- TODO verify.
    result = pretrained_model.inference(image=input_img, prompt=task_prompt)
    predictions = result["predictions"]
    return predictions[0]
# Prompt token passed to the model by `demo_process`.
task_prompt = "<s>"

st.text('''
This is OCR-free Document Understanding Transformer nicknamed 🍩. It was fine-tuned with 1000 receipt images -> SROIE dataset.
The original 🍩 implementation can be found on: https://github.com/clovaai/donut
''')

# Sidebar controls: which information to extract and which sample receipt.
with st.sidebar:
    information = st.radio(
        "What information inside the receipt are you interested in?",
        ('Receipt Summary', 'Receipt Menu Details', 'Extract all!'))
    # `index` is the integer position of the default option; 5 preselects
    # receipt '6' (the original passed the string '6', which is not a valid
    # index for a six-item list).
    receipt = st.selectbox('Pick one receipt', ['1', '2', '3', '4', '5', '6'], index=5)

# Build the path once so the message always names the file actually opened
# (the original message said ".png" while the code opened ".jpg").
image_path = f"./img/receipt-{receipt}.jpg"
st.text(f'{information} mode is ON!\nTarget receipt: {receipt}\n(opening image @:{image_path})')

image = Image.open(image_path)
st.image(image, caption='Your target receipt')

st.text('baking the 🍩...')
# NOTE(review): Streamlit re-executes this script on every widget
# interaction, so the model is reloaded each time; consider caching the
# loaded model if the installed Streamlit version supports it.
pretrained_model = VisionEncoderDecoderModel.from_pretrained("unstructured/donut-base-sroie")
# Only the encoder is cast to bfloat16; the rest of the model keeps its dtype.
pretrained_model.encoder.to(torch.bfloat16)
pretrained_model.eval()

st.text('parsing receipt..')
parsed_receipt_info = demo_process(image)
st.text(f'\nRaw output:\n{parsed_receipt_info}')