Spaces:
Runtime error
Runtime error
import re

import streamlit as st
import torch
from PIL import Image
from transformers import (
    DonutProcessor,
    VisionEncoderDecoderConfig,
    VisionEncoderDecoderModel,
)
def demo_process(input_img):
    """Parse one receipt image with the Donut model.

    Reads the module-level ``pretrained_model``, ``processor`` and
    ``task_prompt``, so it must only be called after the script below has
    assigned them.

    Args:
        input_img: PIL image of the receipt to parse.

    Returns:
        The structure ``DonutProcessor.token2json`` builds from the
        generated token sequence.
    """
    tokenizer = processor.tokenizer

    # Hand the processor plain RGB regardless of how the file was saved.
    pixel_values = processor(
        input_img.convert("RGB"), return_tensors="pt"
    ).pixel_values
    # The prompt already is the start token, so no extra special tokens.
    decoder_input_ids = tokenizer(
        task_prompt, add_special_tokens=False, return_tensors="pt"
    ).input_ids

    # VisionEncoderDecoderModel has no ``inference`` helper (that belongs to
    # the original ``donut`` package); decoding goes through ``generate``.
    generated = pretrained_model.generate(
        pixel_values,
        decoder_input_ids=decoder_input_ids,
        max_length=pretrained_model.decoder.config.max_position_embeddings,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        use_cache=True,
        bad_words_ids=[[tokenizer.unk_token_id]],
        return_dict_in_generate=True,
    )

    sequence = processor.batch_decode(generated.sequences)[0]
    sequence = sequence.replace(tokenizer.eos_token, "")
    sequence = sequence.replace(tokenizer.pad_token, "")
    # Drop the leading task-start tag before converting tags to JSON.
    sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()
    return processor.token2json(sequence)


task_prompt = "<s>"
MODEL_NAME = "unstructured/donut-base-sroie"
RECEIPT_IDS = ['1', '2', '3', '4', '5', '6']

st.text('''
This is OCR-free Document Understanding Transformer nicknamed 🍩. It was fine-tuned with 1000 receipt images -> SROIE dataset.
The original 🍩 implementation can be found on: https://github.com/clovaai/donut
''')

# NOTE(review): the source lost its indentation; both widgets are assumed to
# live in the sidebar -- confirm against the intended layout.
with st.sidebar:
    information = st.radio(
        "What information inside the receipt are you interested in?",
        ('Receipt Summary', 'Receipt Menu Details', 'Extract all!'))
    # ``index`` must be an integer position, not the option's label.
    receipt = st.selectbox(
        'Pick one receipt', RECEIPT_IDS, index=RECEIPT_IDS.index('6'))

# One path variable so the status message always names the file really opened.
image_path = f"./img/receipt-{receipt}.jpg"
st.text(f'{information} mode is ON!\nTarget receipt: {receipt}\n(opening image @:{image_path})')

image = Image.open(image_path)
st.image(image, caption='Your target receipt')

st.text('baking the 🍩...')
processor = DonutProcessor.from_pretrained(MODEL_NAME)
pretrained_model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
# The whole model stays in float32: casting only the encoder to bfloat16
# would leave it out of step with the float32 pixel values and decoder.
pretrained_model.eval()

st.text('parsing receipt..')
parsed_receipt_info = demo_process(image)
st.text(f'\nRaw output:\n{parsed_receipt_info}')