Spaces:
Runtime error
Runtime error
File size: 2,098 Bytes
efbf256 cc875ec f866fba 3a83e2f efbf256 f866fba fb7a6d8 f866fba 7ad138b f866fba efbf256 f866fba cc875ec f866fba cc875ec f866fba 4467b70 cc875ec f866fba cc875ec f866fba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import re
import torch
import gradio as gr
from transformers import DonutProcessor, VisionEncoderDecoderModel
# Identifier handed to ``from_pretrained`` for both the processor and the model.
_CHECKPOINT = "atnanahidiw/donut-poc-1.1.1"

# Loaded once at import time and shared by every request.
processor = DonutProcessor.from_pretrained(_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)

# Use the GPU when PyTorch reports CUDA as available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
def donut(sample):
    """Run the Donut model on one image and return the raw generation result.

    Args:
        sample: Image object exposing ``convert("RGB")`` (presumably a PIL
            image, since the Gradio input is configured with ``type="pil"``).

    Returns:
        The object returned by ``model.generate`` when called with
        ``return_dict_in_generate=True``.
    """
    # Encoder side: force RGB, let the processor build the tensor, then move
    # it to the selected device.
    rgb_image = sample.convert("RGB")
    encoder_pixels = processor(rgb_image, return_tensors="pt").pixel_values.to(device)

    # Decoder side: generation is seeded with the task start token only.
    tokenizer = processor.tokenizer
    start_prompt = "<s_cord-v2>"
    prompt_ids = tokenizer(
        start_prompt,
        add_special_tokens=False,
        return_tensors="pt",
    ).input_ids.to(device)

    # The model is moved on every call; the inputs above already live on the
    # same device.
    model_on_device = model.to(device)

    generation_options = {
        "decoder_input_ids": prompt_ids,
        # Cap the output at the decoder's configured position-embedding limit.
        "max_length": model.decoder.config.max_position_embeddings,
        "early_stopping": True,
        "pad_token_id": tokenizer.pad_token_id,
        "eos_token_id": tokenizer.eos_token_id,
        "use_cache": True,
        "num_beams": 1,
        # Forbid the unknown token from appearing in the output.
        "bad_words_ids": [[tokenizer.unk_token_id]],
        "return_dict_in_generate": True,
    }

    # Autoregressively generate the output sequence.
    return model_on_device.generate(encoder_pixels, **generation_options)
def parse_json(outputs):
    """Turn a generation result into the processor's JSON structure.

    Args:
        outputs: Object with a ``sequences`` attribute, such as the value
            returned by ``donut``.

    Returns:
        Whatever ``processor.token2json`` produces for the cleaned text of the
        first decoded sequence.
    """
    tokenizer = processor.tokenizer

    # Only the first decoded sequence is used.
    text = processor.batch_decode(outputs.sequences)[0]

    # Drop end-of-sequence and padding markers wherever they occur.
    for special_token in (tokenizer.eos_token, tokenizer.pad_token):
        text = text.replace(special_token, "")

    # Remove just the first <...> tag, i.e. the task start token.
    text = re.sub(r"<.*?>", "", text, count=1)

    return processor.token2json(text.strip())
def predict(input_img):
    """Extract structured fields from a document image.

    Args:
        input_img: Image to analyse; passed unchanged to ``donut``. May be
            ``None`` (presumably what Gradio sends when the form is submitted
            without an image; verify against the Gradio version in use).

    Returns:
        The value produced by ``parse_json`` for the model output, or ``None``
        when no image was supplied.
    """
    # Guard clause: without it a missing image reaches ``donut`` and fails
    # with an AttributeError on ``None.convert``.
    if input_img is None:
        return None

    outputs = donut(input_img)
    return parse_json(outputs)
# Gradio UI: one image input (upload or webcam, handed to ``predict`` as a PIL
# image) and one JSON panel showing the returned value.
gradio_app = gr.Interface(
    predict,
    inputs=gr.Image(label="Upload gambar dokumen", sources=["upload", "webcam"], type="pil"),
    outputs=[gr.JSON(label="Hasil")],
    title="OCR Dokumen Identitas Indonesia",
    # The document-type list previously contained a mis-decoded character
    # followed by a stray line break after each item, which left the string
    # literal unterminated. It is rebuilt here as a single literal.
    # NOTE(review): the marker is assumed to have been a check mark (U+2705);
    # confirm against the intended text. It is written as an escape so the
    # source stays ASCII-safe.
    description=(
        "Ekstraksi gambar dokumen identitas indonesia menjadi data teks terstruktur "
        "(KTP \u2705, SIM \u2705, Paspor \u2705, NPWP \u2705, dan KK)"
    ),
)
if __name__ == "__main__":
    # Start the web server only when this file is executed as a script, not
    # when it is imported. ``share=True`` is forwarded to Gradio's launcher.
    gradio_app.launch(share=True)
|