atnanahidiw committed on
Commit
f866fba
·
verified ·
1 Parent(s): 69e99cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -8
app.py CHANGED
@@ -1,18 +1,59 @@
1
  import gradio as gr
2
- from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
5
 
6
  def predict(input_img):
7
- predictions = pipeline(input_img)
8
- return input_img, {p["label"]: p["score"] for p in predictions}
 
 
 
9
 
10
  gradio_app = gr.Interface(
11
  predict,
12
- inputs=gr.Image(label="Select hot dog candidate", sources=['upload', 'webcam'], type="pil"),
13
- outputs=[gr.Image(label="Processed Image"), gr.Label(label="Result", num_top_classes=2)],
14
- title="Hot Dog? Or Not?",
 
15
  )
16
 
 
17
  if __name__ == "__main__":
18
- gradio_app.launch(share=True)
 
1
  import gradio as gr
2
+ from transformers import DonutProcessor, VisionEncoderDecoderModel
3
+
4
+
5
# Module-level Donut processor and model, loaded once at import time from the
# "nielsr/donut-demo" checkpoint; donut() and parse_json() read these globals.
+ processor = DonutProcessor.from_pretrained("nielsr/donut-demo")
6
+ model = VisionEncoderDecoderModel.from_pretrained("nielsr/donut-demo")
7
+
8
+
9
def donut(input_img):
    """Run the Donut model on one document image and return the raw generation.

    Args:
        input_img: PIL image of the document (the Gradio input component in
            this file is declared with ``type="pil"``).

    Returns:
        The object produced by ``model.generate`` with
        ``return_dict_in_generate=True``; its ``sequences`` attribute holds
        the generated token ids.

    Raises:
        ValueError: If ``input_img`` is ``None`` (no image was supplied).
    """
    if input_img is None:
        raise ValueError("donut() requires an image, got None")

    # Keep every tensor on the device the model already lives on. The file
    # defines no module-level ``device``, and rebinding ``model`` inside this
    # function would turn it into an unbound local name.
    device = model.device

    # prepare encoder inputs
    pixel_values = processor(input_img.convert("RGB"), return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device)

    # prepare decoder inputs
    task_prompt = "<s_cord-v2>"
    decoder_input_ids = processor.tokenizer(
        task_prompt, add_special_tokens=False, return_tensors="pt"
    ).input_ids
    decoder_input_ids = decoder_input_ids.to(device)

    # autoregressively generate sequence
    return model.generate(
        pixel_values,
        decoder_input_ids=decoder_input_ids,
        max_length=model.decoder.config.max_position_embeddings,
        early_stopping=True,
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id=processor.tokenizer.eos_token_id,
        use_cache=True,
        num_beams=1,
        # never emit the unknown token
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
        return_dict_in_generate=True,
    )
32
+
33
+
34
def parse_json(outputs):
    """Turn a Donut generation result into a JSON-like Python structure.

    Args:
        outputs: Result of ``model.generate(..., return_dict_in_generate=True)``;
            only its ``sequences`` attribute is read.

    Returns:
        Whatever ``processor.token2json`` returns for the first decoded
        sequence after the EOS/PAD tokens and the leading task token have
        been stripped.
    """
    # Function-scope import: the file does not import ``re`` at module level.
    import re

    tokenizer = processor.tokenizer

    # Only the first sequence of the batch is decoded.
    seq = processor.batch_decode(outputs.sequences)[0]
    seq = seq.replace(tokenizer.eos_token, "").replace(tokenizer.pad_token, "")
    seq = re.sub(r"<.*?>", "", seq, count=1).strip()  # remove first task start token

    return processor.token2json(seq)
40
 
 
41
 
42
  def predict(input_img):
43
+ outputs = donut(input_img)
44
+ result = parse_json(outputs)
45
+
46
+ return result
47
+
48
 
49
  gradio_app = gr.Interface(
50
  predict,
51
+ inputs=gr.Image(label="Upload gambar dokumen", sources=['upload', 'webcam'], type="pil"),
52
+ outputs=[gr.JSON(label="Hasil")],
53
+ title="OCR Dokumen Identitas Indonesia",
54
+ description="Ekstraksi gambar KTP, SIM, Paspor, KK, dan NPWP menjadi data teks tersturktur",
55
  )
56
 
57
+
58
  if __name__ == "__main__":
59
+ gradio_app.launch(share=True)