Spaces:
Runtime error
Runtime error
Commit
·
412c90a
1
Parent(s):
211c5c8
order of model
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ import os
|
|
3 |
import boto3
|
4 |
import traceback
|
5 |
import re
|
|
|
6 |
|
7 |
import gradio as gr
|
8 |
from PIL import Image, ImageDraw
|
@@ -20,6 +21,9 @@ ssl._create_default_https_context = ssl._create_unverified_context
|
|
20 |
|
21 |
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
22 |
|
|
|
|
|
|
|
23 |
# Init models
|
24 |
|
25 |
layoutlm_pipeline = pipeline(
|
@@ -52,6 +56,7 @@ def image_to_byte_array(image: Image) -> bytes:
|
|
52 |
|
53 |
|
54 |
def run_textract(question, document):
|
|
|
55 |
image_as_byte_base64 = image_to_byte_array(image=document.b)
|
56 |
response = boto3.client("textract").analyze_document(
|
57 |
Document={
|
@@ -71,6 +76,7 @@ def run_textract(question, document):
|
|
71 |
]
|
72 |
},
|
73 |
)
|
|
|
74 |
for element in response["Blocks"]:
|
75 |
if element["BlockType"] == "QUERY_RESULT":
|
76 |
return {
|
@@ -83,7 +89,9 @@ def run_textract(question, document):
|
|
83 |
|
84 |
|
85 |
def run_layoutlm(question, document):
|
|
|
86 |
result = layoutlm_pipeline(document.context["image"][0][0], question)[0]
|
|
|
87 |
# [{'score': 0.9999411106109619, 'answer': 'LETTER OF CREDIT', 'start': 106, 'end': 108}]
|
88 |
return {
|
89 |
"score": result["score"],
|
@@ -94,6 +102,7 @@ def run_layoutlm(question, document):
|
|
94 |
|
95 |
|
96 |
def run_lilt(question, document):
|
|
|
97 |
# use this model + tokenizer
|
98 |
processed_document = document.context["image"][0][1]
|
99 |
words = [x[0] for x in processed_document]
|
@@ -107,6 +116,7 @@ def run_lilt(question, document):
|
|
107 |
return_tensors="pt",
|
108 |
)
|
109 |
outputs = lilt_model(**encoding)
|
|
|
110 |
|
111 |
answer_start_index = outputs.start_logits.argmax()
|
112 |
answer_end_index = outputs.end_logits.argmax()
|
@@ -125,6 +135,7 @@ def run_lilt(question, document):
|
|
125 |
|
126 |
|
127 |
def run_donut(question, document):
|
|
|
128 |
# prepare encoder inputs
|
129 |
pixel_values = donut_processor(
|
130 |
document.context["image"][0][0], return_tensors="pt"
|
@@ -150,6 +161,7 @@ def run_donut(question, document):
|
|
150 |
bad_words_ids=[[donut_processor.tokenizer.unk_token_id]],
|
151 |
return_dict_in_generate=True,
|
152 |
)
|
|
|
153 |
sequence = donut_processor.batch_decode(outputs.sequences)[0]
|
154 |
sequence = sequence.replace(donut_processor.tokenizer.eos_token, "").replace(
|
155 |
donut_processor.tokenizer.pad_token, ""
|
@@ -242,10 +254,13 @@ MODELS = {
|
|
242 |
def process_question(question, document, model=list(MODELS.keys())[0]):
|
243 |
if not question or document is None:
|
244 |
return None, None, None
|
|
|
245 |
prediction = MODELS[model](question=question, document=document)
|
|
|
246 |
pages = [x.copy().convert("RGB") for x in document.preview]
|
247 |
text_value = prediction["answer"]
|
248 |
if "word_ids" in prediction:
|
|
|
249 |
image = pages[prediction["page"]]
|
250 |
draw = ImageDraw.Draw(image, "RGBA")
|
251 |
word_boxes = lift_word_boxes(document, prediction["page"])
|
@@ -410,7 +425,7 @@ examples = [
|
|
410 |
|
411 |
with gr.Blocks(css=CSS) as demo:
|
412 |
gr.Markdown("# Document Question Answer Comparator Engine")
|
413 |
-
gr.Markdown("
|
414 |
|
415 |
document = gr.Variable()
|
416 |
example_question = gr.Textbox(visible=False)
|
|
|
3 |
import boto3
|
4 |
import traceback
|
5 |
import re
|
6 |
+
import logging
|
7 |
|
8 |
import gradio as gr
|
9 |
from PIL import Image, ImageDraw
|
|
|
21 |
|
22 |
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
23 |
|
24 |
+
logging.basicConfig(level=logging.DEBUG)
|
25 |
+
logger = logging.getLogger(__name__)
|
26 |
+
|
27 |
# Init models
|
28 |
|
29 |
layoutlm_pipeline = pipeline(
|
|
|
56 |
|
57 |
|
58 |
def run_textract(question, document):
|
59 |
+
logger.info(f"Running Textract model.")
|
60 |
image_as_byte_base64 = image_to_byte_array(image=document.b)
|
61 |
response = boto3.client("textract").analyze_document(
|
62 |
Document={
|
|
|
76 |
]
|
77 |
},
|
78 |
)
|
79 |
+
logger.info(f"Output of Textract model {response}.")
|
80 |
for element in response["Blocks"]:
|
81 |
if element["BlockType"] == "QUERY_RESULT":
|
82 |
return {
|
|
|
89 |
|
90 |
|
91 |
def run_layoutlm(question, document):
|
92 |
+
logger.info(f"Running layoutlm model.")
|
93 |
result = layoutlm_pipeline(document.context["image"][0][0], question)[0]
|
94 |
+
logger.info(f"Output of layoutlm model {result}.")
|
95 |
# [{'score': 0.9999411106109619, 'answer': 'LETTER OF CREDIT', 'start': 106, 'end': 108}]
|
96 |
return {
|
97 |
"score": result["score"],
|
|
|
102 |
|
103 |
|
104 |
def run_lilt(question, document):
|
105 |
+
logger.info(f"Running lilt model.")
|
106 |
# use this model + tokenizer
|
107 |
processed_document = document.context["image"][0][1]
|
108 |
words = [x[0] for x in processed_document]
|
|
|
116 |
return_tensors="pt",
|
117 |
)
|
118 |
outputs = lilt_model(**encoding)
|
119 |
+
logger.info(f"Output for lilt model {outputs}.")
|
120 |
|
121 |
answer_start_index = outputs.start_logits.argmax()
|
122 |
answer_end_index = outputs.end_logits.argmax()
|
|
|
135 |
|
136 |
|
137 |
def run_donut(question, document):
|
138 |
+
logger.info(f"Running donut model.")
|
139 |
# prepare encoder inputs
|
140 |
pixel_values = donut_processor(
|
141 |
document.context["image"][0][0], return_tensors="pt"
|
|
|
161 |
bad_words_ids=[[donut_processor.tokenizer.unk_token_id]],
|
162 |
return_dict_in_generate=True,
|
163 |
)
|
164 |
+
logger.info(f"Output for donut {outputs}")
|
165 |
sequence = donut_processor.batch_decode(outputs.sequences)[0]
|
166 |
sequence = sequence.replace(donut_processor.tokenizer.eos_token, "").replace(
|
167 |
donut_processor.tokenizer.pad_token, ""
|
|
|
254 |
def process_question(question, document, model=list(MODELS.keys())[0]):
|
255 |
if not question or document is None:
|
256 |
return None, None, None
|
257 |
+
logger.info(f"Running for model {model}")
|
258 |
prediction = MODELS[model](question=question, document=document)
|
259 |
+
logger.info(f"Got prediction {prediction}")
|
260 |
pages = [x.copy().convert("RGB") for x in document.preview]
|
261 |
text_value = prediction["answer"]
|
262 |
if "word_ids" in prediction:
|
263 |
+
logger.info(f"Setting bounding boxes.")
|
264 |
image = pages[prediction["page"]]
|
265 |
draw = ImageDraw.Draw(image, "RGBA")
|
266 |
word_boxes = lift_word_boxes(document, prediction["page"])
|
|
|
425 |
|
426 |
with gr.Blocks(css=CSS) as demo:
|
427 |
gr.Markdown("# Document Question Answer Comparator Engine")
|
428 |
+
gr.Markdown("This space compares some of the latest models that can be used commercially.")
|
429 |
|
430 |
document = gr.Variable()
|
431 |
example_question = gr.Textbox(visible=False)
|