|
from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
import gradio as gr
import cv2
import easyocr

# EasyOCR reader for English text detection and recognition.
reader = easyocr.Reader(['en'])

# Note: "microsoft/layoutlm-large-uncased" is a base checkpoint without a
# fine-tuned token-classification head, so the classifier weights are
# randomly initialized (transformers prints a warning to this effect).
model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-large-uncased")
tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-large-uncased")
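
# A possible refinement (an assumption, not part of the original script):
# when a label scheme is known, the head can be sized explicitly so a
# fine-tuned checkpoint loads with a matching classifier. The label names
# below are illustrative only.
# model = LayoutLMForTokenClassification.from_pretrained(
#     "microsoft/layoutlm-large-uncased",
#     num_labels=5,
#     id2label={0: "O", 1: "B-NAME", 2: "I-NAME", 3: "B-DOB", 4: "I-DOB"},
# )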
|
def extract_patient_info(image):
    # Gradio's gr.Image(type="numpy") delivers RGB arrays, while EasyOCR
    # follows OpenCV's BGR convention for array input, so swap the channels.
    image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Each detection is (bounding box, text, confidence); keep the text.
    result = reader.readtext(image_bgr)
    extracted_text = " ".join([detection[1] for detection in result])

    # Run the text through LayoutLM, truncating to its 512-token limit.
    # Without bbox features and a fine-tuned head the logits are not yet
    # meaningful, so only the raw OCR text is returned.
    inputs = tokenizer(extracted_text, return_tensors="pt", truncation=True)
    outputs = model(**inputs)

    return extracted_text
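
# --- Hedged sketch (not in the original script) ----------------------------
# extract_patient_info() tokenizes the OCR text but never gives LayoutLM the
# layout information the model is designed around. The sketch below shows one
# way the bbox input could be built from EasyOCR detections, reusing the
# `reader`, `model`, and `tokenizer` defined above. `normalize_box` and
# `run_layoutlm_with_boxes` are hypothetical helpers, and the randomly
# initialized head means the logits are still untrained.
import torch

def normalize_box(points, width, height):
    # EasyOCR returns four corner points per detection; LayoutLM expects
    # [x0, y0, x1, y1] scaled to a 0-1000 grid.
    xs = [p[0] for p in points]
    ys = [p[1] for p in points]
    return [
        int(1000 * min(xs) / width),
        int(1000 * min(ys) / height),
        int(1000 * max(xs) / width),
        int(1000 * max(ys) / height),
    ]

def run_layoutlm_with_boxes(image_bgr, detections):
    height, width = image_bgr.shape[:2]

    # One box per word; every subword token repeats its word's box.
    token_ids, token_boxes = [], []
    for box_points, word, _conf in detections:
        box = normalize_box(box_points, width, height)
        word_tokens = tokenizer.tokenize(word)
        token_ids.extend(tokenizer.convert_tokens_to_ids(word_tokens))
        token_boxes.extend([box] * len(word_tokens))

    # Add special tokens with the conventional dummy boxes, truncating to
    # LayoutLM's 512-position limit.
    input_ids = [tokenizer.cls_token_id] + token_ids[:510] + [tokenizer.sep_token_id]
    token_boxes = [[0, 0, 0, 0]] + token_boxes[:510] + [[1000, 1000, 1000, 1000]]

    outputs = model(
        input_ids=torch.tensor([input_ids]),
        bbox=torch.tensor([token_boxes]),
        attention_mask=torch.ones(1, len(input_ids), dtype=torch.long),
    )
    return outputs.logits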
|
# Minimal Gradio UI: upload an image, press the button, read the OCR text.
with gr.Blocks() as demo:
    gr.Markdown("### OCR Using LayoutLM Pretrained Model with EasyOCR")

    image_input = gr.Image(type="numpy", label="Upload Image")
    output_text = gr.Textbox(label="Extracted Text")
    process_button = gr.Button("Process Image")

    process_button.click(fn=extract_patient_info, inputs=image_input, outputs=output_text)

if __name__ == "__main__":
    demo.launch()
|
|