from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer

import gradio as gr
import pytesseract
import torch
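
# LayoutLM consumes OCR output (words plus bounding boxes) rather than raw
# pixels, so an OCR engine is required. pytesseract additionally assumes the
# Tesseract binary is installed on the system (e.g. apt-get install tesseract-ocr).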
|

model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-large-uncased")
tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-large-uncased")
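
# NOTE: this is the pretrained base checkpoint, so the token-classification head
# on top is randomly initialized (transformers logs a warning about newly
# initialized weights). Meaningful field extraction would need a checkpoint
# fine-tuned for token classification on labeled forms (e.g. on FUNSD).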
|


def extract_patient_info(image):
    # Gradio's gr.Image(type="numpy") already delivers an RGB array, so no
    # color-space conversion is needed. LayoutLM does not read pixels at all:
    # it expects OCR'd words plus their bounding boxes, normalized to a
    # 0-1000 grid. Tesseract is one common choice for that OCR step.
    height, width = image.shape[:2]
    ocr = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)

    words, boxes = [], []
    for text, left, top, w, h in zip(
        ocr["text"], ocr["left"], ocr["top"], ocr["width"], ocr["height"]
    ):
        if not text.strip():
            continue
        words.append(text)
        boxes.append([
            int(1000 * left / width),
            int(1000 * top / height),
            int(1000 * (left + w) / width),
            int(1000 * (top + h) / height),
        ])

    if not words:
        return "No text detected", "", "", ""

    # Each word may split into several subword tokens; every subword inherits
    # its word's box. [CLS] and [SEP] get the conventional boxes from the
    # LayoutLM documentation.
    token_boxes = []
    for word, box in zip(words, boxes):
        token_boxes.extend([box] * len(tokenizer.tokenize(word)))
    token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

    encoding = tokenizer(" ".join(words), return_tensors="pt", truncation=True)
    seq_len = encoding["input_ids"].shape[1]
    # Keep the box tensor aligned with the (possibly truncated) token sequence.
    bbox = torch.tensor([token_boxes[: seq_len - 1] + [[1000, 1000, 1000, 1000]]])

    with torch.no_grad():
        outputs = model(
            input_ids=encoding["input_ids"],
            bbox=bbox,
            attention_mask=encoding["attention_mask"],
            token_type_ids=encoding["token_type_ids"],
        )

    # outputs.logits holds one row of label scores per token. Because the head
    # of this checkpoint is untrained, the argmax is not meaningful yet, so
    # return placeholders for the four fields the UI expects.
    return (
        "Extracted patient name will go here.",
        "Extracted age will go here.",
        "Extracted gender will go here.",
        "Extracted phone number will go here.",
    )
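

# A minimal sketch of how per-token predictions could be mapped back to the
# four UI fields once a fine-tuned checkpoint is available. The BIO-style label
# names assumed here (B-NAME, I-NAME, ...) are illustrative, not the labels of
# any published checkpoint.
def decode_fields(words, word_labels):
    # Group the words tagged with each entity type into one string per field.
    fields = {"NAME": [], "AGE": [], "GENDER": [], "PHONE": []}
    for word, label in zip(words, word_labels):
        entity = label.split("-")[-1]  # e.g. "B-NAME" / "I-NAME" -> "NAME"
        if entity in fields:
            fields[entity].append(word)
    return (
        " ".join(fields["NAME"]),
        " ".join(fields["AGE"]),
        " ".join(fields["GENDER"]),
        " ".join(fields["PHONE"]),
    )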


with gr.Blocks() as demo:
    gr.Markdown("### OCR Using LayoutLM Pretrained Model")

    image_input = gr.Image(type="numpy", label="Upload Image")

    name_output = gr.Textbox(label="Patient Name")
    age_output = gr.Textbox(label="Age")
    gender_output = gr.Textbox(label="Gender")
    phone_output = gr.Textbox(label="Phone Number")

    process_button = gr.Button("Process Image")

    process_button.click(
        fn=extract_patient_info,
        inputs=image_input,
        outputs=[name_output, age_output, gender_output, phone_output],
    )


if __name__ == "__main__":
    demo.launch()