DSatishchandra's picture
Update app.py
526d984 verified
raw
history blame
1.75 kB
import cv2
import easyocr
import gradio as gr
import torch
from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
# Checkpoint name shared by the LayoutLM model and its tokenizer
LAYOUTLM_CHECKPOINT = "microsoft/layoutlm-large-uncased"

# EasyOCR reader (English) used for text extraction; created once at import
reader = easyocr.Reader(["en"])

# Pre-trained LayoutLM token-classification model and matching tokenizer
model = LayoutLMForTokenClassification.from_pretrained(LAYOUTLM_CHECKPOINT)
tokenizer = LayoutLMTokenizer.from_pretrained(LAYOUTLM_CHECKPOINT)
# Function to extract text using EasyOCR and process with LayoutLM
def extract_patient_info(image):
    """Run OCR on an uploaded image and return the recognised text.

    The text of every EasyOCR detection is joined with single spaces. The
    joined text is also tokenized and passed through the LayoutLM model, but
    the model output is not used yet: only the OCR text is returned.

    Args:
        image: Image array as delivered by the Gradio image component, or
            ``None`` when nothing has been uploaded.

    Returns:
        The OCR text as one string; an empty string when no image was
        supplied or no text was detected.
    """
    # Clicking the button before uploading passes None, which cv2 cannot
    # convert; answer with empty text instead of raising.
    if image is None:
        return ""

    # Swap the first and third colour channels before OCR.
    # NOTE(review): Gradio documents its numpy images as RGB, so this swap
    # presumably yields BGR rather than RGB -- confirm which channel order
    # the OCR reader expects before changing or removing this call.
    swapped = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Index 1 of each detection is the recognised string.
    detections = reader.readtext(swapped)
    extracted_text = " ".join(detection[1] for detection in detections)

    # Nothing recognised: skip the (expensive) model pass entirely.
    if not extracted_text:
        return ""

    # LayoutLM only has 512 position embeddings; without truncation a long
    # OCR result makes the forward pass fail.
    inputs = tokenizer(
        extracted_text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )

    # Inference only, so skip autograd bookkeeping.
    # TODO: the predictions are currently discarded; post-process them here
    # once field extraction is implemented.
    with torch.no_grad():
        model(**inputs)

    return extracted_text
# Build the Gradio UI: image upload, result textbox and a trigger button
with gr.Blocks() as demo:
    gr.Markdown(value="### OCR Using LayoutLM Pretrained Model with EasyOCR")

    # Upload widget; the image reaches the callback as a numpy array
    image_input = gr.Image(label="Upload Image", type="numpy")

    # Textbox that displays the text returned by the callback
    output_text = gr.Textbox(label="Extracted Text")

    # Button that starts processing of the uploaded image
    process_button = gr.Button(value="Process Image")

    # On click, run the extraction on the uploaded image and show its result
    process_button.click(
        fn=extract_patient_info,
        inputs=image_input,
        outputs=output_text,
    )

# Start the app only when this file is executed as a script
if __name__ == "__main__":
    demo.launch()