# File-viewer residue: DSatishchandra's picture · Create app.py · a7847d1 verified · raw · history blame · 2.03 kB
from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
import gradio as gr
import cv2
import torch
# The token-classification model and its tokenizer are both loaded from the
# same pre-trained LayoutLM checkpoint, so the id is spelled out only once.
# NOTE(review): this looks like the base checkpoint, so the classification
# head is presumably not fine-tuned -- confirm before relying on predictions.
LAYOUTLM_CHECKPOINT = "microsoft/layoutlm-large-uncased"
model = LayoutLMForTokenClassification.from_pretrained(LAYOUTLM_CHECKPOINT)
tokenizer = LayoutLMTokenizer.from_pretrained(LAYOUTLM_CHECKPOINT)
def extract_patient_info(image):
    """Return placeholder values for the four patient-info fields of the UI.

    This is still a demonstration stub: no text is actually extracted yet.
    It exists so the Gradio click handler receives one value per output
    textbox instead of failing.

    Args:
        image: The uploaded image as passed in by the Gradio image input,
            or ``None`` when the button is clicked without an upload.

    Returns:
        A 4-tuple of strings in the order (patient name, age, gender,
        phone number), matching the four output textboxes wired to this
        callback. All four are empty strings when ``image`` is ``None``;
        otherwise each holds the placeholder message.
    """
    # Clicking the button without uploading anything passes None; clear the
    # fields instead of raising.
    if image is None:
        return ("", "", "", "")

    # No colour conversion is done here: Gradio's type="numpy" images are
    # already RGB, so a BGR->RGB swap would scramble the channels.
    #
    # TODO: run an OCR step to obtain words and their bounding boxes, then
    # feed those (not the raw pixel array) to the LayoutLM tokenizer and
    # model. The tokenizer only accepts text, so passing it the image, as the
    # previous version did, raised on every call.
    placeholder = "Extracted text using LayoutLM will go here."

    # One value per output component (name, age, gender, phone).
    return (placeholder,) * 4
# Assemble the Gradio page: a heading, an image upload, four result
# textboxes, and a button that triggers the extraction callback.
with gr.Blocks() as demo:
    gr.Markdown("### OCR Using LayoutLM Pretrained Model")

    # Input: the uploaded picture, handed to the callback as a NumPy array.
    image_input = gr.Image(label="Upload Image", type="numpy")

    # Outputs: one textbox per extracted field.
    name_output = gr.Textbox(label="Patient Name")
    age_output = gr.Textbox(label="Age")
    gender_output = gr.Textbox(label="Gender")
    phone_output = gr.Textbox(label="Phone Number")

    process_button = gr.Button("Process Image")

    # On click, send the image to extract_patient_info and route what it
    # returns into the textboxes, in the order listed here.
    result_fields = [name_output, age_output, gender_output, phone_output]
    process_button.click(
        fn=extract_patient_info,
        inputs=image_input,
        outputs=result_fields,
    )
# Start the Gradio app only when this file is run directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()