from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
import gradio as gr
import cv2
import easyocr

# Initialize the EasyOCR reader for English text extraction
reader = easyocr.Reader(['en'])

# Load the pre-trained LayoutLM model and tokenizer
model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-large-uncased")
tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-large-uncased")

# Extract text with EasyOCR, then run it through LayoutLM
def extract_patient_info(image):
    # Gradio supplies the image as an RGB numpy array; convert to BGR,
    # the OpenCV-style channel order EasyOCR expects for color input
    image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Use EasyOCR to extract text from the image; each detection is a
    # (bounding_box, text, confidence) tuple
    result = reader.readtext(image_bgr)
    extracted_text = " ".join([detection[1] for detection in result])

    # Tokenize the extracted text and run LayoutLM (truncated to the
    # model's 512-token limit)
    inputs = tokenizer(extracted_text, return_tensors="pt", truncation=True)
    outputs = model(**inputs)

    # For this example, return the extracted text (LayoutLM's output in
    # outputs.logits can be processed further)
    return extracted_text

# Gradio interface setup
with gr.Blocks() as demo:
    gr.Markdown("### OCR Using LayoutLM Pretrained Model with EasyOCR")

    # Image upload component
    image_input = gr.Image(type="numpy", label="Upload Image")

    # Output textbox to display the extracted text
    output_text = gr.Textbox(label="Extracted Text")

    # Button to trigger image processing and text extraction
    process_button = gr.Button("Process Image")

    # On click, process the image and show the result in the textbox
    process_button.click(fn=extract_patient_info, inputs=image_input, outputs=output_text)

# Launch the Gradio app
if __name__ == "__main__":
    demo.launch()
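
# --- Optional appendix: feeding EasyOCR boxes to LayoutLM ---
# The function above discards the OCR bounding boxes, so LayoutLM falls back
# to all-zero layout coordinates. This is a minimal sketch of how the boxes
# could be passed in instead. It is not wired into the Gradio app; the helper
# names normalize_box and run_layoutlm_with_boxes are illustrative, and the
# [CLS]/[SEP] dummy boxes follow the convention from LayoutLM examples.
# LayoutLM expects one 0-1000-normalized [x0, y0, x1, y1] box per token, so
# each OCR word is tokenized on its own and its box repeated per sub-token.
import torch

def normalize_box(box, width, height):
    # EasyOCR returns four corner points; reduce them to an axis-aligned
    # rectangle and scale into LayoutLM's 0-1000 coordinate space
    xs = [point[0] for point in box]
    ys = [point[1] for point in box]
    return [
        int(1000 * min(xs) / width),
        int(1000 * min(ys) / height),
        int(1000 * max(xs) / width),
        int(1000 * max(ys) / height),
    ]

def run_layoutlm_with_boxes(image):
    height, width = image.shape[:2]
    token_ids, token_boxes = [], []
    for box, word, _confidence in reader.readtext(image):
        word_ids = tokenizer(word, add_special_tokens=False)["input_ids"]
        token_ids.extend(word_ids)
        token_boxes.extend([normalize_box(box, width, height)] * len(word_ids))

    # Truncate to leave room for [CLS] and [SEP] within the 512-token limit
    token_ids, token_boxes = token_ids[:510], token_boxes[:510]
    input_ids = [tokenizer.cls_token_id] + token_ids + [tokenizer.sep_token_id]
    boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

    inputs = {
        "input_ids": torch.tensor([input_ids]),
        "bbox": torch.tensor([boxes]),
        "attention_mask": torch.ones(1, len(input_ids), dtype=torch.long),
    }
    with torch.no_grad():
        logits = model(**inputs).logits  # shape: (1, seq_len, num_labels)

    # Map each token to its predicted label name; note that the label names
    # in model.config.id2label are generic (LABEL_0, ...) unless the
    # checkpoint has been fine-tuned for a specific token-classification task
    predicted_ids = logits.argmax(dim=-1).squeeze(0).tolist()
    tokens = tokenizer.convert_ids_to_tokens(input_ids)
    return [(token, model.config.id2label[i]) for token, i in zip(tokens, predicted_ids)]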