from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
import gradio as gr
import cv2
import easyocr

# Initialize the EasyOCR reader for English text extraction
reader = easyocr.Reader(['en'])

# Load the pre-trained LayoutLM model and tokenizer
model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-large-uncased")
tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-large-uncased")

# Extract text with EasyOCR, then run it through LayoutLM
def extract_patient_info(image):
    # Gradio supplies the image as an RGB numpy array; convert to BGR,
    # the OpenCV-style channel order EasyOCR expects for color input
    image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Use EasyOCR to extract text from the image; each detection is a
    # (bounding_box, text, confidence) tuple
    result = reader.readtext(image_bgr)
    extracted_text = " ".join([detection[1] for detection in result])

    # Tokenize the extracted text and run LayoutLM (truncated to the
    # model's 512-token limit)
    inputs = tokenizer(extracted_text, return_tensors="pt", truncation=True)
    outputs = model(**inputs)

    # For this example, return the extracted text (LayoutLM's output in
    # outputs.logits can be processed further)
    return extracted_text

# Gradio interface setup
with gr.Blocks() as demo:
    gr.Markdown("### OCR Using LayoutLM Pretrained Model with EasyOCR")

    # Image upload component
    image_input = gr.Image(type="numpy", label="Upload Image")

    # Output textbox to display the extracted text
    output_text = gr.Textbox(label="Extracted Text")

    # Button to trigger image processing and text extraction
    process_button = gr.Button("Process Image")

    # On click, process the image and show the result in the textbox
    process_button.click(fn=extract_patient_info, inputs=image_input, outputs=output_text)

# Launch the Gradio app
if __name__ == "__main__":
    demo.launch()
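
# --- Optional appendix: feeding EasyOCR boxes to LayoutLM ---
# The function above discards the OCR bounding boxes, so LayoutLM falls back
# to all-zero layout coordinates. This is a minimal sketch of how the boxes
# could be passed in instead. It is not wired into the Gradio app; the helper
# names normalize_box and run_layoutlm_with_boxes are illustrative, and the
# [CLS]/[SEP] dummy boxes follow the convention from LayoutLM examples.
# LayoutLM expects one 0-1000-normalized [x0, y0, x1, y1] box per token, so
# each OCR word is tokenized on its own and its box repeated per sub-token.
import torch

def normalize_box(box, width, height):
    # EasyOCR returns four corner points; reduce them to an axis-aligned
    # rectangle and scale into LayoutLM's 0-1000 coordinate space
    xs = [point[0] for point in box]
    ys = [point[1] for point in box]
    return [
        int(1000 * min(xs) / width),
        int(1000 * min(ys) / height),
        int(1000 * max(xs) / width),
        int(1000 * max(ys) / height),
    ]

def run_layoutlm_with_boxes(image):
    height, width = image.shape[:2]
    token_ids, token_boxes = [], []
    for box, word, _confidence in reader.readtext(image):
        word_ids = tokenizer(word, add_special_tokens=False)["input_ids"]
        token_ids.extend(word_ids)
        token_boxes.extend([normalize_box(box, width, height)] * len(word_ids))

    # Truncate to leave room for [CLS] and [SEP] within the 512-token limit
    token_ids, token_boxes = token_ids[:510], token_boxes[:510]
    input_ids = [tokenizer.cls_token_id] + token_ids + [tokenizer.sep_token_id]
    boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

    inputs = {
        "input_ids": torch.tensor([input_ids]),
        "bbox": torch.tensor([boxes]),
        "attention_mask": torch.ones(1, len(input_ids), dtype=torch.long),
    }
    with torch.no_grad():
        logits = model(**inputs).logits  # shape: (1, seq_len, num_labels)

    # Map each token to its predicted label name; note that the label names
    # in model.config.id2label are generic (LABEL_0, ...) unless the
    # checkpoint has been fine-tuned for a specific token-classification task
    predicted_ids = logits.argmax(dim=-1).squeeze(0).tolist()
    tokens = tokenizer.convert_ids_to_tokens(input_ids)
    return [(token, model.config.id2label[i]) for token, i in zip(tokens, predicted_ids)]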