from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
import gradio as gr
import cv2
import pytesseract  # OCR engine: LayoutLM needs words and bounding boxes, which Tesseract provides
import torch

# Load the pre-trained LayoutLM backbone and its tokenizer. Note that this
# base checkpoint ships without a fine-tuned token-classification head, so it
# cannot label fields reliably until it is fine-tuned on annotated forms.
model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-large-uncased")
tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-large-uncased")

# Extract text from the uploaded form and run it through LayoutLM
def extract_patient_info(image):
    # Gradio's gr.Image(type="numpy") already supplies an RGB array; grayscale helps Tesseract's OCR
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    height, width = gray.shape

    # OCR the image: LayoutLM needs words plus boxes normalised to the 0-1000 range
    ocr = pytesseract.image_to_data(gray, output_type=pytesseract.Output.DICT)
    words, boxes = [], []
    for text, x, y, w, h in zip(ocr["text"], ocr["left"], ocr["top"], ocr["width"], ocr["height"]):
        if text.strip():
            words.append(text)
            boxes.append([int(1000 * x / width), int(1000 * y / height),
                          int(1000 * (x + w) / width), int(1000 * (y + h) / height)])

    # Tokenize the OCR words, repeat each word's box for its sub-tokens, and add dummy boxes for [CLS]/[SEP]
    encoding = tokenizer(" ".join(words), return_tensors="pt", truncation=True, max_length=512)
    token_boxes = [[0, 0, 0, 0]]
    for word, box in zip(words, boxes):
        token_boxes.extend([box] * len(tokenizer.tokenize(word)))
    token_boxes = token_boxes[: encoding["input_ids"].shape[1] - 1] + [[1000, 1000, 1000, 1000]]

    with torch.no_grad():
        outputs = model(input_ids=encoding["input_ids"],
                        attention_mask=encoding["attention_mask"],
                        bbox=torch.tensor([token_boxes]))

    # Without a fine-tuned head the logits in `outputs` cannot be mapped to name/age/gender/phone
    # labels yet; return the raw OCR text in the first box and leave the other fields blank as placeholders.
    return " ".join(words), "", "", ""

# Gradio interface setup
with gr.Blocks() as demo:
    gr.Markdown("### OCR Using LayoutLM Pretrained Model")
    
    # Image upload component
    image_input = gr.Image(type="numpy", label="Upload Image")
    
    # Output textboxes to display the extracted information
    name_output = gr.Textbox(label="Patient Name")
    age_output = gr.Textbox(label="Age")
    gender_output = gr.Textbox(label="Gender")
    phone_output = gr.Textbox(label="Phone Number")
    
    # Button to trigger image processing and text extraction
    process_button = gr.Button("Process Image")
    
    # When the button is clicked, process the image and show results in textboxes
    process_button.click(
        fn=extract_patient_info,
        inputs=image_input,
        outputs=[name_output, age_output, gender_output, phone_output],
    )

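# Minimal local sanity check (an assumed usage sketch, not part of the app):
# load a form image with OpenCV, convert BGR -> RGB to match what Gradio's
# image component supplies, and print the extracted fields without starting
# the web UI. "sample_form.png" is a hypothetical file name.
#
#   image = cv2.cvtColor(cv2.imread("sample_form.png"), cv2.COLOR_BGR2RGB)
#   print(extract_patient_info(image))
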
# Launch the Gradio app
if __name__ == "__main__":
    demo.launch()