DSatishchandra committed (verified)
Commit a7847d1 · 1 Parent(s): e2ac2d3

Create app.py

Files changed (1)
  1. app.py +48 -0
app.py ADDED
from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
import gradio as gr
import torch

# Load the pre-trained LayoutLM model and tokenizer
model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-large-uncased")
tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-large-uncased")
model.eval()

# Function to extract patient details from an uploaded image
def extract_patient_info(image):
    # gr.Image(type="numpy") already delivers an RGB array, so no color conversion is needed.
    if image is None:
        return "No image uploaded", "", "", ""

    # LayoutLM does not read pixels directly: it expects the words produced by an OCR engine
    # together with their bounding boxes (normalized to a 0-1000 grid). That OCR-and-inference
    # step is not implemented in this demo, so placeholder values are returned for now.
    # TODO: run OCR on `image`, build `input_ids` and `bbox` tensors, and call
    #       model(input_ids=..., bbox=..., attention_mask=...) for per-token predictions.

    # The click handler below expects one value per output textbox, so return four strings.
    return (
        "Patient name will appear here.",
        "Age will appear here.",
        "Gender will appear here.",
        "Phone number will appear here.",
    )

# Gradio interface setup
with gr.Blocks() as demo:
    gr.Markdown("### OCR Using LayoutLM Pretrained Model")

    # Image upload component
    image_input = gr.Image(type="numpy", label="Upload Image")

    # Output textboxes to display the extracted information
    name_output = gr.Textbox(label="Patient Name")
    age_output = gr.Textbox(label="Age")
    gender_output = gr.Textbox(label="Gender")
    phone_output = gr.Textbox(label="Phone Number")

    # Button to trigger image processing and text extraction
    process_button = gr.Button("Process Image")

    # When the button is clicked, process the image and show the results in the textboxes
    process_button.click(
        fn=extract_patient_info,
        inputs=image_input,
        outputs=[name_output, age_output, gender_output, phone_output],
    )

# Launch the Gradio app
if __name__ == "__main__":
    demo.launch()
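
Note on filling in the TODO above: LayoutLM consumes OCR'd words plus their bounding boxes rather than raw pixels, so some OCR engine has to run first. The snippet below is a minimal sketch of one way to build those inputs, assuming pytesseract (with a local Tesseract install) for OCR and the fast tokenizer LayoutLMTokenizerFast so that word-piece tokens can be mapped back to word boxes via word_ids(). The helper name build_layoutlm_inputs is hypothetical and none of this is part of the commit itself.

# Sketch only: assumes `pip install pytesseract` and a local Tesseract binary.
import pytesseract
import torch
from transformers import LayoutLMForTokenClassification, LayoutLMTokenizerFast

# Mirrors the loading in app.py, but with the fast tokenizer so word_ids() is available.
tokenizer = LayoutLMTokenizerFast.from_pretrained("microsoft/layoutlm-large-uncased")
model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-large-uncased")

def build_layoutlm_inputs(image):
    """Hypothetical helper: OCR an RGB numpy image and build LayoutLM input tensors."""
    height, width = image.shape[:2]
    ocr = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)

    words, boxes = [], []
    for text, left, top, w, h in zip(ocr["text"], ocr["left"], ocr["top"],
                                     ocr["width"], ocr["height"]):
        if text.strip():
            words.append(text)
            # LayoutLM expects boxes on a 0-1000 grid: [x0, y0, x1, y1].
            boxes.append([int(1000 * left / width),
                          int(1000 * top / height),
                          int(1000 * (left + w) / width),
                          int(1000 * (top + h) / height)])

    encoding = tokenizer(words, is_split_into_words=True,
                         truncation=True, max_length=512, return_tensors="pt")
    # Repeat each word's box for every word piece; special tokens get a zero box.
    token_boxes = [[0, 0, 0, 0] if idx is None else boxes[idx]
                   for idx in encoding.word_ids()]
    encoding["bbox"] = torch.tensor([token_boxes])
    return words, encoding

# Usage inside extract_patient_info:
# words, encoding = build_layoutlm_inputs(image)
# with torch.no_grad():
#     logits = model(**encoding).logits   # shape: (1, seq_len, num_labels)

Whether run locally or as a Gradio Space, the environment would also need transformers, torch, and gradio installed, plus pytesseract and the Tesseract binary if this sketch is used. Note that the stock microsoft/layoutlm-large-uncased checkpoint's token-classification head is randomly initialized, so meaningful name/age/gender/phone labels would only come after fine-tuning on annotated forms.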