Update app.py
Browse files
app.py
CHANGED
@@ -1,47 +1,46 @@
|
|
1 |
from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
|
2 |
import gradio as gr
|
3 |
import cv2
|
4 |
-
import
|
|
|
|
|
|
|
5 |
|
6 |
# Load pre-trained LayoutLM model and tokenizer
|
7 |
model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-large-uncased")
|
8 |
tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-large-uncased")
|
9 |
|
10 |
-
# Function to extract text using LayoutLM
|
11 |
def extract_patient_info(image):
|
12 |
# Convert the uploaded image to RGB (required by LayoutLM)
|
13 |
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
14 |
-
|
15 |
-
#
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
# Tokenize
|
20 |
-
inputs = tokenizer(
|
21 |
outputs = model(**inputs)
|
22 |
-
|
23 |
-
# For
|
24 |
-
|
25 |
-
return "Extracted text using LayoutLM will go here."
|
26 |
|
27 |
# Gradio interface setup
|
28 |
with gr.Blocks() as demo:
|
29 |
-
gr.Markdown("### OCR Using LayoutLM Pretrained Model")
|
30 |
-
|
31 |
# Image upload component
|
32 |
image_input = gr.Image(type="numpy", label="Upload Image")
|
33 |
-
|
34 |
# Output textboxes to display the extracted information
|
35 |
-
|
36 |
-
|
37 |
-
gender_output = gr.Textbox(label="Gender")
|
38 |
-
phone_output = gr.Textbox(label="Phone Number")
|
39 |
-
|
40 |
# Button to trigger image processing and text extraction
|
41 |
process_button = gr.Button("Process Image")
|
42 |
-
|
43 |
# When the button is clicked, process the image and show results in textboxes
|
44 |
-
process_button.click(fn=extract_patient_info, inputs=image_input, outputs=
|
45 |
|
46 |
# Launch the Gradio app
|
47 |
if __name__ == "__main__":
|
|
|
1 |
from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
|
2 |
import gradio as gr
|
3 |
import cv2
|
4 |
+
import easyocr
|
5 |
+
|
6 |
+
# OCR engine: one English-only EasyOCR reader, built once at import time and
# shared by every call to extract_patient_info.
reader = easyocr.Reader(lang_list=['en'])

# LayoutLM weights and the matching tokenizer both come from the same
# pre-trained checkpoint, so the name is spelled out only once.
_LAYOUTLM_CHECKPOINT = "microsoft/layoutlm-large-uncased"
model = LayoutLMForTokenClassification.from_pretrained(_LAYOUTLM_CHECKPOINT)
tokenizer = LayoutLMTokenizer.from_pretrained(_LAYOUTLM_CHECKPOINT)
|
12 |
|
13 |
+
# Function to extract text using EasyOCR and process with LayoutLM
|
14 |
def extract_patient_info(image):
    """Run EasyOCR on an uploaded image and return the recognised text.

    Args:
        image: NumPy image array as delivered by the ``gr.Image(type="numpy")``
            input, or ``None`` when the button is pressed before anything
            has been uploaded.

    Returns:
        The text fragments EasyOCR detected, joined with single spaces.
        An empty string is returned when no image was supplied.
    """
    # An empty image component yields None; cv2.cvtColor would raise on it,
    # so answer with an empty result instead of crashing the handler.
    if image is None:
        return ""

    # Imported here because torch is not among the imports visible at the
    # top of this file.
    import torch

    # Swap the first and third colour channels before OCR (same operation as
    # the previous revision).
    # NOTE(review): Gradio documents numpy images as RGB, so the array is
    # BGR-ordered after this call despite the constant's name — confirm
    # which channel order EasyOCR expects.
    image_swapped = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Element 1 of every detection is the recognised string.
    detections = reader.readtext(image_swapped)
    extracted_text = " ".join(detection[1] for detection in detections)

    # Truncate to the model's position-embedding limit so that a text-heavy
    # page cannot produce a sequence the model is unable to index.
    inputs = tokenizer(
        extracted_text,
        return_tensors="pt",
        truncation=True,
        max_length=model.config.max_position_embeddings,
    )

    # Inference only, so skip autograd bookkeeping. Only text is passed in;
    # no bounding boxes from the OCR step reach the model.
    with torch.no_grad():
        outputs = model(**inputs)  # noqa: F841 - placeholder, not consumed yet

    # For this example the raw OCR text is returned; LayoutLM's output is
    # available in `outputs` for further processing.
    return extracted_text
|
|
|
28 |
|
29 |
# Gradio interface setup
|
30 |
with gr.Blocks() as demo:
    # Page heading.
    gr.Markdown(value="### OCR Using LayoutLM Pretrained Model with EasyOCR")

    # Upload widget; the handler receives the picture as a NumPy array.
    image_input = gr.Image(
        type="numpy",
        label="Upload Image",
    )

    # Single text box that shows whatever the handler returns.
    output_text = gr.Textbox(
        label="Extracted Text",
    )

    # Button that starts OCR on the uploaded image.
    process_button = gr.Button(value="Process Image")

    # Wire the button: the image goes in, the extracted text comes out.
    process_button.click(
        fn=extract_patient_info,
        inputs=image_input,
        outputs=output_text,
    )
|
44 |
|
45 |
# Launch the Gradio app
|
46 |
if __name__ == "__main__":
|