DSatishchandra committed on
Commit
526d984
·
verified ·
1 Parent(s): e7d410b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -23
app.py CHANGED
@@ -1,47 +1,46 @@
1
  from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
2
  import gradio as gr
3
  import cv2
4
- import torch
 
 
 
5
 
6
  # Load pre-trained LayoutLM model and tokenizer
7
  model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-large-uncased")
8
  tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-large-uncased")
9
 
10
- # Function to extract text using LayoutLM
11
  def extract_patient_info(image):
12
  # Convert the uploaded image to RGB (required by LayoutLM)
13
  image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
14
-
15
- # Here you can add code to process the image for LayoutLM (like extracting features)
16
- # Since LayoutLM works better with structured documents, we can return simple OCR text
17
- # for demonstration purposes:
18
-
19
- # Tokenize and process the image text (you can customize the process depending on your image)
20
- inputs = tokenizer(image_rgb, return_tensors="pt")
21
  outputs = model(**inputs)
22
-
23
- # For demonstration purposes, we return some dummy output
24
- # You can adjust this based on your image and use case (extract more meaningful text)
25
- return "Extracted text using LayoutLM will go here."
26
 
27
  # Gradio interface setup
28
  with gr.Blocks() as demo:
29
- gr.Markdown("### OCR Using LayoutLM Pretrained Model")
30
-
31
  # Image upload component
32
  image_input = gr.Image(type="numpy", label="Upload Image")
33
-
34
  # Output textboxes to display the extracted information
35
- name_output = gr.Textbox(label="Patient Name")
36
- age_output = gr.Textbox(label="Age")
37
- gender_output = gr.Textbox(label="Gender")
38
- phone_output = gr.Textbox(label="Phone Number")
39
-
40
  # Button to trigger image processing and text extraction
41
  process_button = gr.Button("Process Image")
42
-
43
  # When the button is clicked, process the image and show results in textboxes
44
- process_button.click(fn=extract_patient_info, inputs=image_input, outputs=[name_output, age_output, gender_output, phone_output])
45
 
46
  # Launch the Gradio app
47
  if __name__ == "__main__":
 
1
  from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
2
  import gradio as gr
3
  import cv2
4
+ import easyocr
5
+
6
+ # Initialize EasyOCR reader for text extraction
7
+ reader = easyocr.Reader(['en'])
8
 
9
  # Load pre-trained LayoutLM model and tokenizer
10
  model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-large-uncased")
11
  tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-large-uncased")
12
 
13
+ # Function to extract text using EasyOCR and process with LayoutLM
14
  def extract_patient_info(image):
15
  # Convert the uploaded image to RGB (required by LayoutLM)
16
  image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
17
+
18
+ # Use EasyOCR to extract text from the image
19
+ result = reader.readtext(image_rgb)
20
+ extracted_text = " ".join([detection[1] for detection in result])
21
+
22
+ # Tokenize the extracted text with LayoutLM
23
+ inputs = tokenizer(extracted_text, return_tensors="pt")
24
  outputs = model(**inputs)
25
+
26
+ # For this example, we return the extracted text (you can further process LayoutLM's output)
27
+ return extracted_text
 
28
 
29
  # Gradio interface setup
30
  with gr.Blocks() as demo:
31
+ gr.Markdown("### OCR Using LayoutLM Pretrained Model with EasyOCR")
32
+
33
  # Image upload component
34
  image_input = gr.Image(type="numpy", label="Upload Image")
35
+
36
  # Output textboxes to display the extracted information
37
+ output_text = gr.Textbox(label="Extracted Text")
38
+
 
 
 
39
  # Button to trigger image processing and text extraction
40
  process_button = gr.Button("Process Image")
41
+
42
  # When the button is clicked, process the image and show results in textboxes
43
+ process_button.click(fn=extract_patient_info, inputs=image_input, outputs=output_text)
44
 
45
  # Launch the Gradio app
46
  if __name__ == "__main__":