File size: 3,698 Bytes
a7847d1 526d984 8c03e36 47a909e 8c03e36 526d984 a7847d1 8c03e36 526d984 a7847d1 526d984 47a909e 8c03e36 47a909e 526d984 a7847d1 47a909e 8c03e36 47a909e 8c03e36 47a909e 8c03e36 47a909e 8c03e36 47a909e 8c03e36 a7847d1 8c03e36 526d984 a7847d1 526d984 a7847d1 526d984 a7847d1 526d984 a7847d1 526d984 a7847d1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import os
import re

import cv2
import easyocr
import gradio as gr
from simple_salesforce import Salesforce
from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
# Salesforce credentials.
# They are read from environment variables instead of being hard-coded, so no
# secret is stored in the source file. Any value that was previously committed
# here should be treated as leaked and rotated. A missing required variable
# raises KeyError at import time, before any model is loaded.
Salesforce_User_Name = os.environ["SALESFORCE_USERNAME"]  # Your Salesforce username
Salesforce_Password = os.environ["SALESFORCE_PASSWORD"]
# Org URL; it is not referenced by the login call below.
SALESFORCE_INSTANCE_URL = os.environ.get(
    "SALESFORCE_INSTANCE_URL",
    'https://sathkruthatechsolutions63-dev-ed.develop.lightning.force.com',
)
# Despite its name, this value is passed as the ``security_token`` argument.
SALESFORCE_ACCESS_TOKEN = os.environ["SALESFORCE_SECURITY_TOKEN"]

# Initialize EasyOCR reader for text extraction
reader = easyocr.Reader(['en'])

# Load pre-trained LayoutLM model and tokenizer
model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-large-uncased")
tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-large-uncased")

# Salesforce Connection Setup
sf = Salesforce(
    username=Salesforce_User_Name,
    password=Salesforce_Password,
    security_token=SALESFORCE_ACCESS_TOKEN,
)
# Function to extract text using EasyOCR and store the parsed details in Salesforce
def extract_patient_info(image):
    """Run OCR on an uploaded image, record the parsed details and return the text.

    Args:
        image: Image array handed over by the Gradio image input, or ``None``
            (presumably what Gradio passes when the button is clicked
            without an upload -- confirm against the Gradio version in use).

    Returns:
        The text fragments detected by EasyOCR joined with single spaces, or
        an empty string when no image was provided.
    """
    # Without an image there is nothing to read; bail out instead of letting
    # cv2.cvtColor fail on None.
    if image is None:
        return ""

    # Swap the first and third colour channels before handing the array to OCR.
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Use EasyOCR to extract text from the image; index 1 of each detection
    # holds the recognised string.
    result = reader.readtext(image_rgb)
    extracted_text = " ".join(detection[1] for detection in result)

    # Extract relevant details (Name, Age, Gender, Phone number) from the text
    details = extract_details_from_text(extracted_text)

    # Create a record in Salesforce using the extracted details
    create_salesforce_record(details)

    # Return the extracted text for display
    return extracted_text
# Function to extract details from the extracted text using regex
def extract_details_from_text(extracted_text):
    """Pull Name, Age, Gender and Phone Number out of OCR text.

    Args:
        extracted_text: Text in which fields appear as ``Label: value`` pairs
            (the colon is optional), e.g.
            ``"Name: John Doe Age: 30 Gender: Male Phone number: 9876543210"``.

    Returns:
        A dict containing only the fields that were found, under the keys
        ``'Name'``, ``'Age'``, ``'Gender'`` and ``'Phone Number'``. All values
        are strings exactly as matched (Age and Phone Number are digit runs).
    """
    details = {}

    # Extract Name.
    # The value is letters and whitespace only, so on a single line of OCR
    # text it would otherwise run straight into the next label
    # ("John Doe Age"). The lazy group plus the lookahead stops it before a
    # following Age/Gender/Phone label, any non-letter character, or the end
    # of the text. The negative lookahead keeps an empty Name field from
    # capturing the next label as the name.
    name_match = re.search(
        r"Name[:\s]*(?!(?i:Age|Gender|Phone)\b)"
        r"([A-Za-z][A-Za-z\s]*?)\s*"
        r"(?=\b(?i:Age|Gender|Phone)\b|[^A-Za-z\s]|$)",
        extracted_text,
    )
    if name_match:
        details['Name'] = name_match.group(1)

    # Extract Age. The label stays case-sensitive: a case-insensitive "age"
    # would also match inside ordinary words such as "Page".
    age_match = re.search(r"Age[:\s]*(\d+)", extracted_text)
    if age_match:
        details['Age'] = age_match.group(1)

    # Extract Gender
    gender_match = re.search(r"Gender[:\s]*(Male|Female)", extracted_text, re.IGNORECASE)
    if gender_match:
        details['Gender'] = gender_match.group(1)

    # Extract Phone number. The label is matched case-insensitively so that
    # the common spelling "Phone Number" is recognised as well.
    phone_match = re.search(r"Phone number[:\s]*(\d+)", extracted_text, re.IGNORECASE)
    if phone_match:
        details['Phone Number'] = phone_match.group(1)

    return details
# Function to create a record in Salesforce using the extracted details
def create_salesforce_record(details):
    """Create a ``Patient_Registration__c`` record from extracted patient details.

    Args:
        details: Mapping expected to hold the keys ``'Name'``, ``'Age'``,
            ``'Gender'`` and ``'Phone Number'``. OCR may fail to find some of
            them, so the mapping can be incomplete.

    Returns:
        None. When a field is missing or Age is not an integer, no record is
        created and the reason is printed, so the caller can still return the
        OCR text instead of failing with a KeyError/ValueError.
    """
    details = details or {}
    required_fields = ('Name', 'Age', 'Gender', 'Phone Number')

    # An incomplete record is not sent to Salesforce.
    missing = [field for field in required_fields if field not in details]
    if missing:
        print(f"Salesforce record not created; missing fields: {', '.join(missing)}")
        return

    try:
        age = int(details['Age'])
    except (TypeError, ValueError):
        print(f"Salesforce record not created; invalid Age: {details['Age']!r}")
        return

    # Prepare the data to be inserted into Salesforce
    data = {
        'Name__c': details['Name'],
        'Age__c': age,
        'Gender__c': details['Gender'],
        'Phone_Number__c': details['Phone Number'],
    }

    # Create a new record in Salesforce
    sf.Patient_Registration__c.create(data)
    print("Salesforce record created successfully!")
# Gradio interface setup: an image input, a text output and a trigger button
demo = gr.Blocks()
with demo:
    gr.Markdown("### OCR Using LayoutLM Pretrained Model with EasyOCR and Salesforce Integration")

    # Upload widget; the handler receives the picture as a numpy array
    image_input = gr.Image(label="Upload Image", type="numpy")

    # Textbox that shows the text returned by the handler
    output_text = gr.Textbox(label="Extracted Text")

    # Clicking the button runs extract_patient_info on the uploaded image
    # and writes its return value into the textbox
    process_button = gr.Button("Process Image")
    process_button.click(extract_patient_info, image_input, output_text)

# Start the app only when this file is executed as a script
if __name__ == "__main__":
    demo.launch()
|