File size: 4,467 Bytes
a7847d1 526d984 8c03e36 47a909e 8c03e36 526d984 a7847d1 8c03e36 526d984 a7847d1 526d984 88fdb4e 526d984 88fdb4e 526d984 47a909e 8c03e36 88fdb4e 8c03e36 47a909e 526d984 a7847d1 47a909e 8c03e36 47a909e 8c03e36 47a909e 88fdb4e 47a909e 88fdb4e 47a909e 88fdb4e 47a909e 88fdb4e 47a909e 8c03e36 47a909e 8c03e36 47a909e 8c03e36 88fdb4e 8c03e36 a7847d1 8c03e36 526d984 a7847d1 526d984 a7847d1 526d984 a7847d1 526d984 a7847d1 526d984 a7847d1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
import os
import re

import cv2
import easyocr
import gradio as gr
from simple_salesforce import Salesforce
from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
# Salesforce credentials are read from the environment so that secrets never
# live in source control.
# NOTE(review): any credentials that were ever committed in this file should be
# treated as leaked and rotated.
Salesforce_User_Name = os.environ.get("SALESFORCE_USERNAME", "")
Salesforce_Password = os.environ.get("SALESFORCE_PASSWORD", "")
SALESFORCE_INSTANCE_URL = os.environ.get(
    "SALESFORCE_INSTANCE_URL",
    "https://sathkruthatechsolutions63-dev-ed.develop.lightning.force.com",
)
# Passed to simple_salesforce as ``security_token`` when connecting below.
SALESFORCE_ACCESS_TOKEN = os.environ.get("SALESFORCE_ACCESS_TOKEN", "")

# Fail fast with an actionable message instead of an opaque login failure.
_missing_settings = [
    env_name
    for env_name, value in (
        ("SALESFORCE_USERNAME", Salesforce_User_Name),
        ("SALESFORCE_PASSWORD", Salesforce_Password),
        ("SALESFORCE_ACCESS_TOKEN", SALESFORCE_ACCESS_TOKEN),
    )
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing Salesforce credentials; set environment variable(s): "
        + ", ".join(_missing_settings)
    )

# Initialize EasyOCR reader for English text extraction
reader = easyocr.Reader(['en'])

# Load pre-trained LayoutLM model and tokenizer.
# NOTE(review): neither object is referenced by the functions visible in this
# file; they are kept so the module-level names stay available.
model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-large-uncased")
tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-large-uncased")

# Salesforce connection used by create_salesforce_record
sf = Salesforce(
    username=Salesforce_User_Name,
    password=Salesforce_Password,
    security_token=SALESFORCE_ACCESS_TOKEN,
)
# Gradio callback: OCR the uploaded image, parse patient details, store them
# in Salesforce and return the raw OCR text for display.
def extract_patient_info(image):
    """Run OCR on an uploaded image and forward the parsed details to Salesforce.

    Args:
        image: Image array supplied by the Gradio image input, or ``None``
            when the button is pressed before anything was uploaded.

    Returns:
        The text of every OCR detection joined by single spaces, or a short
        notice string when no image was supplied.
    """
    # Clicking the button without an upload hands us None; bail out early
    # instead of letting OpenCV raise on it.
    if image is None:
        return "No image uploaded."

    # Swap the first and third colour channels before OCR.
    # NOTE(review): Gradio numpy images are presumably RGB, so this yields
    # BGR — confirm that is the channel order EasyOCR expects for ndarrays.
    swapped = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Use EasyOCR to extract text from the image
    result = reader.readtext(swapped)
    # Debug: Print OCR result
    print("OCR Result:", result)

    # The second element of each detection is the recognised text.
    extracted_text = " ".join(detection[1] for detection in result)
    # Debug: Print the extracted text
    print("Extracted Text:", extracted_text)

    # Pull Name, Age, Gender and Phone number out of the OCR text
    details = extract_details_from_text(extracted_text)
    # Debug: Print parsed details
    print("Parsed Details:", details)

    # A field the parser could not find must not hide the OCR text from the
    # user, so incomplete or invalid details are reported rather than raised.
    try:
        create_salesforce_record(details)
    except (KeyError, ValueError) as exc:
        print(f"Skipping Salesforce record; incomplete or invalid details: {exc!r}")

    # Return the extracted text for display
    return extracted_text
# Function to extract details from the extracted text using regex
def extract_details_from_text(extracted_text):
    """Parse Name, Age, Gender and Phone number out of OCR text.

    Args:
        extracted_text: OCR output as a single string.

    Returns:
        A dict holding any of the keys ``'Name'``, ``'Age'``, ``'Gender'`` and
        ``'Phone Number'`` that were found; values are strings. A field that
        cannot be found is omitted and an error line is printed for it.
    """
    details = {}

    # Extract Name. The character class also accepts spaces, so on a single
    # line of text the capture runs straight into the next label
    # ("John Doe Age"). Cut it at the first following field label and
    # normalise whitespace.
    name = ""
    name_match = re.search(r"Name[:\s]*([A-Za-z\s]+)", extracted_text)
    if name_match:
        before_next_label = re.split(
            r"\b(?:Age|Gender|Phone)\b", name_match.group(1), maxsplit=1
        )[0]
        name = " ".join(before_next_label.split())
    if name:
        details['Name'] = name
    else:
        print("Error: Name not found!")

    # Extract Age
    age_match = re.search(r"Age[:\s]*([\d]+)", extracted_text)
    if age_match:
        details['Age'] = age_match.group(1)
    else:
        print("Error: Age not found!")

    # Extract Gender
    gender_match = re.search(r"Gender[:\s]*(Male|Female)", extracted_text, re.IGNORECASE)
    if gender_match:
        details['Gender'] = gender_match.group(1)
    else:
        print("Error: Gender not found!")

    # Extract Phone number. The label is matched case-insensitively so the
    # common "Phone Number" spelling is recognised, and a leading "+" of an
    # international number is kept.
    phone_match = re.search(
        r"Phone\s+number[:\s]*(\+?\d+)", extracted_text, re.IGNORECASE
    )
    if phone_match:
        details['Phone Number'] = phone_match.group(1)
    else:
        print("Error: Phone number not found!")

    return details
# Function to create a record in Salesforce using the extracted details
def create_salesforce_record(details):
    """Insert a ``Patient_Registration__c`` record built from parsed details.

    Args:
        details: Mapping with the keys ``'Name'``, ``'Age'``, ``'Gender'`` and
            ``'Phone Number'``. ``'Age'`` must be convertible with ``int()``.

    Returns:
        None. Success and every failure (missing field, invalid age, or an
        error raised by the Salesforce call) are reported on stdout; nothing
        is raised to the caller.
    """
    # The parser omits fields it could not find, so validate before indexing
    # instead of failing with a KeyError.
    required_fields = ('Name', 'Age', 'Gender', 'Phone Number')
    missing = [
        field for field in required_fields
        if details.get(field) in (None, "")
    ]
    if missing:
        print(
            "Error creating Salesforce record: missing field(s): "
            + ", ".join(missing)
        )
        return

    try:
        age = int(details['Age'])
    except (TypeError, ValueError):
        print(f"Error creating Salesforce record: invalid Age {details['Age']!r}")
        return

    # Prepare the data to be inserted into Salesforce
    data = {
        'Name__c': details['Name'],
        'Age__c': age,
        'Gender__c': details['Gender'],
        'Phone_Number__c': details['Phone Number'],
    }
    # Debug: Print the data before inserting into Salesforce
    print("Data to be inserted into Salesforce:", data)

    try:
        # Create a new record in Salesforce
        sf.Patient_Registration__c.create(data)
    except Exception as e:
        # Best-effort: a failed insert is reported, not propagated.
        print(f"Error creating Salesforce record: {e}")
    else:
        print("Salesforce record created successfully!")
# Build the Gradio page: one image upload, one text output and a button that
# connects them through extract_patient_info.
with gr.Blocks() as demo:
    gr.Markdown("### OCR Using LayoutLM Pretrained Model with EasyOCR and Salesforce Integration")

    # Uploaded picture, passed to the callback as a numpy array
    image_input = gr.Image(label="Upload Image", type="numpy")

    # Shows the text returned by the callback
    output_text = gr.Textbox(label="Extracted Text")

    # Trigger for OCR + record creation
    process_button = gr.Button("Process Image")

    # Wire the button: image in, extracted text out
    process_button.click(
        fn=extract_patient_info,
        inputs=image_input,
        outputs=output_text,
    )

# Start the web app only when run as a script
if __name__ == "__main__":
    demo.launch()
|