File size: 3,381 Bytes
a7847d1 526d984 8c03e36 526d984 a7847d1 8c03e36 526d984 a7847d1 526d984 a7847d1 526d984 8c03e36 526d984 a7847d1 8c03e36 a7847d1 8c03e36 526d984 a7847d1 526d984 a7847d1 526d984 a7847d1 526d984 a7847d1 526d984 a7847d1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
import os

import cv2
import easyocr
import gradio as gr
from simple_salesforce import Salesforce
from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
# Salesforce credentials.
# SECURITY: these are read from the environment so that secrets are never
# committed with the source. Any password or token that was ever committed to
# this file should be treated as compromised and rotated.
# A missing variable raises KeyError naming the variable, so a misconfigured
# deployment fails at start-up rather than at the first upload.
Salesforce_User_Name = os.environ["SALESFORCE_USERNAME"]
Salesforce_Password = os.environ["SALESFORCE_PASSWORD"]
# The instance URL is not a secret, so the existing value stays as the default.
# NOTE(review): it is not passed to the Salesforce(...) call below.
SALESFORCE_INSTANCE_URL = os.environ.get(
    "SALESFORCE_INSTANCE_URL",
    "https://sathkruthatechsolutions63-dev-ed.develop.lightning.force.com",
)
# Despite the name, this value is handed to simple_salesforce as the
# ``security_token`` argument in the Salesforce(...) call below.
SALESFORCE_ACCESS_TOKEN = os.environ["SALESFORCE_SECURITY_TOKEN"]

# Initialize EasyOCR reader for text extraction (English only)
reader = easyocr.Reader(['en'])

# Load pre-trained LayoutLM model and tokenizer
model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-large-uncased")
tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-large-uncased")

# Salesforce Connection Setup
sf = Salesforce(
    username=Salesforce_User_Name,
    password=Salesforce_Password,
    security_token=SALESFORCE_ACCESS_TOKEN,
)
# Function to extract text using EasyOCR and process with LayoutLM
def extract_patient_info(image):
    """Run OCR on an uploaded image, store a patient record, and return the text.

    Args:
        image: Image array delivered by the Gradio image component, or
            ``None`` when the handler fires without an uploaded image.

    Returns:
        The recognised text fragments joined by single spaces, or an empty
        string when no image was supplied.
    """
    # Without this guard cv2.cvtColor raises when nothing was uploaded.
    if image is None:
        return ""

    # Swap the first and third colour channels before OCR.
    # NOTE(review): Gradio numpy images are presumably already RGB, so this
    # yields BGR ordering despite the constant's name -- confirm that this is
    # the ordering the OCR reader expects.
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Each detection's second element is the recognised string.
    result = reader.readtext(image_rgb)
    extracted_text = " ".join(detection[1] for detection in result)

    # LayoutLM has 512 position embeddings; without truncation a long OCR
    # result makes the forward pass fail.
    inputs = tokenizer(
        extracted_text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    # NOTE(review): the model predictions are not consumed by anything below;
    # the details are derived from the OCR text alone.
    model(**inputs)

    details = extract_details_from_text(extracted_text)

    # Create a record in Salesforce using the extracted details
    create_salesforce_record(details)

    return extracted_text
# Function to extract details from the extracted text (use regex or other methods to extract)
def extract_details_from_text(extracted_text):
    """Return the patient details dictionary for ``extracted_text``.

    Placeholder implementation: ``extracted_text`` is not inspected yet, and
    the same fixed sample values are returned on every call. Replace the
    literals with real parsing once the layout of the OCR text is known.

    Returns:
        A new dict with the keys ``'Name'``, ``'Age'``, ``'Gender'`` and
        ``'Phone Number'``; every value is a string.
    """
    return {
        'Name': "Shanthi",  # TODO: extract the actual name
        'Age': "39",  # TODO: extract the actual age
        'Gender': "Female",  # TODO: extract the actual gender
        'Phone Number': "9955337097",  # TODO: extract the actual phone number
    }
# Function to create a record in Salesforce
def create_salesforce_record(details):
    """Create a ``Patient_Registration__c`` record from ``details``.

    Args:
        details: Mapping with the keys ``'Name'``, ``'Age'``, ``'Gender'``
            and ``'Phone Number'``. ``'Age'`` must be convertible by ``int``.

    Raises:
        KeyError: If one of the required keys is missing.
        ValueError: If the age cannot be converted to an integer.
    """
    # Map the detail keys onto the custom-object field names, in the same
    # order as the fields are looked up (Name, Age, Gender, Phone Number).
    record = {}
    record['Name__c'] = details['Name']
    record['Age__c'] = int(details['Age'])
    record['Gender__c'] = details['Gender']
    record['Phone_Number__c'] = details['Phone Number']

    # Create a new record in Salesforce
    patient_registration = sf.Patient_Registration__c
    patient_registration.create(record)
    print("Salesforce record created successfully!")
# Gradio interface setup: one image upload, one button, one text output.
with gr.Blocks() as demo:
    gr.Markdown(
        value="### OCR Using LayoutLM Pretrained Model with EasyOCR and Salesforce Integration"
    )

    # Uploaded image is handed to the callback as a numpy array.
    image_input = gr.Image(label="Upload Image", type="numpy")

    # Textbox that shows the string returned by the callback.
    output_text = gr.Textbox(label="Extracted Text")

    # Clicking the button runs OCR, record creation, and fills the textbox.
    process_button = gr.Button(value="Process Image")
    process_button.click(
        fn=extract_patient_info,
        inputs=image_input,
        outputs=output_text,
    )

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|