from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
import gradio as gr
import cv2
import easyocr
from simple_salesforce import Salesforce
import re

# Salesforce credentials
Salesforce_User_Name = 'sathkruthatech@hms.com'  # Your Salesforce username
Salesforce_Password = 'Sathkrutha@06'
SALESFORCE_INSTANCE_URL = 'https://sathkruthatechsolutions63-dev-ed.develop.lightning.force.com'
SALESFORCE_ACCESS_TOKEN = 'UnByPih7PWmoWLzRuRyFrXzw'

# Initialize the EasyOCR reader for text extraction
reader = easyocr.Reader(['en'])

# Load the pre-trained LayoutLM model and tokenizer
# (not used in the extraction flow below, which relies on EasyOCR and regex parsing)
model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-large-uncased")
tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-large-uncased")

# Salesforce connection setup
sf = Salesforce(username=Salesforce_User_Name, password=Salesforce_Password, security_token=SALESFORCE_ACCESS_TOKEN)


# Extract text from the uploaded image with EasyOCR and push the parsed details to Salesforce
def extract_patient_info(image):
    # Gradio supplies an RGB array; swap channels to the BGR order that OpenCV/EasyOCR expect
    image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Use EasyOCR to extract text from the image
    result = reader.readtext(image_bgr)

    # Debug: print the raw OCR result
    print("OCR Result:", result)

    # Join the detected text fragments into a single string
    extracted_text = " ".join([detection[1] for detection in result])

    # Debug: print the extracted text
    print("Extracted Text:", extracted_text)

    # Extract the relevant details (Name, Age, Gender, Phone number) from the text
    details = extract_details_from_text(extracted_text)

    # Debug: print the parsed details
    print("Parsed Details:", details)

    # Create a record in Salesforce using the extracted details
    create_salesforce_record(details)

    # Return the extracted text for display
    return extracted_text


# Pull the individual details out of the extracted text using regex
def extract_details_from_text(extracted_text):
    # Regex patterns to match Name, Age, Gender, and Phone number
    details = {}

    # Extract Name (stop before the next field label so it is not swallowed into the name)
    name_match = re.search(r"Name[:\s]*([A-Za-z\s]+?)(?=\s*(?:Age|Gender|Phone)\b|$)", extracted_text)
    if name_match:
        details['Name'] = name_match.group(1).strip()
    else:
        print("Error: Name not found!")

    # Extract Age
    age_match = re.search(r"Age[:\s]*([\d]+)", extracted_text)
    if age_match:
        details['Age'] = age_match.group(1)
    else:
        print("Error: Age not found!")

    # Extract Gender
    gender_match = re.search(r"Gender[:\s]*(Male|Female)", extracted_text, re.IGNORECASE)
    if gender_match:
        details['Gender'] = gender_match.group(1)
    else:
        print("Error: Gender not found!")

    # Extract Phone number
    phone_match = re.search(r"Phone number[:\s]*([\d]+)", extracted_text)
    if phone_match:
        details['Phone Number'] = phone_match.group(1)
    else:
        print("Error: Phone number not found!")

    return details


# Create a record in Salesforce using the extracted details
def create_salesforce_record(details):
    try:
        # Prepare the data to be inserted into Salesforce
        # (built inside the try block so a missing field is reported instead of crashing the handler)
        data = {
            'Name__c': details['Name'],
            'Age__c': int(details['Age']),
            'Gender__c': details['Gender'],
            'Phone_Number__c': details['Phone Number']
        }

        # Debug: print the data before inserting into Salesforce
        print("Data to be inserted into Salesforce:", data)

        # Create a new record in Salesforce
        sf.Patient_Registration__c.create(data)
        print("Salesforce record created successfully!")
    except Exception as e:
        # Handle missing fields or Salesforce errors during record creation
        print(f"Error creating Salesforce record: {e}")
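
# A minimal sanity-check sketch for the regex extractor above. The helper name and
# sample string are hypothetical (not part of the app flow); it only assumes the
# field labels the registration form is expected to contain. Call it manually from
# a REPL or a test, e.g. `_sanity_check_extraction()`, before wiring real images
# through the UI.
def _sanity_check_extraction():
    sample = "Name: John Doe Age: 42 Gender: Male Phone number: 9876543210"
    details = extract_details_from_text(sample)
    # Expect: {'Name': 'John Doe', 'Age': '42', 'Gender': 'Male', 'Phone Number': '9876543210'}
    print(details)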

# Gradio interface setup
with gr.Blocks() as demo:
    gr.Markdown("### OCR Using LayoutLM Pretrained Model with EasyOCR and Salesforce Integration")

    # Image upload component
    image_input = gr.Image(type="numpy", label="Upload Image")

    # Output textbox to display the extracted text
    output_text = gr.Textbox(label="Extracted Text")

    # Button to trigger image processing and text extraction
    process_button = gr.Button("Process Image")

    # When the button is clicked, process the image and show the result in the textbox
    process_button.click(fn=extract_patient_info, inputs=image_input, outputs=output_text)

# Launch the Gradio app
if __name__ == "__main__":
    demo.launch()
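
# Hedged follow-up sketch: a standalone helper (never called by the app) to confirm
# that records are landing in Salesforce. The helper name is hypothetical; it assumes
# the same custom object and field API names used in create_salesforce_record above.
# Run it from a REPL or a separate test session, not from inside the running app.
def _query_recent_patient_records(limit=5):
    soql = (
        "SELECT Name__c, Age__c, Gender__c, Phone_Number__c "
        f"FROM Patient_Registration__c ORDER BY CreatedDate DESC LIMIT {int(limit)}"
    )
    result = sf.query(soql)
    for record in result["records"]:
        print(record["Name__c"], record["Age__c"], record["Gender__c"], record["Phone_Number__c"])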