Spaces:

WebashalarForML
/

ResumeExtractor2

Build error

App Files Files Community

WebashalarForML committed on Sep 28, 2024

Commit

71c6bbf

verified ·

1 Parent(s): 99e545b

Upload 5 files

Browse files

Files changed (5) hide show

utils/beckup.py +298 -0
utils/error.py +45 -0
utils/fileTotext.py +127 -0
utils/mistral.py +377 -0
utils/spacy.py +246 -0

utils/beckup.py ADDED Viewed

	@@ -0,0 +1,298 @@

+# mistral.py
+import os
+import json
+import logging
+from huggingface_hub import InferenceClient
+from huggingface_hub.utils._errors import BadRequestError
+from dotenv import load_dotenv
+from utils.fileTotext import extract_text_based_on_format
+import re
+from utils.spacy import Parser_from_model
# Load environment variables from a local .env file so that HF_TOKEN can be
# supplied without exporting it in the shell.
load_dotenv()
# Authenticate with Hugging Face. The token is read once, at import time, and
# importing this module fails with ValueError when it is missing or empty.
HFT = os.getenv('HF_TOKEN')
if not HFT:
    raise ValueError("Hugging Face token is not set in environment variables.")
# Module-level inference client bound to the Mistral-Nemo instruct model.
client = InferenceClient(model="mistralai/Mistral-Nemo-Instruct-2407", token=HFT)
# Function to clean model output
def Data_Cleaner(text):
    """Strip chat boilerplate from a model reply so it can be JSON-decoded.

    Everything up to and including the first ``format:`` marker is dropped
    (when present), then surrounding whitespace, Markdown back-tick fences and
    a leading ``json`` fence tag are removed.

    Args:
        text: Raw reply text produced by the model.

    Returns:
        The cleaned payload as a string. It is not validated as JSON here.
    """
    parts = re.split(r".*?format:", text, maxsplit=1)
    payload = parts[1] if len(parts) > 1 else text
    payload = payload.strip().strip('`').strip()
    # Remove the fence language tag as a prefix. ``str.strip('json')`` would
    # instead strip any of the characters j/s/o/n from both ends, which
    # corrupts payloads such as ``null`` and misses an upper-case ``JSON`` tag.
    if payload[:4].lower() == 'json':
        payload = payload[4:]
    return payload.strip()
# Function to call Mistral and process output
def Model_ProfessionalDetails_Output(resume, client):
    """Ask the chat model for the professional section of a resume as JSON.

    Args:
        resume: Resume text; interpolated verbatim into the user prompt.
        client: Object exposing ``chat_completion(messages=..., max_tokens=...,
            stream=..., temperature=...)`` that yields chunks whose text is at
            ``chunk.choices[0].delta.content``.

    Returns:
        Whatever ``json.loads`` produces for the cleaned reply, or ``{}`` when
        the reply is not valid JSON (the decode error is logged).
    """
    system_role = {
        "role": "system",
        "content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return 'not found'."
    }
    # The JSON template below is shown to the model as the expected output
    # shape, so it is kept well-formed (comma after "soft_skills", closing
    # quote inside the "projects_experience" list).
    user_prompt = {
        "role": "user",
        "content": f'''Act as a resume parser for the following text given in text: {resume}
    Extract the text in the following output JSON string as:
    {{
        "professional": {{
             "skills": "Extract and list all technical skills, non-technical skills, programming languages, frameworks, domains, and technologies based on the resume.",
             "soft_skills": "Extract non-technical skills, Communication skills, and soft skills based on the resume.",
             "projects": "Include only the project names, titles, or headers mentioned in the resume. ",
             "projects_experience": ["Include overall project Experiences and about project in short mentioned in the resume."],
             "experience": "Include the total experience in months or years as mentioned in the resume.",
             "companies_worked_at": "Include the names of all companies worked at according to the resume. ",
             "certification": "Include any certifications obtained based on the resume. ",
             "worked_as": "Include the names of roles worked as according to the resume.",
             "qualification":"Extract and list the qualifications based on the resume, (qualifications likes B.Tech). If none are found, return 'No education listed'.",
             "course": "Extract the name of the Learning Course completed based on the resume. If not found, return 'No Course listed'.",
             "university": "Extract the name of the university or Collage or Intitute attended based on the resume. If not found, return 'No university listed'.",
             "year_of_graduation": "Extract the year of graduation from the resume. If not found, return 'No year of graduation listed'."
        }}
    }}
    Json Output:
    '''
    }
    # A streamed chunk may carry no text (``content`` is None); count it as an
    # empty fragment instead of raising TypeError on string concatenation.
    fragments = []
    for message in client.chat_completion(messages=[system_role, user_prompt], max_tokens=3000, stream=True, temperature=0.35):
        fragments.append(message.choices[0].delta.content or "")
    response = "".join(fragments)
    try:
        clean_response = Data_Cleaner(response)
        parsed_response = json.loads(clean_response)
    except json.JSONDecodeError as e:
        logging.error(f"JSON Decode Error: {e}")
        return {}
    return parsed_response
def Model_PersonalDetails_Output(resume, client):
    """Ask the chat model for the personal section of a resume as JSON.

    Args:
        resume: Resume text; interpolated verbatim into the user prompt.
        client: Object exposing ``chat_completion(messages=..., max_tokens=...,
            stream=..., temperature=...)`` that yields chunks whose text is at
            ``chunk.choices[0].delta.content``.

    Returns:
        Whatever ``json.loads`` produces for the cleaned reply, or ``{}`` when
        the reply is not valid JSON (the error and raw reply are logged).
    """
    # NOTE(review): the system prompt says "professional details" although this
    # function requests the personal block - looks like a copy/paste; confirm
    # before changing, since it alters what the model is told.
    system_role = {
        "role": "system",
        "content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return 'not found'."
    }
    user_prompt = {
        "role": "user",
        "content": f'''Act as a resume parser for the following text given in text: {resume}
    Extract the text in the following output JSON string as:
    {{
        "personal": {{
            "name": "Extract the full name based on the resume. If not found, return 'No name listed'.",
            "contact_number": "Extract the contact number from the resume. If not found, return 'No contact number listed'.",
            "email": "Extract the email address from the resume. If not found, return 'No email listed'.",
            "Address": "Extract the Address or address from the resume. If not found, return 'No Address listed'.",
            "link": "Extract any relevant links (e.g., portfolio, LinkedIn) from the resume. If not found, return 'No link listed'."
        }}
    }}
    output:
    '''
    }
    # A streamed chunk may carry no text (``content`` is None); count it as an
    # empty fragment instead of raising TypeError on string concatenation.
    fragments = []
    for message in client.chat_completion(
        messages=[system_role, user_prompt],
        max_tokens=3000,
        stream=True,
        temperature=0.35,
    ):
        fragments.append(message.choices[0].delta.content or "")
    response = "".join(fragments)
    # Handle cases where the response might have formatting issues
    try:
        clean_response = Data_Cleaner(response)
        parsed_response = json.loads(clean_response)
    except json.JSONDecodeError as e:
        # Report through logging, like the professional-details function.
        logging.error(f"JSON Decode Error: {e}")
        logging.error(f"Raw Response: {response}")
        return {}
    return parsed_response
# Resume parsing entry point: Mistral first, SpaCy as the fallback
def process_resume_data(file_path):
    """Parse a resume file into a nested personal/professional dictionary.

    The file is converted to text, then both Mistral extraction functions are
    called. If at least one of them yields data, the combined structure is
    returned. On any exception, or when both come back empty, the result of
    ``Parser_from_model(file_path)`` is returned instead.

    Args:
        file_path: Path of the resume file to process.

    Returns:
        ``{"error": "Text extraction failed"}`` when no text was extracted,
        otherwise the Mistral-based dictionary or the SpaCy parser's output.
    """
    resume_text, hyperlinks = extract_text_based_on_format(file_path)
    print("Resume converted to text successfully.")
    if not resume_text:
        return {"error": "Text extraction failed"}

    try:
        per_data = Model_PersonalDetails_Output(resume_text, client)
        pro_data = Model_ProfessionalDetails_Output(resume_text, client)

        # Normalise falsy results to empty dicts so the lookups below work.
        if not per_data:
            logging.warning("Mistral personal data extraction failed.")
            per_data = {}
        if not pro_data:
            logging.warning("Mistral professional data extraction failed.")
            pro_data = {}

        personal = per_data.get('personal', {})
        personal_section = {
            "name": personal.get('name', 'Not found'),
            "contact": personal.get('contact_number', 'Not found'),
            "email": personal.get('email', 'Not found'),
            "location": personal.get('Address', 'Not found'),
            # Links come from the document itself, not from the model reply.
            "link": hyperlinks,
        }

        professional = pro_data.get('professional', {})
        experience_entry = {
            "company": professional.get('companies_worked_at', 'Not found'),
            "projects": professional.get('projects', 'Not found'),
            "role": professional.get('worked_as', 'Not found'),
            "years": professional.get('experience', 'Not found'),
            "project_experience": professional.get('projects_experience', 'Not found'),
        }
        education_entry = {
            "qualification": professional.get('qualification', 'Not found'),
            "university": professional.get('university', 'Not found'),
            "course": professional.get('course', 'Not found'),
            "certificate": professional.get('certification', 'Not found'),
        }
        result = {
            "personal": personal_section,
            "professional": {
                "skills": professional.get('skills', 'Not found'),
                "soft_skills": professional.get('soft_skills', 'Not found'),
                "experience": [experience_entry],
                "education": [education_entry],
            },
        }

        # Both extractions empty counts as a failure and triggers the fallback.
        if not (per_data or pro_data):
            raise ValueError("Mistral returned no output")
        print("------Mistral-----")
        return result
    except BadRequestError as e:
        logging.error(f"HuggingFace API error: {e}. Falling back to SpaCy.")
        print(f"HuggingFace API error: {e}. Falling back to SpaCy.")
    except Exception as e:
        logging.error(f"An error occurred while processing with Mistral: {e}. Falling back to SpaCy.")
        print(f"An error occurred while processing with Mistral: {e}. Falling back to SpaCy.")

    # Reached only when the Mistral path raised.
    logging.warning("Mistral failed, switching to SpaCy.")
    print("------Spacy-----")
    return Parser_from_model(file_path)
+# /////////////////////////////////////////////
+# ////////////////Spacy.py/////////////////////
+# /////////////////////////////////////////////
+import spacy
+from spacy.training import Example
+from spacy.util import minibatch, compounding
+from pathlib import Path
+from spacy.tokens import DocBin
+import random
# Load the training data from the .spacy file
def load_data_from_spacy_file(file_path):
    """Read every document stored in a serialized ``DocBin`` file.

    Args:
        file_path: Location of the ``.spacy`` file.

    Returns:
        A list of the documents, or ``[]`` when loading fails (the error is
        printed).
    """
    # A blank English pipeline supplies the vocab the documents are rebuilt on.
    vocab = spacy.blank("en").vocab
    try:
        stored = DocBin().from_disk(file_path)
        return list(stored.get_docs(vocab))
    except Exception as e:
        print(f"Error loading data from .spacy file: {e}")
        return []
# Train model function
def train_model(epochs, model_path):
    """Train a blank English NER pipeline and save it to ``model_path``.

    Training data is read from ``./data/Spacy_data.spacy``. After each epoch
    the pipeline is written to disk only if its NER loss is the lowest seen so
    far, so the saved model is the best-scoring one and not simply the last.

    Args:
        epochs: Maximum number of passes over the training data.
        model_path: Directory the pipeline is written to.

    Returns:
        List with the NER loss recorded for each epoch that ran.
    """
    nlp = spacy.blank("en")
    # Always bind ``ner``, whether the component was just added or was
    # already present in the pipeline.
    if "ner" in nlp.pipe_names:
        ner = nlp.get_pipe("ner")
    else:
        ner = nlp.add_pipe("ner")
    nlp.add_pipe("sentencizer")
    # Define all possible entity labels
    labels = [
        "PERSON", "CONTACT", "EMAIL", "ABOUT", "EXPERIENCE", "YEARS_EXPERIENCE",
        "UNIVERSITY", "SOFT_SKILL", "INSTITUTE", "LAST_QUALIFICATION_YEAR", "JOB_TITLE",
        "COMPANY", "COURSE", "DOB", "HOBBIES", "LINK", "SCHOOL", "QUALIFICATION",
        "LANGUAGE", "LOCATION", "PROJECTS", "SKILL", "CERTIFICATE"
    ]
    for label in labels:
        ner.add_label(label)
    train_data = load_data_from_spacy_file("./data/Spacy_data.spacy")
    # ``initialize`` is the spaCy v3 name for the deprecated ``begin_training``.
    optimizer = nlp.initialize()
    epoch_losses = []
    best_loss = float('inf')
    checkpoint_saved = False
    for epoch in range(epochs):
        losses = {}
        random.shuffle(train_data)  # Shuffle data for better training
        batches = minibatch(train_data, size=compounding(4.0, 32.0, 1.001))
        for batch in batches:
            # Rebuild each stored document as an Example from its text and
            # character-offset entity annotations.
            examples = [
                Example.from_dict(
                    nlp.make_doc(doc.text),
                    {"entities": [(ent.start_char, ent.end_char, ent.label_) for ent in doc.ents]},
                )
                for doc in batch
            ]
            nlp.update(examples, sgd=optimizer, drop=0.35, losses=losses)
        current_loss = losses.get("ner", float('inf'))
        epoch_losses.append(current_loss)
        print(f"Losses at epoch {epoch + 1}: {losses}")
        # Checkpoint before the early exit so a zero-loss epoch is saved too.
        if current_loss < best_loss:
            best_loss = current_loss
            nlp.to_disk(model_path)
            checkpoint_saved = True
        # Stop training if the loss is zero
        if current_loss == 0:
            break
    # Save unconditionally only when no epoch produced a checkpoint (no epochs
    # ran or no NER loss was ever recorded); otherwise a final save would
    # overwrite the best model with the last epoch's weights.
    if not checkpoint_saved:
        nlp.to_disk(model_path)
    return epoch_losses

utils/error.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import logging
+from flask import render_template, request
# Set up logging for errors: a module-level logger that only handles ERROR and
# above, writing each record both to a file and to the console.
logger = logging.getLogger(__name__)
logger.setLevel(logging.ERROR)
# File handler for logging errors to a file ('app_error.log', relative to the
# process working directory)
file_handler = logging.FileHandler('app_error.log')
file_handler.setLevel(logging.ERROR)
file_formatter = logging.Formatter('%(asctime)s %(levelname)s: %(message)s')
file_handler.setFormatter(file_formatter)
logger.addHandler(file_handler)
# Console handler for logging errors to the terminal, using the same record
# format as the file handler
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.ERROR)
console_formatter = logging.Formatter('%(asctime)s %(levelname)s: %(message)s')
console_handler.setFormatter(console_formatter)
logger.addHandler(console_handler)
# 404 Error Handler
def page_not_found(e):
    """Log the requested URL and render the 404 page.

    Args:
        e: The error passed in by the caller; not used.

    Returns:
        Tuple of the rendered ``404.html`` template and status code 404.
    """
    requested_url = request.url
    logger.error(f"404 Error: {requested_url}")
    page = render_template('404.html')
    return page, 404
# 500 Error Handler
def internal_server_error(e):
    """Log the error with the requested URL and render the 500 page.

    Args:
        e: The error being handled; included in the log message.

    Returns:
        Tuple of the rendered ``500.html`` template and status code 500.
    """
    requested_url = request.url
    logger.error(f"500 Error: {e}, URL: {requested_url}")
    page = render_template('500.html')
    return page, 500
# File Not Found Error Handler
def handle_file_not_found():
    """Log a missing-file error and render the generic error page.

    Returns:
        Tuple of the rendered ``error.html`` template and status code 404.
    """
    logger.error("File not found.")
    page = render_template(
        'error.html',
        message="The file you are looking for does not exist.",
    )
    return page, 404
# Invalid File Type Error Handler
def handle_invalid_file_type():
    """Log an invalid-file-type error and render the generic error page.

    Returns:
        Tuple of the rendered ``error.html`` template and status code 400.
    """
    logger.error("Invalid file type.")
    page = render_template(
        'error.html',
        message="Invalid file type. Allowed types: pdf, docx, rsf, odt, png, jpg, jpeg.",
    )
    return page, 400
# File Processing Error Handler
def handle_file_processing_error():
    """Log a processing failure and render the generic error page.

    Returns:
        Tuple of the rendered ``error.html`` template and status code 500.
    """
    logger.error("File processing failed.")
    page = render_template(
        'error.html',
        message="Failed to process the file.",
    )
    return page, 500

utils/fileTotext.py ADDED Viewed

	@@ -0,0 +1,127 @@

+import os
+import re
+import fitz
+import logging
+from PIL import Image
+from pdf2image import convert_from_path
+import platform
+import pytesseract
+import docx
+from odf.opendocument import load as load_odt
+from odf.text import P
# Point pytesseract at the tesseract executable. The location is hard-coded
# per platform at import time: the default installer path on Windows, and
# /usr/bin/tesseract everywhere else.
if platform.system() == "Windows":
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
else:
    # For Hugging Face Spaces or other Linux environments
    pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'
+# # Set up logging
+# logging.basicConfig(
+#     level=logging.DEBUG,
+#     format='%(asctime)s - %(levelname)s - %(message)s',
+#     handlers=[logging.StreamHandler()]
+# )
+# # Path to Tesseract executable
+# tesseract_path = os.getenv('TESSERACT_CMD', '/usr/bin/tesseract')
+# pytesseract.pytesseract.tesseract_cmd = tesseract_path
# Function to extract text from PDF using PyMuPDF
def extract_text_from_pdf(file_path):
    """Extract text and hyperlink targets from a PDF.

    Pages with a text layer are read directly. A page without one is
    rasterised and passed through Tesseract OCR.

    Args:
        file_path: Path of the PDF file.

    Returns:
        Tuple ``(text, hyperlinks)``. ``hyperlinks`` holds each distinct link
        URI once, in order of first appearance. On any error the failure is
        logged and ``("", [])`` is returned.
    """
    text_parts = []
    hyperlinks = []
    try:
        # The context manager closes the document even when a page fails.
        with fitz.open(file_path) as doc:
            for page_num in range(doc.page_count):
                page = doc.load_page(page_num)
                page_text = page.get_text("text")
                if page_text.strip():
                    text_parts.append(page_text)
                else:
                    # OCR only this page (pdf2image page numbers are 1-based).
                    # Converting the whole file here would OCR every page
                    # again for each text-less page and duplicate the text.
                    images = convert_from_path(
                        file_path,
                        dpi=300,
                        first_page=page_num + 1,
                        last_page=page_num + 1,
                    )
                    for image in images:
                        text_parts.append(pytesseract.image_to_string(image))
                for link in page.get_links():
                    uri = link.get("uri")
                    if uri:
                        hyperlinks.append(uri)
    except Exception as e:
        logging.error(f"Error extracting text or hyperlinks from PDF: {e}")
        return "", []
    # dict.fromkeys removes duplicates while keeping a stable order.
    return "".join(text_parts), list(dict.fromkeys(hyperlinks))
# Function to extract text from DOCX
def extract_text_from_docx(file_path):
    """Return the paragraph text of a DOCX file, one paragraph per line.

    Args:
        file_path: Path of the DOCX file.

    Returns:
        The paragraphs joined with newlines, or ``""`` when reading fails
        (the error is logged).
    """
    try:
        document = docx.Document(file_path)
        paragraph_texts = [paragraph.text for paragraph in document.paragraphs]
    except Exception as e:
        logging.error(f"Error extracting text from DOCX: {e}")
        return ""
    return "\n".join(paragraph_texts)
# Function to extract text from RSF (assuming text-based format)
def extract_text_from_rsf(file_path):
    """Read an RSF file as UTF-8 text.

    Args:
        file_path: Path of the file to read.

    Returns:
        The full file content, or ``""`` when the file cannot be opened or
        decoded (the error is logged).
    """
    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            content = handle.read()
    except Exception as e:
        logging.error(f"Error extracting text from RSF: {e}")
        return ""
    return content
# Function to extract text from ODT
def extract_text_from_odt(file_path):
    """Return the paragraph text of an ODT file, one paragraph per line.

    Args:
        file_path: Path of the ODT file.

    Returns:
        The text of every non-empty paragraph joined with newlines, or ``""``
        when reading fails (the error is logged).
    """
    # Local import: teletype belongs to the odf package this module already
    # depends on.
    from odf import teletype

    try:
        odt_doc = load_odt(file_path)
        paragraphs = odt_doc.getElementsByType(P)
        # extractText walks the whole paragraph subtree. Reading
        # ``firstChild.data`` instead fails when a paragraph starts with an
        # element such as a span, and that single failure used to discard the
        # text of the entire document.
        text = "\n".join(
            teletype.extractText(paragraph)
            for paragraph in paragraphs
            if paragraph.firstChild
        )
        return text
    except Exception as e:
        logging.error(f"Error extracting text from ODT: {e}")
        return ""
# Function to extract text from images using Tesseract
def extract_text_from_image(file_path):
    """Run Tesseract OCR over an image file.

    Args:
        file_path: Path of the image.

    Returns:
        The recognised text, or ``""`` when opening or OCR fails (the error
        is logged).
    """
    try:
        # The context manager releases the underlying file handle once OCR
        # has finished, instead of leaving it open.
        with Image.open(file_path) as img:
            return pytesseract.image_to_string(img)
    except Exception as e:
        logging.error(f"Error extracting text from image: {e}")
        return ""
# Function to clean and preprocess the extracted text
def preprocess_text(text):
    """Collapse whitespace and set phone-like numbers apart with spaces.

    Args:
        text: Raw extracted text.

    Returns:
        The text with every whitespace run (newlines included) replaced by a
        single space, each 3-3-4 digit group wrapped in spaces, and the ends
        trimmed.
    """
    # Collapsing ``\s+`` already removes every newline, so no separate
    # newline pass is needed.
    collapsed = re.sub(r'\s+', ' ', text)
    phone_like = r'(\b\d{3}[-.\s]??\d{3}[-.\s]??\d{4}\b)'
    padded = re.sub(phone_like, r' \1 ', collapsed)
    return padded.strip()
# Function to automatically detect file format and extract text
def extract_text_based_on_format(file_path):
    """Pick an extractor from the file extension and run it.

    Args:
        file_path: Path of the file; only its lower-cased extension is used
            to choose the extractor.

    Returns:
        Tuple ``(text, hyperlinks)``. Hyperlinks are only collected for PDFs;
        every other format yields an empty list.

    Raises:
        ValueError: If the extension is not one of the supported formats.
    """
    file_ext = os.path.splitext(file_path)[1].lower()
    image_exts = ('.png', '.jpg', '.jpeg')
    if file_ext not in ('.pdf', '.docx', '.rsf', '.odt') + image_exts:
        raise ValueError("Unsupported file format")

    # PDF is the only format whose extractor also reports hyperlinks.
    if file_ext == '.pdf':
        text, hyperlinks = extract_text_from_pdf(file_path)
        return text, hyperlinks

    if file_ext == '.docx':
        text = extract_text_from_docx(file_path)
    elif file_ext == '.rsf':
        text = extract_text_from_rsf(file_path)
    elif file_ext == '.odt':
        text = extract_text_from_odt(file_path)
    else:
        text = extract_text_from_image(file_path)
    return text, []

utils/mistral.py ADDED Viewed

	@@ -0,0 +1,377 @@

+# mistral.py
+import os
+import json
+import logging
+from huggingface_hub import InferenceClient
+from huggingface_hub.utils._errors import BadRequestError
+from dotenv import load_dotenv
+from utils.fileTotext import extract_text_based_on_format
+import re
+from utils.spacy import Parser_from_model
# Load environment variables from a local .env file so that HF_TOKEN can be
# supplied without exporting it in the shell.
load_dotenv()
# Authenticate with Hugging Face. The token is read once, at import time, and
# importing this module fails with ValueError when it is missing or empty.
HFT = os.getenv('HF_TOKEN')
if not HFT:
    raise ValueError("Hugging Face token is not set in environment variables.")
# Module-level inference client bound to the Mistral-Nemo instruct model.
client = InferenceClient(model="mistralai/Mistral-Nemo-Instruct-2407", token=HFT)
# Function to clean model output
def Data_Cleaner(text):
    """Strip chat boilerplate from a model reply so it can be JSON-decoded.

    Everything up to and including the first ``format:`` marker is dropped
    (when present), then surrounding whitespace, Markdown back-tick fences and
    a leading ``json`` fence tag are removed.

    Args:
        text: Raw reply text produced by the model.

    Returns:
        The cleaned payload as a string. It is not validated as JSON here.
    """
    parts = re.split(r".*?format:", text, maxsplit=1)
    payload = parts[1] if len(parts) > 1 else text
    payload = payload.strip().strip('`').strip()
    # Remove the fence language tag as a prefix. ``str.strip('json')`` would
    # instead strip any of the characters j/s/o/n from both ends, which
    # corrupts payloads such as ``null`` and misses an upper-case ``JSON`` tag.
    if payload[:4].lower() == 'json':
        payload = payload[4:]
    return payload.strip()
# Function to call Mistral and process output
def Model_ProfessionalDetails_Output(resume, client):
    """Ask the chat model for the professional section of a resume as JSON.

    Args:
        resume: Resume text; interpolated verbatim into the user prompt.
        client: Object exposing ``chat_completion(messages=..., max_tokens=...,
            stream=..., temperature=...)`` that yields chunks whose text is at
            ``chunk.choices[0].delta.content``.

    Returns:
        Whatever ``json.loads`` produces for the cleaned reply, or ``{}`` when
        the reply is not valid JSON (the decode error is logged).
    """
    system_role = {
        "role": "system",
        "content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return 'not found'."
    }
    user_prompt = {
        "role": "user",
        "content": f'''Act as a resume parser for the following text given in text: {resume}
    Extract the text in the following output JSON string as:
    {{
        "professional": {{
            "technical_skills": "List all technical skills, programming languages, frameworks, and technologies mentioned in the resume, ensuring they are not mixed with other skill types.",
            "non_technical_skills": "Identify and list non-technical skills such as leadership, teamwork, and communication skills, ensuring they are not mixed with technical skills.",
            "tools": "Enumerate all software tools, platforms, and applications (e.g., Figma, Unity, MS Office, etc.) referenced in the resume, distinctly separate from skills.",
            "projects": "Extract the names or titles of all projects mentioned in the resume.",
            "projects_experience": "Summarize overall project experiences, providing a brief description of each project as detailed in the resume.",
            "experience": "Calculate total professional work experience in years and months based on the resume.",
            "companies_worked_at": "List the names of all companies where employment is mentioned in the resume.",
            "certifications": "Extract and list all certifications obtained as stated in the resume.",
            "roles": "Include the names of all job titles or roles held as indicated in the resume.",
            "qualifications": "List educational qualifications (e.g., B.Tech) from the resume. If none are found, return 'No education listed'.",
            "courses": "Extract the names of completed courses based on the resume. If none are found, return 'No courses listed'.",
            "university": "Identify the name of the university, college, or institute attended, based on the resume. If not found, return 'No university listed'.",
            "year_of_graduation": "Extract the year of graduation from the resume. If not found, return 'No year of graduation listed'."
        }}
    }}
    Json Output:
    '''
    }
    # A streamed chunk may carry no text (``content`` is None); count it as an
    # empty fragment instead of raising TypeError on string concatenation.
    fragments = []
    for message in client.chat_completion(messages=[system_role, user_prompt], max_tokens=3000, stream=True, temperature=0.35):
        fragments.append(message.choices[0].delta.content or "")
    response = "".join(fragments)
    try:
        clean_response = Data_Cleaner(response)
        parsed_response = json.loads(clean_response)
    except json.JSONDecodeError as e:
        logging.error(f"JSON Decode Error: {e}")
        return {}
    return parsed_response
def Model_PersonalDetails_Output(resume, client):
    """Ask the chat model for the personal section of a resume as JSON.

    Args:
        resume: Resume text; interpolated verbatim into the user prompt.
        client: Object exposing ``chat_completion(messages=..., max_tokens=...,
            stream=..., temperature=...)`` that yields chunks whose text is at
            ``chunk.choices[0].delta.content``.

    Returns:
        Whatever ``json.loads`` produces for the cleaned reply, or ``{}`` when
        the reply is not valid JSON (the error and raw reply are logged).
    """
    # NOTE(review): the system prompt says "professional details" although this
    # function requests the personal block - looks like a copy/paste; confirm
    # before changing, since it alters what the model is told.
    system_role = {
        "role": "system",
        "content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return 'not found'."
    }
    user_prompt = {
        "role": "user",
        "content": f'''Act as a resume parser for the following text given in text: {resume}
    Extract the text in the following output JSON string as:
    {{
        "personal": {{
            "name": "Extract the full name based on the resume. If not found, return 'No name listed'.",
            "contact_number": "Extract the contact number from the resume. If not found, return 'No contact number listed'.",
            "email": "Extract the email address from the resume. If not found, return 'No email listed'.",
            "Address": "Extract the Address or address from the resume. If not found, return 'No Address listed'.",
            "link": "Extract any relevant links (e.g., portfolio, LinkedIn) from the resume. If not found, return 'No link listed'."
        }}
    }}
    output:
    '''
    }
    # A streamed chunk may carry no text (``content`` is None); count it as an
    # empty fragment instead of raising TypeError on string concatenation.
    fragments = []
    for message in client.chat_completion(
        messages=[system_role, user_prompt],
        max_tokens=3000,
        stream=True,
        temperature=0.35,
    ):
        fragments.append(message.choices[0].delta.content or "")
    response = "".join(fragments)
    # Handle cases where the response might have formatting issues
    try:
        clean_response = Data_Cleaner(response)
        parsed_response = json.loads(clean_response)
    except json.JSONDecodeError as e:
        # Report through logging, like the professional-details function.
        logging.error(f"JSON Decode Error: {e}")
        logging.error(f"Raw Response: {response}")
        return {}
    return parsed_response
# Regex patterns for post-processing extracted fields.
# The two URL patterns are unanchored at the end, so they match any string
# that starts with an http(s) LinkedIn / GitHub address.
linkedin_pattern = r"https?://(?:www\.)?linkedin\.com/[\w\-_/]+"
github_pattern = r"https?://(?:www\.)?github\.com/[\w\-_/]+"
# Whole-string patterns (anchored with ^ and $) for an e-mail address and for
# a phone number made of 7-15 digits, spaces, dashes or parentheses with an
# optional leading '+'.
email_pattern = r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$"
contact_pattern = r"^\+?[\d\s\-()]{7,15}$"
def extract_links(hyperlinks):
    """Split a collection of URLs into LinkedIn and GitHub links.

    Args:
        hyperlinks: Iterable of URL strings.

    Returns:
        Tuple ``(linkedin_links, github_links)`` of lists, each keeping the
        input order. URLs that match neither pattern are dropped.
    """
    linkedin_links, github_links = [], []
    for url in hyperlinks:
        # ``re.match`` anchors at the start of the string; LinkedIn is tested
        # first, so a URL is never placed in both lists.
        if re.match(linkedin_pattern, url):
            linkedin_links.append(url)
            continue
        if re.match(github_pattern, url):
            github_links.append(url)
    return linkedin_links, github_links
def is_valid_email(email):
    """Report whether ``email`` looks like a single e-mail address.

    Args:
        email: Candidate value. Anything that is not a string is treated as
            invalid rather than raising.

    Returns:
        ``True`` when the whole string matches the address pattern,
        ``False`` otherwise.
    """
    if not isinstance(email, str):
        return False
    email_regex = r'^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$'
    # fullmatch is used because ``$`` under ``re.match`` also matches just
    # before a trailing newline, which would accept "user@host.com\n".
    return re.fullmatch(email_regex, email) is not None
+def is_valid_contact(contact):
+        patterns = [
+        r'^\+91[\s\.\-\/]?\(?0?\)?[\s\-\.\/]?\d{5}[\s\-\.\/]?\d{5}$',  # +91 with optional 0 and separators
+        r'^\+91[\s\.\-\/]?\d{5}[\s\-\.\/]?\d{5}$',  # +91 with 10 digits separated
+        r'^\d{5}[\s\-\.\/]?\d{5}$',  # Local format without country code
+        r'^\+91[\s\.\-\/]?\d{10}$',  # +91 with 10 digits together
+        r'^\d{10}$',  # 10 digits together
+        r'^\+91[\s\.\-\/]?\(?\d{5}\)?[\s\-\.\/]?\d{5}[\s\-\.\/]?\d{5}$'  # +91 with varying separators
+        r'\+1\s\(\d{3}\)\s\d{3}-\d{4} ',               # USA/Canada Intl +1 (XXX) XXX-XXXX
+        r'\(\d{3}\)\s\d{3}-\d{4} ',                    # USA/Canada STD (XXX) XXX-XXXX
+        r'\(\d{3}\)\s\d{3}\s\d{4} ',                   # USA/Canada (XXX) XXX XXXX
+        r'\(\d{3}\)\s\d{3}\s\d{3} ',                   # USA/Canada (XXX) XXX XXX
+        r'\+1\d{10} ',                                 # +1 XXXXXXXXXX
+        r'\d{10} ',                                    # XXXXXXXXXX
+        r'\+44\s\d{4}\s\d{6} ',                        # UK Intl +44 XXXX XXXXXX
+        r'\+44\s\d{3}\s\d{3}\s\d{4} ',                 # UK Intl +44 XXX XXX XXXX
+        r'0\d{4}\s\d{6} ',                             # UK STD 0XXXX XXXXXX
+        r'0\d{3}\s\d{3}\s\d{4} ',                      # UK STD 0XXX XXX XXXX
+        r'\+44\d{10} ',                                # +44 XXXXXXXXXX
+        r'0\d{10} ',                                   # 0XXXXXXXXXX
+        r'\+61\s\d\s\d{4}\s\d{4} ',                    # Australia Intl +61 X XXXX XXXX
+        r'0\d\s\d{4}\s\d{4} ',                         # Australia STD 0X XXXX XXXX
+        r'\+61\d{9} ',                                 # +61 XXXXXXXXX
+        r'0\d{9} ',                                    # 0XXXXXXXXX
+        r'\+91\s\d{5}-\d{5} ',                         # India Intl +91 XXXXX-XXXXX
+        r'\+91\s\d{4}-\d{6} ',                         # India Intl +91 XXXX-XXXXXX
+        r'\+91\s\d{10} ',                              # India Intl +91 XXXXXXXXXX
+        r'0\d{2}-\d{7} ',                              # India STD 0XX-XXXXXXX
+        r'\+91\d{10} ',                                # +91 XXXXXXXXXX
+        r'\+49\s\d{4}\s\d{8} ',                        # Germany Intl +49 XXXX XXXXXXXX
+        r'\+49\s\d{3}\s\d{7} ',                        # Germany Intl +49 XXX XXXXXXX
+        r'0\d{3}\s\d{8} ',                             # Germany STD 0XXX XXXXXXXX
+        r'\+49\d{12} ',                                # +49 XXXXXXXXXXXX
+        r'\+49\d{10} ',                                # +49 XXXXXXXXXX
+        r'0\d{11} ',                                   # 0XXXXXXXXXXX
+        r'\+86\s\d{3}\s\d{4}\s\d{4} ',                 # China Intl +86 XXX XXXX XXXX
+        r'0\d{3}\s\d{4}\s\d{4} ',                      # China STD 0XXX XXXX XXXX
+        r'\+86\d{11} ',                                # +86 XXXXXXXXXXX
+        r'\+81\s\d\s\d{4}\s\d{4} ',                    # Japan Intl +81 X XXXX XXXX
+        r'\+81\s\d{2}\s\d{4}\s\d{4} ',                 # Japan Intl +81 XX XXXX XXXX
+        r'0\d\s\d{4}\s\d{4} ',                         # Japan STD 0X XXXX XXXX
+        r'\+81\d{10} ',                                # +81 XXXXXXXXXX
+        r'\+81\d{9} ',                                 # +81 XXXXXXXXX
+        r'0\d{9} ',                                    # 0XXXXXXXXX
+        r'\+55\s\d{2}\s\d{5}-\d{4} ',                  # Brazil Intl +55 XX XXXXX-XXXX
+        r'\+55\s\d{2}\s\d{4}-\d{4} ',                  # Brazil Intl +55 XX XXXX-XXXX
+        r'0\d{2}\s\d{4}\s\d{4} ',                      # Brazil STD 0XX XXXX XXXX
+        r'\+55\d{11} ',                                # +55 XXXXXXXXXXX
+        r'\+55\d{10} ',                                # +55 XXXXXXXXXX
+        r'0\d{10} ',                                   # 0XXXXXXXXXX
+        r'\+33\s\d\s\d{2}\s\d{2}\s\d{2}\s\d{2} ',      # France Intl +33 X XX XX XX XX
+        r'0\d\s\d{2}\s\d{2}\s\d{2}\s\d{2} ',           # France STD 0X XX XX XX XX
+        r'\+33\d{9} ',                                 # +33 XXXXXXXXX
+        r'0\d{9} ',                                    # 0XXXXXXXXX
+        r'\+7\s\d{3}\s\d{3}-\d{2}-\d{2} ',             # Russia Intl +7 XXX XXX-XX-XX
+        r'8\s\d{3}\s\d{3}-\d{2}-\d{2} ',               # Russia STD 8 XXX XXX-XX-XX
+        r'\+7\d{10} ',                                 # +7 XXXXXXXXXX
+        r'8\d{10} ',                                   # 8 XXXXXXXXXX
+        r'\+27\s\d{2}\s\d{3}\s\d{4} ',                 # South Africa Intl +27 XX XXX XXXX
+        r'0\d{2}\s\d{3}\s\d{4} ',                      # South Africa STD 0XX XXX XXXX
+        r'\+27\d{9} ',                                 # +27 XXXXXXXXX
+        r'0\d{9} ',                                    # 0XXXXXXXXX
+        r'\+52\s\d{3}\s\d{3}\s\d{4} ',                 # Mexico Intl +52 XXX XXX XXXX
+        r'\+52\s\d{2}\s\d{4}\s\d{4} ',                 # Mexico Intl +52 XX XXXX XXXX
+        r'01\s\d{3}\s\d{4} ',                          # Mexico STD 01 XXX XXXX
+        r'\+52\d{10} ',                                # +52 XXXXXXXXXX
+        r'01\d{7} ',                                   # 01 XXXXXXX
+        r'\+234\s\d{3}\s\d{3}\s\d{4} ',                # Nigeria Intl +234 XXX XXX XXXX
+        r'0\d{3}\s\d{3}\s\d{4} ',                      # Nigeria STD 0XXX XXX XXXX
+        r'\+234\d{10} ',                               # +234 XXXXXXXXXX
+        r'0\d{10} ',                                   # 0XXXXXXXXXX
+        r'\+971\s\d\s\d{3}\s\d{4} ',                   # UAE Intl +971 X XXX XXXX
+        r'0\d\s\d{3}\s\d{4} ',                         # UAE STD 0X XXX XXXX
+        r'\+971\d{8} ',                                # +971 XXXXXXXX
+        r'0\d{8} ',                                    # 0XXXXXXXX
+        r'\+54\s9\s\d{3}\s\d{3}\s\d{4} ',              # Argentina Intl +54 9 XXX XXX XXXX
+        r'\+54\s\d{1}\s\d{4}\s\d{4} ',                 # Argentina Intl +54 X XXXX XXXX
+        r'0\d{3}\s\d{4} ',                             # Argentina STD 0XXX XXXX
+        r'\+54\d{10} ',                                # +54 9 XXXXXXXXXX
+        r'\+54\d{9} ',                                 # +54 XXXXXXXXX
+        r'0\d{7} ',                                    # 0XXXXXXX
+        r'\+966\s\d\s\d{3}\s\d{4} ',                   # Saudi Intl +966 X XXX XXXX
+        r'0\d\s\d{3}\s\d{4} ',                         # Saudi STD 0X XXX XXXX
+        r'\+966\d{8} ',                                # +966 XXXXXXXX
+        r'0\d{8} ',                                    # 0XXXXXXXX
+        r'\+1\d{10} ',                                 # +1 XXXXXXXXXX
+        r'\+1\s\d{3}\s\d{3}\s\d{4} ',                  # +1 XXX XXX XXXX
+        r'\d{5}\s\d{5} ',                              # XXXXX XXXXX
+        r'\d{10} ',                                    # XXXXXXXXXX
+        r'\+44\d{10} ',                                # +44 XXXXXXXXXX
+        r'0\d{10} ',                                   # 0XXXXXXXXXX
+        r'\+61\d{9} ',                                 # +61 XXXXXXXXX
+        r'0\d{9} ',                                    # 0XXXXXXXXX
+        r'\+91\d{10} ',                                # +91 XXXXXXXXXX
+        r'\+49\d{12} ',                                # +49 XXXXXXXXXXXX
+        r'\+49\d{10} ',                                # +49 XXXXXXXXXX
+        r'0\d{11} ',                                   # 0XXXXXXXXXXX
+        r'\+86\d{11} ',                                # +86 XXXXXXXXXXX
+        r'\+81\d{10} ',                                # +81 XXXXXXXXXX
+        r'\+81\d{9} ',                                 # +81 XXXXXXXXX
+        r'0\d{9} ',                                    # 0XXXXXXXXX
+        r'\+55\d{11} ',                                # +55 XXXXXXXXXXX
+        r'\+55\d{10} ',                                # +55 XXXXXXXXXX
+        r'0\d{10} ',                                   # 0XXXXXXXXXX
+        r'\+33\d{9} ',                                 # +33 XXXXXXXXX
+        r'0\d{9} ',                                    # 0XXXXXXXXX
+        r'\+7\d{10} ',                                 # +7 XXXXXXXXXX
+        r'8\d{10} ',                                   # 8 XXXXXXXXXX
+        r'\+27\d{9} ',                                 # +27 XXXXXXXXX
+        r'0\d{9} ',                                    # 0XXXXXXXXX (South Africa STD)
+        r'\+52\d{10} ',                                # +52 XXXXXXXXXX
+        r'01\d{7} ',                                   # 01 XXXXXXX
+        r'\+234\d{10} ',                               # +234 XXXXXXXXXX
+        r'0\d{10} ',                                   # 0XXXXXXXXXX
+        r'\+971\d{8} ',                                # +971 XXXXXXXX
+        r'0\d{8} ',                                    # 0XXXXXXXX
+        r'\+54\s9\s\d{10} ',                           # +54 9 XXXXXXXXXX
+        r'\+54\d{9} ',                                 # +54 XXXXXXXXX
+        r'0\d{7} ',                                    # 0XXXXXXX
+        r'\+966\d{8} ',                                # +966 XXXXXXXX
+        r'0\d{8}'                                     # 0XXXXXXXX
+    ]
+    # Check if the contact matches any of the patterns
+        return any(re.match(pattern, contact) for pattern in patterns) is not None
def validate_contact_email(personal_data):
    """Validate the contact number and email held in *personal_data*.

    The ``'contact'`` and ``'email'`` keys are read with ``'Not found'`` as the
    fallback. A value equal to ``'Not found'`` is reported as invalid without
    calling the validator.

    Returns:
        A 4-tuple ``(valid_contact, contact_status, valid_email, email_status)``
        where the flags are the validator results (``False`` for missing
        values) and the status entries are the strings
        ``'Valid contact'``/``'Invalid contact'`` and
        ``'Valid email'``/``'Invalid email'``.
    """
    missing = 'Not found'
    contact = personal_data.get('contact', missing)
    email = personal_data.get('email', missing)

    # Only run the validators on values that were actually extracted.
    valid_contact = False
    if contact != missing:
        valid_contact = is_valid_contact(contact)

    valid_email = False
    if email != missing:
        valid_email = is_valid_email(email)

    contact_status = 'Valid contact' if valid_contact else 'Invalid contact'
    email_status = 'Valid email' if valid_email else 'Invalid email'
    return valid_contact, contact_status, valid_email, email_status
def process_resume_data(file_path):
    """Parse the resume at *file_path*, preferring Mistral and falling back to spaCy.

    Text and hyperlinks are obtained from ``extract_text_based_on_format``. When
    no text comes back, ``{"error": "Text extraction failed"}`` is returned.
    Otherwise the personal and professional Mistral extractors are called and
    their output is merged into one dictionary with ``"personal"`` and
    ``"professional"`` sections, annotated with the contact/email validation
    results.

    If either extractor raises, or both yield nothing, the failure is logged
    and printed, and the result of ``Parser_from_model(file_path)`` is
    returned instead.
    """
    resume_text, hyperlinks = extract_text_based_on_format(file_path)
    print("Resume converted to text successfully.")
    if not resume_text:
        return {"error": "Text extraction failed"}

    # Split out the LinkedIn and GitHub URLs from the extracted hyperlinks.
    linkedin_links, github_links = extract_links(hyperlinks)

    try:
        per_data = Model_PersonalDetails_Output(resume_text, client)
        pro_data = Model_ProfessionalDetails_Output(resume_text, client)

        # Normalise empty model output to {} so the lookups below stay uniform.
        if not per_data:
            logging.warning("Mistral personal data extraction failed.")
            per_data = {}
        if not pro_data:
            logging.warning("Mistral professional data extraction failed.")
            pro_data = {}

        def personal_field(key):
            # Look up one field of the model's "personal" section.
            return per_data.get('personal', {}).get(key, 'Not found')

        def professional_field(key):
            # Look up one field of the model's "professional" section.
            return pro_data.get('professional', {}).get(key, 'Not found')

        result = {
            "personal": {
                "name": personal_field('name'),
                "contact": personal_field('contact_number'),
                "email": personal_field('email'),
                "location": personal_field('Address'),
                "linkedin": linkedin_links,
                "github": github_links,
                "other_links": hyperlinks  # Store remaining links if needed
            },
            "professional": {
                "technical_skills": professional_field('technical_skills'),
                "non_technical_skills": professional_field('non_technical_skills'),
                "tools": professional_field('tools'),
                "experience": [
                    {
                        "company": professional_field('companies_worked_at'),
                        "projects": professional_field('projects'),
                        "role": professional_field('worked_as'),
                        "years": professional_field('experience'),
                        "project_experience": professional_field('projects_experience')
                    }
                ],
                "education": [
                    {
                        "qualification": professional_field('qualification'),
                        "university": professional_field('university'),
                        "course": professional_field('course'),
                        "certificate": professional_field('certification')
                    }
                ]
            }
        }

        # Attach the validation flags and their status strings.
        personal = result['personal']
        (
            personal['valid_contact'],
            personal['invalid_contact'],
            personal['valid_email'],
            personal['invalid_email'],
        ) = validate_contact_email(personal)

        # Nothing usable from either extractor: hand over to the fallback.
        if not (per_data or pro_data):
            raise ValueError("Mistral returned no output")

        logging.info("Successfully extracted data using Mistral.")
        print("---------Mistral-------")
        return result

    # Handle HuggingFace API or Mistral model errors
    except BadRequestError as err:
        message = f"HuggingFace API error: {err}. Falling back to SpaCy."
        logging.error(message)
        print(message)
    except Exception as err:
        message = f"An error occurred while processing with Mistral: {err}. Falling back to SpaCy."
        logging.error(message)
        print(message)

    # Fallback to SpaCy if Mistral fails
    logging.warning("Mistral failed, switching to SpaCy.")
    print("---------SpaCy-------")
    return Parser_from_model(file_path)

utils/spacy.py ADDED Viewed

	@@ -0,0 +1,246 @@

+import spacy
+import logging
+import json
+from utils.fileTotext import extract_text_based_on_format
+import re
def is_valid_email(email):
    """Return True when *email* has the shape ``local@domain.tld``.

    The local part may contain letters, digits, ``_``, ``.``, ``+`` and ``-``.
    The first domain label may contain letters, digits and ``-``. The rest of
    the domain may additionally contain dots.

    The whole string must match: ``re.fullmatch`` is used so that a trailing
    newline (which ``$`` would tolerate) is rejected. Non-string input is
    reported as invalid instead of raising ``TypeError``.
    """
    if not isinstance(email, str):
        return False
    email_regex = r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9.-]+'
    return re.fullmatch(email_regex, email) is not None
# Accepted phone-number layouts.  Every entry has to match the complete,
# whitespace-trimmed contact string.  Exact duplicates from the earlier list
# have been removed; entries shared by several regions are noted inline.
_CONTACT_PATTERNS = tuple(re.compile(_pattern) for _pattern in (
    # India
    r'\+91[\s\.\-\/]?\(?0?\)?[\s\-\.\/]?\d{5}[\s\-\.\/]?\d{5}',  # +91 with optional 0 and separators
    r'\+91[\s\.\-\/]?\d{5}[\s\-\.\/]?\d{5}',       # +91 with 10 digits separated
    r'\d{5}[\s\-\.\/]?\d{5}',                      # Local format without country code
    r'\+91[\s\.\-\/]?\d{10}',                      # +91 with 10 digits together
    r'\d{10}',                                     # XXXXXXXXXX
    # NOTE(review): this entry allows 15 digits after +91; carried over as
    # written -- confirm it is intended.
    r'\+91[\s\.\-\/]?\(?\d{5}\)?[\s\-\.\/]?\d{5}[\s\-\.\/]?\d{5}',
    r'\+91\s\d{5}-\d{5}',                          # +91 XXXXX-XXXXX
    r'\+91\s\d{4}-\d{6}',                          # +91 XXXX-XXXXXX
    r'\+91\s\d{10}',                               # +91 XXXXXXXXXX
    r'0\d{2}-\d{7}',                               # STD 0XX-XXXXXXX
    r'\+91\d{10}',                                 # +91XXXXXXXXXX
    r'\d{5}\s\d{5}',                               # XXXXX XXXXX
    # USA / Canada
    r'\+1\s\(\d{3}\)\s\d{3}-\d{4}',                # +1 (XXX) XXX-XXXX
    r'\(\d{3}\)\s\d{3}-\d{4}',                     # (XXX) XXX-XXXX
    r'\(\d{3}\)\s\d{3}\s\d{4}',                    # (XXX) XXX XXXX
    r'\(\d{3}\)\s\d{3}\s\d{3}',                    # (XXX) XXX XXX
    r'\+1\d{10}',                                  # +1XXXXXXXXXX
    r'\+1\s\d{3}\s\d{3}\s\d{4}',                   # +1 XXX XXX XXXX
    # United Kingdom
    r'\+44\s\d{4}\s\d{6}',                         # +44 XXXX XXXXXX
    r'\+44\s\d{3}\s\d{3}\s\d{4}',                  # +44 XXX XXX XXXX
    r'0\d{4}\s\d{6}',                              # 0XXXX XXXXXX
    r'0\d{3}\s\d{3}\s\d{4}',                       # 0XXX XXX XXXX (also Nigeria STD)
    r'\+44\d{10}',                                 # +44XXXXXXXXXX
    r'0\d{10}',                                    # 0XXXXXXXXXX
    # Australia
    r'\+61\s\d\s\d{4}\s\d{4}',                     # +61 X XXXX XXXX
    r'0\d\s\d{4}\s\d{4}',                          # 0X XXXX XXXX (also Japan STD)
    r'\+61\d{9}',                                  # +61XXXXXXXXX
    r'0\d{9}',                                     # 0XXXXXXXXX
    # Germany
    r'\+49\s\d{4}\s\d{8}',                         # +49 XXXX XXXXXXXX
    r'\+49\s\d{3}\s\d{7}',                         # +49 XXX XXXXXXX
    r'0\d{3}\s\d{8}',                              # 0XXX XXXXXXXX
    r'\+49\d{12}',                                 # +49XXXXXXXXXXXX
    r'\+49\d{10}',                                 # +49XXXXXXXXXX
    r'0\d{11}',                                    # 0XXXXXXXXXXX
    # China
    r'\+86\s\d{3}\s\d{4}\s\d{4}',                  # +86 XXX XXXX XXXX
    r'0\d{3}\s\d{4}\s\d{4}',                       # 0XXX XXXX XXXX
    r'\+86\d{11}',                                 # +86XXXXXXXXXXX
    # Japan
    r'\+81\s\d\s\d{4}\s\d{4}',                     # +81 X XXXX XXXX
    r'\+81\s\d{2}\s\d{4}\s\d{4}',                  # +81 XX XXXX XXXX
    r'\+81\d{10}',                                 # +81XXXXXXXXXX
    r'\+81\d{9}',                                  # +81XXXXXXXXX
    # Brazil
    r'\+55\s\d{2}\s\d{5}-\d{4}',                   # +55 XX XXXXX-XXXX
    r'\+55\s\d{2}\s\d{4}-\d{4}',                   # +55 XX XXXX-XXXX
    r'0\d{2}\s\d{4}\s\d{4}',                       # 0XX XXXX XXXX
    r'\+55\d{11}',                                 # +55XXXXXXXXXXX
    r'\+55\d{10}',                                 # +55XXXXXXXXXX
    # France
    r'\+33\s\d\s\d{2}\s\d{2}\s\d{2}\s\d{2}',       # +33 X XX XX XX XX
    r'0\d\s\d{2}\s\d{2}\s\d{2}\s\d{2}',            # 0X XX XX XX XX
    r'\+33\d{9}',                                  # +33XXXXXXXXX
    # Russia
    r'\+7\s\d{3}\s\d{3}-\d{2}-\d{2}',              # +7 XXX XXX-XX-XX
    r'8\s\d{3}\s\d{3}-\d{2}-\d{2}',                # 8 XXX XXX-XX-XX
    r'\+7\d{10}',                                  # +7XXXXXXXXXX
    r'8\d{10}',                                    # 8XXXXXXXXXX
    # South Africa
    r'\+27\s\d{2}\s\d{3}\s\d{4}',                  # +27 XX XXX XXXX
    r'0\d{2}\s\d{3}\s\d{4}',                       # 0XX XXX XXXX
    r'\+27\d{9}',                                  # +27XXXXXXXXX
    # Mexico
    r'\+52\s\d{3}\s\d{3}\s\d{4}',                  # +52 XXX XXX XXXX
    r'\+52\s\d{2}\s\d{4}\s\d{4}',                  # +52 XX XXXX XXXX
    r'01\s\d{3}\s\d{4}',                           # 01 XXX XXXX
    r'\+52\d{10}',                                 # +52XXXXXXXXXX
    r'01\d{7}',                                    # 01XXXXXXX
    # Nigeria
    r'\+234\s\d{3}\s\d{3}\s\d{4}',                 # +234 XXX XXX XXXX
    r'\+234\d{10}',                                # +234XXXXXXXXXX
    # United Arab Emirates
    r'\+971\s\d\s\d{3}\s\d{4}',                    # +971 X XXX XXXX
    r'0\d\s\d{3}\s\d{4}',                          # 0X XXX XXXX (also Saudi STD)
    r'\+971\d{8}',                                 # +971XXXXXXXX
    r'0\d{8}',                                     # 0XXXXXXXX
    # Argentina
    r'\+54\s9\s\d{3}\s\d{3}\s\d{4}',               # +54 9 XXX XXX XXXX
    r'\+54\s\d{1}\s\d{4}\s\d{4}',                  # +54 X XXXX XXXX
    r'0\d{3}\s\d{4}',                              # 0XXX XXXX
    r'\+54\d{10}',                                 # +54XXXXXXXXXX
    r'\+54\d{9}',                                  # +54XXXXXXXXX
    r'0\d{7}',                                     # 0XXXXXXX
    r'\+54\s9\s\d{10}',                            # +54 9 XXXXXXXXXX
    # Saudi Arabia
    r'\+966\s\d\s\d{3}\s\d{4}',                    # +966 X XXX XXXX
    r'\+966\d{8}',                                 # +966XXXXXXXX
))


def is_valid_contact(contact):
    """Return True when *contact* matches one of the known phone layouts.

    Surrounding whitespace is ignored, and the remaining text must match a
    pattern in ``_CONTACT_PATTERNS`` in full; a number followed by extra text
    is rejected. Non-string and empty input is reported as invalid.

    This replaces an implementation that returned True for every input: it
    compared the boolean result of ``any(...)`` against ``None``, and most of
    its patterns required a trailing space after the number.
    """
    if not isinstance(contact, str):
        return False
    candidate = contact.strip()
    return any(pattern.fullmatch(candidate) for pattern in _CONTACT_PATTERNS)
+# Function to parse resume with SpaCy
+# Function to parse resume with SpaCy
def Parser_from_model(file_path):
    """Parse the resume at *file_path* with the local spaCy NER model.

    Steps, each returning an ``{"error": ...}`` dictionary on failure:

    1. load the pipeline from ``Spacy_Models/ner_model_05_3``;
    2. extract text and hyperlinks with ``extract_text_based_on_format``
       (blank text counts as a failure);
    3. run the pipeline over the text.

    Recognised entities are grouped by label without duplicates and mapped
    into a dictionary with ``"personal"`` and ``"professional"`` sections.
    Single-valued fields take the first entity found for their label, or
    ``''`` when there is none. An email or contact that fails validation is
    replaced by ``"Invalid email"`` / ``"Invalid contact"`` and the rejected
    text is kept under ``invalid_email`` / ``invalid_contact``.

    The final dictionary is printed and returned.
    """
    try:
        nlp = spacy.load("Spacy_Models/ner_model_05_3")
        logging.debug("Model loaded successfully.")
    except Exception as load_err:
        logging.error(f"Error loading model: {load_err}")
        return {"error": "Model loading failed"}

    try:
        cleaned_text, hyperlinks = extract_text_based_on_format(file_path)
        if not cleaned_text.strip():
            logging.error("No text extracted from the file.")
            return {"error": "Text extraction failed"}
    except Exception as extract_err:
        logging.error(f"Error extracting text from file: {extract_err}")
        return {"error": "Text extraction failed"}

    try:
        doc = nlp(cleaned_text)
    except Exception as nlp_err:
        logging.error(f"Error processing text with SpaCy: {nlp_err}")
        return {"error": "Text processing failed"}

    # One bucket per label of interest; other labels are ignored.
    labels = (
        'PERSON', 'EMAIL', 'CONTACT', 'LOCATION', 'SKILL', 'SOFT_SKILL',
        'COMPANY', 'PROJECTS', 'JOB_TITLE', 'YEARS_EXPERIENCE', 'EXPERIENCE',
        'QUALIFICATION', 'UNIVERSITY', 'COURSE', 'CERTIFICATE',
    )
    entities = {label: [] for label in labels}
    for ent in doc.ents:
        bucket = entities.get(ent.label_)
        if bucket is not None and ent.text not in bucket:  # Avoid duplicates
            bucket.append(ent.text)

    def first(label):
        # First entity text collected for *label*, or '' when none was found.
        found = entities[label]
        return found[0] if found else ''

    # Validate email
    extracted_email = first('EMAIL')
    if is_valid_email(extracted_email):
        email_value, rejected_email = extracted_email, ''
    else:
        logging.warning(f"Invalid email detected: {extracted_email}")
        email_value, rejected_email = "Invalid email", extracted_email

    # Validate contact
    extracted_contact = first('CONTACT')
    if is_valid_contact(extracted_contact):
        contact_value, rejected_contact = extracted_contact, ''
    else:
        logging.warning(f"Invalid contact detected: {extracted_contact}")
        contact_value, rejected_contact = "Invalid contact", extracted_contact

    result = {
        "personal": {
            "name": first('PERSON'),
            "contact": contact_value,
            "email": email_value,
            "location": first('LOCATION'),
            "link": hyperlinks,  # Hyperlinks from extracted text
            "invalid_email": rejected_email,
            "invalid_contact": rejected_contact
        },
        "professional": {
            "technical_skills": entities['SKILL'],
            "non_technical_skills": entities['SOFT_SKILL'],
            "tools": [],  # Add logic if tools extraction is needed
            "experience": [
                {
                    "company": first('COMPANY'),
                    "projects": first('PROJECTS'),
                    "role": first('JOB_TITLE'),
                    "years": first('YEARS_EXPERIENCE'),
                    "project_experience": entities['EXPERIENCE']
                }
            ],
            "education": [
                {
                    "qualification": first('QUALIFICATION'),
                    "university": first('UNIVERSITY'),
                    "course": first('COURSE'),
                    "certificate": first('CERTIFICATE')
                }
            ]
        }
    }
    print(result)
    return result