ResumeExtractor4

Sleeping

App Files Files Community

WebashalarForML committed on Sep 28, 2024

Commit

be55e80

verified ·

1 Parent(s): 3c92024

Update utils/mistral.py

Browse files

Files changed (1) hide show

utils/mistral.py +378 -377

utils/mistral.py CHANGED Viewed

@@ -1,377 +1,378 @@
-# mistral.py
-import os
-import json
-import logging
-from huggingface_hub import InferenceClient
-from huggingface_hub.utils._errors import BadRequestError
-from dotenv import load_dotenv
-from utils.fileTotext import extract_text_based_on_format
-import re
-from utils.spacy import Parser_from_model
-# Load environment variables from .env file before HF_TOKEN is read below.
-load_dotenv()
-# Authenticate with Hugging Face
-HFT = os.getenv('HF_TOKEN')
-# A missing or empty token aborts the import of this module.
-if not HFT:
-    raise ValueError("Hugging Face token is not set in environment variables.")
-# Module-level inference client; process_resume_data passes it to the Model_*_Output functions.
-client = InferenceClient(model="mistralai/Mistral-Nemo-Instruct-2407", token=HFT)
-# Function to clean model output
-def Data_Cleaner(text):
-    """Strip chat boilerplate from a model reply so it can be JSON-decoded.
-    Everything up to and including the first "format:" marker (if present) is
-    dropped, then surrounding whitespace, Markdown code-fence backticks and a
-    leading "json" fence tag are removed. Returns the remaining text.
-    """
-    pattern = r".*?format:"
-    parts = re.split(pattern, text, maxsplit=1)
-    payload = parts[1] if len(parts) > 1 else text
-    payload = payload.strip().strip('`').strip()
-    # str.strip('json') would remove any of the characters j/s/o/n from both
-    # ends (turning "null" into "ull"); only the literal fence tag is dropped.
-    if payload.startswith('json'):
-        payload = payload[len('json'):]
-    return payload.strip()
-# Function to call Mistral and process output
-def Model_ProfessionalDetails_Output(resume, client):
-    """Ask the chat model for the professional section of a resume.
-    Args:
-        resume: Resume text; it is interpolated into the user prompt.
-        client: Object exposing chat_completion(messages=..., stream=True, ...).
-    Returns:
-        The JSON-decoded reply, or {} when the reply is not valid JSON.
-    """
-    system_role = {
-    "role": "system",
-    "content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return 'not found'."
-    }
-    user_prompt = {
-    "role": "user",
-    "content": f'''Act as a resume parser for the following text given in text: {resume}
-    Extract the text in the following output JSON string as:
-    {{
-        "professional": {{
-            "technical_skills": "List all technical skills, programming languages, frameworks, and technologies mentioned in the resume, ensuring they are not mixed with other skill types.",
-            "non_technical_skills": "Identify and list non-technical skills such as leadership, teamwork, and communication skills, ensuring they are not mixed with technical skills.",
-            "tools": "Enumerate all software tools, platforms, and applications (e.g., Figma, Unity, MS Office, etc.) referenced in the resume, distinctly separate from skills.",
-            "projects": "Extract the names or titles of all projects mentioned in the resume.",
-            "projects_experience": "Summarize overall project experiences, providing a brief description of each project as detailed in the resume.",
-            "experience": "Calculate total professional work experience in years and months based on the resume.",
-            "companies_worked_at": "List the names of all companies where employment is mentioned in the resume.",
-            "certifications": "Extract and list all certifications obtained as stated in the resume.",
-            "roles": "Include the names of all job titles or roles held as indicated in the resume.",
-            "qualifications": "List educational qualifications (e.g., B.Tech) from the resume. If none are found, return 'No education listed'.",
-            "courses": "Extract the names of completed courses based on the resume. If none are found, return 'No courses listed'.",
-            "university": "Identify the name of the university, college, or institute attended, based on the resume. If not found, return 'No university listed'.",
-            "year_of_graduation": "Extract the year of graduation from the resume. If not found, return 'No year of graduation listed'."
-        }}
-    }}
-    Json Output:
-    '''
-    }
-    # The content field of a streamed delta is optional; treating None as ""
-    # avoids a TypeError from str + None.
-    chunks = []
-    for message in client.chat_completion(messages=[system_role, user_prompt], max_tokens=3000, stream=True, temperature=0.35):
-        chunks.append(message.choices[0].delta.content or "")
-    response = "".join(chunks)
-    try:
-        clean_response = Data_Cleaner(response)
-        parsed_response = json.loads(clean_response)
-    except json.JSONDecodeError as e:
-        logging.error(f"JSON Decode Error: {e}")
-        return {}
-    return parsed_response
-def Model_PersonalDetails_Output(resume, client):
-    """Ask the chat model for the personal section of a resume.
-    Args:
-        resume: Resume text; it is interpolated into the user prompt.
-        client: Object exposing chat_completion(messages=..., stream=True, ...).
-    Returns:
-        The JSON-decoded reply, or {} when the reply is not valid JSON.
-    """
-    # NOTE(review): the system prompt says "professional details" although this
-    # call extracts personal ones - confirm before rewording the prompt.
-    system_role = {
-    "role": "system",
-    "content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return 'not found'."
-    }
-    user_prompt = {
-    "role": "user",
-    "content": f'''Act as a resume parser for the following text given in text: {resume}
-    Extract the text in the following output JSON string as:
-    {{
-        "personal": {{
-            "name": "Extract the full name based on the resume. If not found, return 'No name listed'.",
-            "contact_number": "Extract the contact number from the resume. If not found, return 'No contact number listed'.",
-            "email": "Extract the email address from the resume. If not found, return 'No email listed'.",
-            "Address": "Extract the Address or address from the resume. If not found, return 'No Address listed'.",
-            "link": "Extract any relevant links (e.g., portfolio, LinkedIn) from the resume. If not found, return 'No link listed'."
-        }}
-    }}
-    output:
-    '''
-    }
-    # The content field of a streamed delta is optional; treating None as ""
-    # avoids a TypeError from str + None.
-    chunks = []
-    for message in client.chat_completion(
-        messages=[system_role, user_prompt],
-        max_tokens=3000,
-        stream=True,
-        temperature=0.35,
-    ):
-        chunks.append(message.choices[0].delta.content or "")
-    response = "".join(chunks)
-    # Handle cases where the response might have formatting issues
-    try:
-        clean_response = Data_Cleaner(response)
-        parsed_response = json.loads(clean_response)
-    except json.JSONDecodeError as e:
-        # Reported through logging, like Model_ProfessionalDetails_Output does.
-        logging.error(f"JSON Decode Error: {e}")
-        logging.error(f"Raw Response: {response}")
-        return {}
-    return parsed_response
-# Module-level regex patterns.
-# linkedin_pattern / github_pattern are applied with re.match in extract_links.
-# NOTE(review): email_pattern and contact_pattern are not referenced by the code
-# visible here (is_valid_email / is_valid_contact define their own patterns) -
-# confirm no other user exists before removing them.
-linkedin_pattern = r"https?://(?:www\.)?linkedin\.com/[\w\-_/]+"
-github_pattern = r"https?://(?:www\.)?github\.com/[\w\-_/]+"
-email_pattern = r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$"
-contact_pattern = r"^\+?[\d\s\-()]{7,15}$"
-def extract_links(hyperlinks):
-    """Split hyperlinks into LinkedIn and GitHub URLs.
-    Returns a (linkedin_links, github_links) tuple of lists; links that match
-    neither pattern are left out of both.
-    """
-    buckets = {"linkedin": [], "github": []}
-    for url in hyperlinks:
-        # LinkedIn is tested first, so it wins if both patterns were to match.
-        if re.match(linkedin_pattern, url):
-            site = "linkedin"
-        elif re.match(github_pattern, url):
-            site = "github"
-        else:
-            continue
-        buckets[site].append(url)
-    return buckets["linkedin"], buckets["github"]
-def is_valid_email(email):
-    """Return True when email matches the local@domain.tld pattern, else False."""
-    matcher = re.compile(r'^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$')
-    # A match object is always truthy, so this mirrors an "is not None" test.
-    return True if matcher.match(email) else False
-def is_valid_contact(contact):
-    """Return True when contact is a phone number in one of the known layouts.
-    The value is trimmed and must match one pattern in full. Non-string and
-    empty values are reported as invalid.
-    Fixes over the previous version: the result was "any(...) is not None",
-    which is always True; one list entry lacked its trailing comma and was
-    concatenated with the next; most patterns ended in a literal space and had
-    no end anchor. Exact duplicate patterns were dropped.
-    """
-    if not isinstance(contact, str):
-        return False
-    candidate = contact.strip()
-    if not candidate:
-        return False
-    patterns = (
-        r'\+91[\s\.\-\/]?\(?0?\)?[\s\-\.\/]?\d{5}[\s\-\.\/]?\d{5}',  # +91 with optional 0 and separators
-        r'\+91[\s\.\-\/]?\d{5}[\s\-\.\/]?\d{5}',        # +91 with 10 digits separated
-        r'\d{5}[\s\-\.\/]?\d{5}',                       # Local format without country code
-        r'\+91[\s\.\-\/]?\d{10}',                       # +91 with 10 digits together
-        r'\d{10}',                                      # 10 digits together
-        r'\+91[\s\.\-\/]?\(?\d{5}\)?[\s\-\.\/]?\d{5}[\s\-\.\/]?\d{5}',  # +91 with varying separators
-        r'\+1\s\(\d{3}\)\s\d{3}-\d{4}',                 # USA/Canada Intl +1 (XXX) XXX-XXXX
-        r'\(\d{3}\)\s\d{3}-\d{4}',                      # USA/Canada STD (XXX) XXX-XXXX
-        r'\(\d{3}\)\s\d{3}\s\d{4}',                     # USA/Canada (XXX) XXX XXXX
-        r'\(\d{3}\)\s\d{3}\s\d{3}',                     # USA/Canada (XXX) XXX XXX
-        r'\+1\d{10}',                                   # +1 XXXXXXXXXX
-        r'\+1\s\d{3}\s\d{3}\s\d{4}',                    # +1 XXX XXX XXXX
-        r'\+44\s\d{4}\s\d{6}',                          # UK Intl +44 XXXX XXXXXX
-        r'\+44\s\d{3}\s\d{3}\s\d{4}',                   # UK Intl +44 XXX XXX XXXX
-        r'0\d{4}\s\d{6}',                               # UK STD 0XXXX XXXXXX
-        r'0\d{3}\s\d{3}\s\d{4}',                        # UK / Nigeria STD 0XXX XXX XXXX
-        r'\+44\d{10}',                                  # +44 XXXXXXXXXX
-        r'\+61\s\d\s\d{4}\s\d{4}',                      # Australia Intl +61 X XXXX XXXX
-        r'0\d\s\d{4}\s\d{4}',                           # Australia / Japan STD 0X XXXX XXXX
-        r'\+61\d{9}',                                   # +61 XXXXXXXXX
-        r'\+91\s\d{5}-\d{5}',                           # India Intl +91 XXXXX-XXXXX
-        r'\+91\s\d{4}-\d{6}',                           # India Intl +91 XXXX-XXXXXX
-        r'\+91\s\d{10}',                                # India Intl +91 XXXXXXXXXX
-        r'0\d{2}-\d{7}',                                # India STD 0XX-XXXXXXX
-        r'\+91\d{10}',                                  # +91 XXXXXXXXXX
-        r'\+49\s\d{4}\s\d{8}',                          # Germany Intl +49 XXXX XXXXXXXX
-        r'\+49\s\d{3}\s\d{7}',                          # Germany Intl +49 XXX XXXXXXX
-        r'0\d{3}\s\d{8}',                               # Germany STD 0XXX XXXXXXXX
-        r'\+49\d{12}',                                  # +49 XXXXXXXXXXXX
-        r'\+49\d{10}',                                  # +49 XXXXXXXXXX
-        r'\+86\s\d{3}\s\d{4}\s\d{4}',                   # China Intl +86 XXX XXXX XXXX
-        r'0\d{3}\s\d{4}\s\d{4}',                        # China STD 0XXX XXXX XXXX
-        r'\+86\d{11}',                                  # +86 XXXXXXXXXXX
-        r'\+81\s\d\s\d{4}\s\d{4}',                      # Japan Intl +81 X XXXX XXXX
-        r'\+81\s\d{2}\s\d{4}\s\d{4}',                   # Japan Intl +81 XX XXXX XXXX
-        r'\+81\d{10}',                                  # +81 XXXXXXXXXX
-        r'\+81\d{9}',                                   # +81 XXXXXXXXX
-        r'\+55\s\d{2}\s\d{5}-\d{4}',                    # Brazil Intl +55 XX XXXXX-XXXX
-        r'\+55\s\d{2}\s\d{4}-\d{4}',                    # Brazil Intl +55 XX XXXX-XXXX
-        r'0\d{2}\s\d{4}\s\d{4}',                        # Brazil STD 0XX XXXX XXXX
-        r'\+55\d{11}',                                  # +55 XXXXXXXXXXX
-        r'\+55\d{10}',                                  # +55 XXXXXXXXXX
-        r'\+33\s\d\s\d{2}\s\d{2}\s\d{2}\s\d{2}',        # France Intl +33 X XX XX XX XX
-        r'0\d\s\d{2}\s\d{2}\s\d{2}\s\d{2}',             # France STD 0X XX XX XX XX
-        r'\+33\d{9}',                                   # +33 XXXXXXXXX
-        r'\+7\s\d{3}\s\d{3}-\d{2}-\d{2}',               # Russia Intl +7 XXX XXX-XX-XX
-        r'8\s\d{3}\s\d{3}-\d{2}-\d{2}',                 # Russia STD 8 XXX XXX-XX-XX
-        r'\+7\d{10}',                                   # +7 XXXXXXXXXX
-        r'8\d{10}',                                     # 8 XXXXXXXXXX
-        r'\+27\s\d{2}\s\d{3}\s\d{4}',                   # South Africa Intl +27 XX XXX XXXX
-        r'0\d{2}\s\d{3}\s\d{4}',                        # South Africa STD 0XX XXX XXXX
-        r'\+27\d{9}',                                   # +27 XXXXXXXXX
-        r'\+52\s\d{3}\s\d{3}\s\d{4}',                   # Mexico Intl +52 XXX XXX XXXX
-        r'\+52\s\d{2}\s\d{4}\s\d{4}',                   # Mexico Intl +52 XX XXXX XXXX
-        r'01\s\d{3}\s\d{4}',                            # Mexico STD 01 XXX XXXX
-        r'\+52\d{10}',                                  # +52 XXXXXXXXXX
-        r'01\d{7}',                                     # 01 XXXXXXX
-        r'\+234\s\d{3}\s\d{3}\s\d{4}',                  # Nigeria Intl +234 XXX XXX XXXX
-        r'\+234\d{10}',                                 # +234 XXXXXXXXXX
-        r'\+971\s\d\s\d{3}\s\d{4}',                     # UAE Intl +971 X XXX XXXX
-        r'0\d\s\d{3}\s\d{4}',                           # UAE / Saudi STD 0X XXX XXXX
-        r'\+971\d{8}',                                  # +971 XXXXXXXX
-        r'\+54\s9\s\d{3}\s\d{3}\s\d{4}',                # Argentina Intl +54 9 XXX XXX XXXX
-        r'\+54\s\d{1}\s\d{4}\s\d{4}',                   # Argentina Intl +54 X XXXX XXXX
-        r'0\d{3}\s\d{4}',                               # Argentina STD 0XXX XXXX
-        r'\+54\s9\s\d{10}',                             # +54 9 XXXXXXXXXX
-        r'\+54\d{10}',                                  # +54 XXXXXXXXXX
-        r'\+54\d{9}',                                   # +54 XXXXXXXXX
-        r'\+966\s\d\s\d{3}\s\d{4}',                     # Saudi Intl +966 X XXX XXXX
-        r'\+966\d{8}',                                  # +966 XXXXXXXX
-        r'0\d{7}',                                      # 0XXXXXXX
-        r'0\d{8}',                                      # 0XXXXXXXX
-        r'0\d{9}',                                      # 0XXXXXXXXX
-        r'0\d{10}',                                     # 0XXXXXXXXXX
-        r'0\d{11}',                                     # 0XXXXXXXXXXX
-    )
-    # re.fullmatch anchors both ends, so a valid prefix followed by other text
-    # is rejected; any() already yields the bool callers expect.
-    return any(re.fullmatch(pattern, candidate) for pattern in patterns)
-def validate_contact_email(personal_data):
-    """Validate the 'contact' and 'email' entries of personal_data.
-    Returns (valid_contact, contact_label, valid_email, email_label): the two
-    validation results plus a 'Valid ...' / 'Invalid ...' label for each. A
-    missing key or the placeholder 'Not found' counts as invalid without
-    calling the validator.
-    """
-    placeholder = 'Not found'
-    phone = personal_data.get('contact', placeholder)
-    mail = personal_data.get('email', placeholder)
-    contact_ok = False if phone == placeholder else is_valid_contact(phone)
-    email_ok = False if mail == placeholder else is_valid_email(mail)
-    contact_label = 'Valid contact' if contact_ok else 'Invalid contact'
-    email_label = 'Valid email' if email_ok else 'Invalid email'
-    return contact_ok, contact_label, email_ok, email_label
-def process_resume_data(file_path):
-    """Parse a resume file into a structured dict, preferring the Mistral model.
-    The file is converted to text, personal and professional details are
-    requested from the model, and contact/email validation flags are added.
-    Args:
-        file_path: Path handed to extract_text_based_on_format (and to
-            Parser_from_model on fallback).
-    Returns:
-        {"error": ...} when no text could be extracted; otherwise the combined
-        {"personal": ..., "professional": ...} dict, or the result of
-        Parser_from_model(file_path) when the model path fails or returns
-        nothing.
-    """
-    resume_text, hyperlinks = extract_text_based_on_format(file_path)
-    if not resume_text:
-        return {"error": "Text extraction failed"}
-    # Only announce success once there is text to work with.
-    print("Resume converted to text successfully.")
-    # Extract LinkedIn and GitHub links
-    linkedin_links, github_links = extract_links(hyperlinks)
-    # Attempt to use Mistral model for parsing
-    try:
-        per_data = Model_PersonalDetails_Output(resume_text, client)
-        pro_data = Model_ProfessionalDetails_Output(resume_text, client)
-        if not per_data:
-            logging.warning("Mistral personal data extraction failed.")
-            per_data = {}
-        if not pro_data:
-            logging.warning("Mistral professional data extraction failed.")
-            pro_data = {}
-        # Nothing usable from either call: let the handler below fall back.
-        if not per_data and not pro_data:
-            raise ValueError("Mistral returned no output")
-        personal = per_data.get('personal', {})
-        professional = pro_data.get('professional', {})
-        # The lookups below use the key names the prompts ask the model to
-        # emit ("roles", "qualifications", "courses", "certifications"); the
-        # previous singular/"worked_as" spellings never matched and always
-        # produced 'Not found'.
-        result = {
-            "personal": {
-                "name": personal.get('name', 'Not found'),
-                "contact": personal.get('contact_number', 'Not found'),
-                "email": personal.get('email', 'Not found'),
-                "location": personal.get('Address', 'Not found'),
-                "linkedin": linkedin_links,
-                "github": github_links,
-                "other_links": hyperlinks  # Store remaining links if needed
-            },
-            "professional": {
-                "technical_skills": professional.get('technical_skills', 'Not found'),
-                "non_technical_skills": professional.get('non_technical_skills', 'Not found'),
-                "tools": professional.get('tools', 'Not found'),
-                "experience": [
-                    {
-                        "company": professional.get('companies_worked_at', 'Not found'),
-                        "projects": professional.get('projects', 'Not found'),
-                        "role": professional.get('roles', 'Not found'),
-                        "years": professional.get('experience', 'Not found'),
-                        "project_experience": professional.get('projects_experience', 'Not found')
-                    }
-                ],
-                "education": [
-                    {
-                        "qualification": professional.get('qualifications', 'Not found'),
-                        "university": professional.get('university', 'Not found'),
-                        "course": professional.get('courses', 'Not found'),
-                        "certificate": professional.get('certifications', 'Not found')
-                    }
-                ]
-            }
-        }
-        # Validate contact and email
-        valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
-        result['personal']['valid_contact'] = valid_contact
-        result['personal']['invalid_contact'] = invalid_contact
-        result['personal']['valid_email'] = valid_email
-        result['personal']['invalid_email'] = invalid_email
-        logging.info("Successfully extracted data using Mistral.")
-        print("---------Mistral-------")
-        return result
-    # Handle HuggingFace API or Mistral model errors
-    except BadRequestError as e:
-        logging.error(f"HuggingFace API error: {e}. Falling back to SpaCy.")
-        print(f"HuggingFace API error: {e}. Falling back to SpaCy.")
-    except Exception as e:
-        # Deliberately broad: any failure on the model path triggers the fallback.
-        logging.error(f"An error occurred while processing with Mistral: {e}. Falling back to SpaCy.")
-        print(f"An error occurred while processing with Mistral: {e}. Falling back to SpaCy.")
-    # Fallback to SpaCy if Mistral fails
-    logging.warning("Mistral failed, switching to SpaCy.")
-    print("---------SpaCy-------")
-    return Parser_from_model(file_path)

+# mistral.py
+import os
+import json
+import logging
+from huggingface_hub import InferenceClient
+#from huggingface_hub.utils._errors import BadRequestError
+from huggingface_hub import BadRequestError
+from dotenv import load_dotenv
+from utils.fileTotext import extract_text_based_on_format
+import re
+from utils.spacy import Parser_from_model
+# Load environment variables from .env file before HF_TOKEN is read below.
+load_dotenv()
+# Authenticate with Hugging Face
+HFT = os.getenv('HF_TOKEN')
+# A missing or empty token aborts the import of this module.
+if not HFT:
+    raise ValueError("Hugging Face token is not set in environment variables.")
+# Module-level inference client for the Mistral-Nemo-Instruct model.
+client = InferenceClient(model="mistralai/Mistral-Nemo-Instruct-2407", token=HFT)
+# Function to clean model output
+def Data_Cleaner(text):
+    """Strip chat boilerplate from a model reply so it can be JSON-decoded.
+    Everything up to and including the first "format:" marker (if present) is
+    dropped, then surrounding whitespace, Markdown code-fence backticks and a
+    leading "json" fence tag are removed. Returns the remaining text.
+    """
+    pattern = r".*?format:"
+    parts = re.split(pattern, text, maxsplit=1)
+    payload = parts[1] if len(parts) > 1 else text
+    payload = payload.strip().strip('`').strip()
+    # str.strip('json') would remove any of the characters j/s/o/n from both
+    # ends (turning "null" into "ull"); only the literal fence tag is dropped.
+    if payload.startswith('json'):
+        payload = payload[len('json'):]
+    return payload.strip()
+# Function to call Mistral and process output
+def Model_ProfessionalDetails_Output(resume, client):
+    """Ask the chat model for the professional section of a resume.
+    Args:
+        resume: Resume text; it is interpolated into the user prompt.
+        client: Object exposing chat_completion(messages=..., stream=True, ...).
+    Returns:
+        The JSON-decoded reply, or {} when the reply is not valid JSON.
+    """
+    system_role = {
+    "role": "system",
+    "content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return 'not found'."
+    }
+    user_prompt = {
+    "role": "user",
+    "content": f'''Act as a resume parser for the following text given in text: {resume}
+    Extract the text in the following output JSON string as:
+    {{
+        "professional": {{
+            "technical_skills": "List all technical skills, programming languages, frameworks, and technologies mentioned in the resume, ensuring they are not mixed with other skill types.",
+            "non_technical_skills": "Identify and list non-technical skills such as leadership, teamwork, and communication skills, ensuring they are not mixed with technical skills.",
+            "tools": "Enumerate all software tools, platforms, and applications (e.g., Figma, Unity, MS Office, etc.) referenced in the resume, distinctly separate from skills.",
+            "projects": "Extract the names or titles of all projects mentioned in the resume.",
+            "projects_experience": "Summarize overall project experiences, providing a brief description of each project as detailed in the resume.",
+            "experience": "Calculate total professional work experience in years and months based on the resume.",
+            "companies_worked_at": "List the names of all companies where employment is mentioned in the resume.",
+            "certifications": "Extract and list all certifications obtained as stated in the resume.",
+            "roles": "Include the names of all job titles or roles held as indicated in the resume.",
+            "qualifications": "List educational qualifications (e.g., B.Tech) from the resume. If none are found, return 'No education listed'.",
+            "courses": "Extract the names of completed courses based on the resume. If none are found, return 'No courses listed'.",
+            "university": "Identify the name of the university, college, or institute attended, based on the resume. If not found, return 'No university listed'.",
+            "year_of_graduation": "Extract the year of graduation from the resume. If not found, return 'No year of graduation listed'."
+        }}
+    }}
+    Json Output:
+    '''
+    }
+    # The content field of a streamed delta is optional; treating None as ""
+    # avoids a TypeError from str + None.
+    chunks = []
+    for message in client.chat_completion(messages=[system_role, user_prompt], max_tokens=3000, stream=True, temperature=0.35):
+        chunks.append(message.choices[0].delta.content or "")
+    response = "".join(chunks)
+    try:
+        clean_response = Data_Cleaner(response)
+        parsed_response = json.loads(clean_response)
+    except json.JSONDecodeError as e:
+        logging.error(f"JSON Decode Error: {e}")
+        return {}
+    return parsed_response
+def Model_PersonalDetails_Output(resume, client):
+    """Ask the chat model for the personal section of a resume.
+    Args:
+        resume: Resume text; it is interpolated into the user prompt.
+        client: Object exposing chat_completion(messages=..., stream=True, ...).
+    Returns:
+        The JSON-decoded reply, or {} when the reply is not valid JSON.
+    """
+    # NOTE(review): the system prompt says "professional details" although this
+    # call extracts personal ones - confirm before rewording the prompt.
+    system_role = {
+    "role": "system",
+    "content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return 'not found'."
+    }
+    user_prompt = {
+    "role": "user",
+    "content": f'''Act as a resume parser for the following text given in text: {resume}
+    Extract the text in the following output JSON string as:
+    {{
+        "personal": {{
+            "name": "Extract the full name based on the resume. If not found, return 'No name listed'.",
+            "contact_number": "Extract the contact number from the resume. If not found, return 'No contact number listed'.",
+            "email": "Extract the email address from the resume. If not found, return 'No email listed'.",
+            "Address": "Extract the Address or address from the resume. If not found, return 'No Address listed'.",
+            "link": "Extract any relevant links (e.g., portfolio, LinkedIn) from the resume. If not found, return 'No link listed'."
+        }}
+    }}
+    output:
+    '''
+    }
+    # The content field of a streamed delta is optional; treating None as ""
+    # avoids a TypeError from str + None.
+    chunks = []
+    for message in client.chat_completion(
+        messages=[system_role, user_prompt],
+        max_tokens=3000,
+        stream=True,
+        temperature=0.35,
+    ):
+        chunks.append(message.choices[0].delta.content or "")
+    response = "".join(chunks)
+    # Handle cases where the response might have formatting issues
+    try:
+        clean_response = Data_Cleaner(response)
+        parsed_response = json.loads(clean_response)
+    except json.JSONDecodeError as e:
+        # Reported through logging, like Model_ProfessionalDetails_Output does.
+        logging.error(f"JSON Decode Error: {e}")
+        logging.error(f"Raw Response: {response}")
+        return {}
+    return parsed_response
+# Module-level regex patterns.
+# linkedin_pattern / github_pattern are applied with re.match in extract_links.
+# NOTE(review): email_pattern and contact_pattern are not referenced by the code
+# visible here (is_valid_email / is_valid_contact define their own patterns) -
+# confirm no other user exists before removing them.
+linkedin_pattern = r"https?://(?:www\.)?linkedin\.com/[\w\-_/]+"
+github_pattern = r"https?://(?:www\.)?github\.com/[\w\-_/]+"
+email_pattern = r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$"
+contact_pattern = r"^\+?[\d\s\-()]{7,15}$"
+def extract_links(hyperlinks):
+    """Split hyperlinks into LinkedIn and GitHub URLs.
+    Returns a (linkedin_links, github_links) tuple of lists; links that match
+    neither pattern are left out of both.
+    """
+    buckets = {"linkedin": [], "github": []}
+    for url in hyperlinks:
+        # LinkedIn is tested first, so it wins if both patterns were to match.
+        if re.match(linkedin_pattern, url):
+            site = "linkedin"
+        elif re.match(github_pattern, url):
+            site = "github"
+        else:
+            continue
+        buckets[site].append(url)
+    return buckets["linkedin"], buckets["github"]
+def is_valid_email(email):
+    """Return True when email matches the local@domain.tld pattern, else False."""
+    matcher = re.compile(r'^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$')
+    # A match object is always truthy, so this mirrors an "is not None" test.
+    return True if matcher.match(email) else False
+def is_valid_contact(contact):
+        patterns = [
+        r'^\+91[\s\.\-\/]?\(?0?\)?[\s\-\.\/]?\d{5}[\s\-\.\/]?\d{5}$',  # +91 with optional 0 and separators
+        r'^\+91[\s\.\-\/]?\d{5}[\s\-\.\/]?\d{5}$',  # +91 with 10 digits separated
+        r'^\d{5}[\s\-\.\/]?\d{5}$',  # Local format without country code
+        r'^\+91[\s\.\-\/]?\d{10}$',  # +91 with 10 digits together
+        r'^\d{10}$',  # 10 digits together
+        r'^\+91[\s\.\-\/]?\(?\d{5}\)?[\s\-\.\/]?\d{5}[\s\-\.\/]?\d{5}$'  # +91 with varying separators
+        r'\+1\s\(\d{3}\)\s\d{3}-\d{4} ',               # USA/Canada Intl +1 (XXX) XXX-XXXX
+        r'\(\d{3}\)\s\d{3}-\d{4} ',                    # USA/Canada STD (XXX) XXX-XXXX
+        r'\(\d{3}\)\s\d{3}\s\d{4} ',                   # USA/Canada (XXX) XXX XXXX
+        r'\(\d{3}\)\s\d{3}\s\d{3} ',                   # USA/Canada (XXX) XXX XXX
+        r'\+1\d{10} ',                                 # +1 XXXXXXXXXX
+        r'\d{10} ',                                    # XXXXXXXXXX
+        r'\+44\s\d{4}\s\d{6} ',                        # UK Intl +44 XXXX XXXXXX
+        r'\+44\s\d{3}\s\d{3}\s\d{4} ',                 # UK Intl +44 XXX XXX XXXX
+        r'0\d{4}\s\d{6} ',                             # UK STD 0XXXX XXXXXX
+        r'0\d{3}\s\d{3}\s\d{4} ',                      # UK STD 0XXX XXX XXXX
+        r'\+44\d{10} ',                                # +44 XXXXXXXXXX
+        r'0\d{10} ',                                   # 0XXXXXXXXXX
+        r'\+61\s\d\s\d{4}\s\d{4} ',                    # Australia Intl +61 X XXXX XXXX
+        r'0\d\s\d{4}\s\d{4} ',                         # Australia STD 0X XXXX XXXX
+        r'\+61\d{9} ',                                 # +61 XXXXXXXXX
+        r'0\d{9} ',                                    # 0XXXXXXXXX
+        r'\+91\s\d{5}-\d{5} ',                         # India Intl +91 XXXXX-XXXXX
+        r'\+91\s\d{4}-\d{6} ',                         # India Intl +91 XXXX-XXXXXX
+        r'\+91\s\d{10} ',                              # India Intl +91 XXXXXXXXXX
+        r'0\d{2}-\d{7} ',                              # India STD 0XX-XXXXXXX
+        r'\+91\d{10} ',                                # +91 XXXXXXXXXX
+        r'\+49\s\d{4}\s\d{8} ',                        # Germany Intl +49 XXXX XXXXXXXX
+        r'\+49\s\d{3}\s\d{7} ',                        # Germany Intl +49 XXX XXXXXXX
+        r'0\d{3}\s\d{8} ',                             # Germany STD 0XXX XXXXXXXX
+        r'\+49\d{12} ',                                # +49 XXXXXXXXXXXX
+        r'\+49\d{10} ',                                # +49 XXXXXXXXXX
+        r'0\d{11} ',                                   # 0XXXXXXXXXXX
+        r'\+86\s\d{3}\s\d{4}\s\d{4} ',                 # China Intl +86 XXX XXXX XXXX
+        r'0\d{3}\s\d{4}\s\d{4} ',                      # China STD 0XXX XXXX XXXX
+        r'\+86\d{11} ',                                # +86 XXXXXXXXXXX
+        r'\+81\s\d\s\d{4}\s\d{4} ',                    # Japan Intl +81 X XXXX XXXX
+        r'\+81\s\d{2}\s\d{4}\s\d{4} ',                 # Japan Intl +81 XX XXXX XXXX
+        r'0\d\s\d{4}\s\d{4} ',                         # Japan STD 0X XXXX XXXX
+        r'\+81\d{10} ',                                # +81 XXXXXXXXXX
+        r'\+81\d{9} ',                                 # +81 XXXXXXXXX
+        r'0\d{9} ',                                    # 0XXXXXXXXX
+        r'\+55\s\d{2}\s\d{5}-\d{4} ',                  # Brazil Intl +55 XX XXXXX-XXXX
+        r'\+55\s\d{2}\s\d{4}-\d{4} ',                  # Brazil Intl +55 XX XXXX-XXXX
+        r'0\d{2}\s\d{4}\s\d{4} ',                      # Brazil STD 0XX XXXX XXXX
+        r'\+55\d{11} ',                                # +55 XXXXXXXXXXX
+        r'\+55\d{10} ',                                # +55 XXXXXXXXXX
+        r'0\d{10} ',                                   # 0XXXXXXXXXX
+        r'\+33\s\d\s\d{2}\s\d{2}\s\d{2}\s\d{2} ',      # France Intl +33 X XX XX XX XX
+        r'0\d\s\d{2}\s\d{2}\s\d{2}\s\d{2} ',           # France STD 0X XX XX XX XX
+        r'\+33\d{9} ',                                 # +33 XXXXXXXXX
+        r'0\d{9} ',                                    # 0XXXXXXXXX
+        r'\+7\s\d{3}\s\d{3}-\d{2}-\d{2} ',             # Russia Intl +7 XXX XXX-XX-XX
+        r'8\s\d{3}\s\d{3}-\d{2}-\d{2} ',               # Russia STD 8 XXX XXX-XX-XX
+        r'\+7\d{10} ',                                 # +7 XXXXXXXXXX
+        r'8\d{10} ',                                   # 8 XXXXXXXXXX
+        r'\+27\s\d{2}\s\d{3}\s\d{4} ',                 # South Africa Intl +27 XX XXX XXXX
+        r'0\d{2}\s\d{3}\s\d{4} ',                      # South Africa STD 0XX XXX XXXX
+        r'\+27\d{9} ',                                 # +27 XXXXXXXXX
+        r'0\d{9} ',                                    # 0XXXXXXXXX
+        r'\+52\s\d{3}\s\d{3}\s\d{4} ',                 # Mexico Intl +52 XXX XXX XXXX
+        r'\+52\s\d{2}\s\d{4}\s\d{4} ',                 # Mexico Intl +52 XX XXXX XXXX
+        r'01\s\d{3}\s\d{4} ',                          # Mexico STD 01 XXX XXXX
+        r'\+52\d{10} ',                                # +52 XXXXXXXXXX
+        r'01\d{7} ',                                   # 01 XXXXXXX
+        r'\+234\s\d{3}\s\d{3}\s\d{4} ',                # Nigeria Intl +234 XXX XXX XXXX
+        r'0\d{3}\s\d{3}\s\d{4} ',                      # Nigeria STD 0XXX XXX XXXX
+        r'\+234\d{10} ',                               # +234 XXXXXXXXXX
+        r'0\d{10} ',                                   # 0XXXXXXXXXX
+        r'\+971\s\d\s\d{3}\s\d{4} ',                   # UAE Intl +971 X XXX XXXX
+        r'0\d\s\d{3}\s\d{4} ',                         # UAE STD 0X XXX XXXX
+        r'\+971\d{8} ',                                # +971 XXXXXXXX
+        r'0\d{8} ',                                    # 0XXXXXXXX
+        r'\+54\s9\s\d{3}\s\d{3}\s\d{4} ',              # Argentina Intl +54 9 XXX XXX XXXX
+        r'\+54\s\d{1}\s\d{4}\s\d{4} ',                 # Argentina Intl +54 X XXXX XXXX
+        r'0\d{3}\s\d{4} ',                             # Argentina STD 0XXX XXXX
+        r'\+54\d{10} ',                                # +54 9 XXXXXXXXXX
+        r'\+54\d{9} ',                                 # +54 XXXXXXXXX
+        r'0\d{7} ',                                    # 0XXXXXXX
+        r'\+966\s\d\s\d{3}\s\d{4} ',                   # Saudi Intl +966 X XXX XXXX
+        r'0\d\s\d{3}\s\d{4} ',                         # Saudi STD 0X XXX XXXX
+        r'\+966\d{8} ',                                # +966 XXXXXXXX
+        r'0\d{8} ',                                    # 0XXXXXXXX
+        r'\+1\d{10} ',                                 # +1 XXXXXXXXXX
+        r'\+1\s\d{3}\s\d{3}\s\d{4} ',                  # +1 XXX XXX XXXX
+        r'\d{5}\s\d{5} ',                              # XXXXX XXXXX
+        r'\d{10} ',                                    # XXXXXXXXXX
+        r'\+44\d{10} ',                                # +44 XXXXXXXXXX
+        r'0\d{10} ',                                   # 0XXXXXXXXXX
+        r'\+61\d{9} ',                                 # +61 XXXXXXXXX
+        r'0\d{9} ',                                    # 0XXXXXXXXX
+        r'\+91\d{10} ',                                # +91 XXXXXXXXXX
+        r'\+49\d{12} ',                                # +49 XXXXXXXXXXXX
+        r'\+49\d{10} ',                                # +49 XXXXXXXXXX
+        r'0\d{11} ',                                   # 0XXXXXXXXXXX
+        r'\+86\d{11} ',                                # +86 XXXXXXXXXXX
+        r'\+81\d{10} ',                                # +81 XXXXXXXXXX
+        r'\+81\d{9} ',                                 # +81 XXXXXXXXX
+        r'0\d{9} ',                                    # 0XXXXXXXXX
+        r'\+55\d{11} ',                                # +55 XXXXXXXXXXX
+        r'\+55\d{10} ',                                # +55 XXXXXXXXXX
+        r'0\d{10} ',                                   # 0XXXXXXXXXX
+        r'\+33\d{9} ',                                 # +33 XXXXXXXXX
+        r'0\d{9} ',                                    # 0XXXXXXXXX
+        r'\+7\d{10} ',                                 # +7 XXXXXXXXXX
+        r'8\d{10} ',                                   # 8 XXXXXXXXXX
+        r'\+27\d{9} ',                                 # +27 XXXXXXXXX
+        r'0\d{9} ',                                    # 0XXXXXXXXX (South Africa STD)
+        r'\+52\d{10} ',                                # +52 XXXXXXXXXX
+        r'01\d{7} ',                                   # 01 XXXXXXX
+        r'\+234\d{10} ',                               # +234 XXXXXXXXXX
+        r'0\d{10} ',                                   # 0XXXXXXXXXX
+        r'\+971\d{8} ',                                # +971 XXXXXXXX
+        r'0\d{8} ',                                    # 0XXXXXXXX
+        r'\+54\s9\s\d{10} ',                           # +54 9 XXXXXXXXXX
+        r'\+54\d{9} ',                                 # +54 XXXXXXXXX
+        r'0\d{7} ',                                    # 0XXXXXXX
+        r'\+966\d{8} ',                                # +966 XXXXXXXX
+        r'0\d{8}'                                     # 0XXXXXXXX
+    ]
+    # Check if the contact matches any of the patterns
+        return any(re.match(pattern, contact) for pattern in patterns) is not None
def _usable_text(value):
    """Return ``value`` if it is a real, non-placeholder string, else ``None``.

    Non-string values (``None``, lists, numbers), blank strings and any
    casing/padding of the "not found" placeholder are all treated as missing.
    """
    if not isinstance(value, str):
        return None
    if value.strip().lower() in ('', 'not found'):
        return None
    return value


def validate_contact_email(personal_data):
    """Validate the contact number and email found in a personal-details dict.

    Args:
        personal_data: Mapping that is read with ``.get`` for the
            ``'contact'`` and ``'email'`` keys.

    Returns:
        A 4-tuple ``(valid_contact, contact_status, valid_email,
        email_status)``. The ``valid_*`` items are ``False`` when the field
        is missing, otherwise whatever ``is_valid_contact`` /
        ``is_valid_email`` return. The status items are the strings
        ``'Valid contact'`` / ``'Invalid contact'`` and ``'Valid email'`` /
        ``'Invalid email'``.
    """
    # The system prompt asks the model to answer 'not found' (lowercase) for
    # missing fields, while the local default is 'Not found'; both spellings,
    # as well as null / non-string values, must count as "missing" so they are
    # never handed to the regex-based validators.
    contact = _usable_text(personal_data.get('contact', 'Not found'))
    email = _usable_text(personal_data.get('email', 'Not found'))

    valid_contact = is_valid_contact(contact) if contact is not None else False
    valid_email = is_valid_email(email) if email is not None else False

    # Variable names are kept from the original API: these hold the status
    # label for either outcome, not only the "invalid" case.
    invalid_contact = 'Valid contact' if valid_contact else 'Invalid contact'
    invalid_email = 'Valid email' if valid_email else 'Invalid email'
    return valid_contact, invalid_contact, valid_email, invalid_email
def _mistral_section(model_output, key):
    """Return ``model_output[key]`` when it is a dict, otherwise ``{}``.

    The model reply may be empty, lack the section, or carry ``null`` / a
    non-dict value under ``key``; all of those collapse to an empty dict so
    one malformed section cannot raise and discard the other section.
    """
    if not isinstance(model_output, dict):
        return {}
    section = model_output.get(key)
    return section if isinstance(section, dict) else {}


def process_resume_data(file_path):
    """Parse a resume file into a structured dict, preferring Mistral.

    The file is converted to text, LinkedIn/GitHub links are pulled from its
    hyperlinks, and the Mistral-backed extractors are asked for personal and
    professional details. If Mistral raises or yields no usable section, the
    SpaCy-based ``Parser_from_model`` is used instead.

    Args:
        file_path: Path of the resume file; passed unchanged to
            ``extract_text_based_on_format`` and, on fallback, to
            ``Parser_from_model``.

    Returns:
        ``{"error": "Text extraction failed"}`` when no text could be
        extracted; otherwise a dict with ``"personal"`` and ``"professional"``
        sections built from the Mistral output (missing fields are
        ``'Not found'``), or whatever ``Parser_from_model`` returns when the
        Mistral path fails.
    """
    resume_text, hyperlinks = extract_text_based_on_format(file_path)
    if not resume_text:
        return {"error": "Text extraction failed"}
    # Report success only after the extraction result has been checked.
    print("Resume converted to text successfully.")

    # Extract LinkedIn and GitHub links
    linkedin_links, github_links = extract_links(hyperlinks)

    # Attempt to use Mistral model for parsing
    try:
        per_data = Model_PersonalDetails_Output(resume_text, client)
        pro_data = Model_ProfessionalDetails_Output(resume_text, client)

        # One of the two extractions may fail without invalidating the other.
        if not per_data:
            logging.warning("Mistral personal data extraction failed.")
        if not pro_data:
            logging.warning("Mistral professional data extraction failed.")

        personal = _mistral_section(per_data, 'personal')
        professional = _mistral_section(pro_data, 'professional')

        # Decide on the fallback before building anything: with neither
        # section usable the result would consist solely of 'Not found'.
        if not personal and not professional:
            raise ValueError("Mistral returned no output")

        # Combine both personal and professional details into a structured output
        result = {
            "personal": {
                "name": personal.get('name', 'Not found'),
                "contact": personal.get('contact_number', 'Not found'),
                "email": personal.get('email', 'Not found'),
                "location": personal.get('Address', 'Not found'),
                "linkedin": linkedin_links,
                "github": github_links,
                # NOTE(review): this is the hyperlink collection as it stands
                # after extract_links(); confirm whether that call removes the
                # LinkedIn/GitHub entries from it.
                "other_links": hyperlinks,
            },
            "professional": {
                "technical_skills": professional.get('technical_skills', 'Not found'),
                "non_technical_skills": professional.get('non_technical_skills', 'Not found'),
                "tools": professional.get('tools', 'Not found'),
                "experience": [
                    {
                        "company": professional.get('companies_worked_at', 'Not found'),
                        "projects": professional.get('projects', 'Not found'),
                        "role": professional.get('worked_as', 'Not found'),
                        "years": professional.get('experience', 'Not found'),
                        "project_experience": professional.get('projects_experience', 'Not found'),
                    }
                ],
                "education": [
                    {
                        "qualification": professional.get('qualification', 'Not found'),
                        "university": professional.get('university', 'Not found'),
                        "course": professional.get('course', 'Not found'),
                        "certificate": professional.get('certification', 'Not found'),
                    }
                ],
            },
        }

        # Validate contact and email
        valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
        result['personal']['valid_contact'] = valid_contact
        result['personal']['invalid_contact'] = invalid_contact
        result['personal']['valid_email'] = valid_email
        result['personal']['invalid_email'] = invalid_email

        logging.info("Successfully extracted data using Mistral.")
        print("---------Mistral-------")
        return result
    # Handle HuggingFace API or Mistral model errors
    except BadRequestError as e:
        logging.error("HuggingFace API error: %s. Falling back to SpaCy.", e)
        print(f"HuggingFace API error: {e}. Falling back to SpaCy.")
    except Exception as e:
        # Deliberately broad: any failure on the Mistral path must lead to the
        # SpaCy fallback below rather than abort resume processing.
        logging.error("An error occurred while processing with Mistral: %s. Falling back to SpaCy.", e)
        print(f"An error occurred while processing with Mistral: {e}. Falling back to SpaCy.")

    # Fallback to SpaCy if Mistral fails
    logging.warning("Mistral failed, switching to SpaCy.")
    print("---------SpaCy-------")
    return Parser_from_model(file_path)