ResumeExtractor3

Sleeping

App Files Files Community

WebashalarForML committed on Oct 17, 2024

Commit

750408e

verified ·

1 Parent(s): a199b9f

Update utils/mistral.py

Browse files

Files changed (1) hide show

utils/mistral.py +39 -34

utils/mistral.py CHANGED Viewed

@@ -41,7 +41,7 @@ def Data_Cleaner(text):
 def Model_ProfessionalDetails_Output(resume, client):
     system_role = {
     "role": "system",
-    "content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return 'not found'."
     }
     user_prompt = {
     "role": "user",
@@ -52,16 +52,16 @@ def Model_ProfessionalDetails_Output(resume, client):
             "technical_skills": ["List all technical skills, programming languages, frameworks, and technologies mentioned in the resume, ensuring they are not mixed with other skill types."],
             "non_technical_skills": ["Identify and list non-technical skills such as leadership, teamwork, and communication skills, ensuring they are not mixed with technical skills."],
             "tools": ["Enumerate and extract all software tools, platforms, and applications referenced in the resume, distinctly separate from skills."],
             "projects": ["Extract all projects names or titles mentioned in the resume."],
             "projects_experience": ["Summarize overall project experiences, providing a brief description of each project as detailed in the resume."],
-            "experience": ["Calculate total professional work experience in years and months based on the resume."],
-            "companies_worked_at": ["List the names of all companies where employment is mentioned in the resume."],
             "certifications": ["Extract and list all certifications obtained as stated in the resume."],
             "roles": ["Include the names of all job titles or roles held as indicated in the resume."],
-            "qualifications": ["List educational qualifications lik from the resume. If none are found, return 'Not found'."],
-            "courses": ["Extract the names of completed courses based on the resume. If none are found, return 'Not found'."],
-            "university": ["Identify and Extract the name of the university, college, or institute attended, based on the resume. If not found, return 'Not found'."],
-            "year_of_graduation": ["Extract the year of graduation from the resume. If not found, return 'Not found'."]
         }}
     }}
     output:
@@ -84,7 +84,7 @@ def Model_ProfessionalDetails_Output(resume, client):
 def Model_PersonalDetails_Output(resume, client):
     system_role = {
     "role": "system",
-    "content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return 'not found'."
     }
     user_prompt = {
     "role": "user",
@@ -92,11 +92,11 @@ def Model_PersonalDetails_Output(resume, client):
     Extract the text in the following output JSON string as:
     {{
         "personal": {{
-            "name": "Extract the full name based on the resume. If not found, return 'No name listed'.",
-            "contact_number": "Extract the contact number from the resume. If not found, return 'No contact number listed'.",
-            "email": "Extract the email address from the resume. If not found, return 'No email listed'.",
-            "Address": "Extract the Address or address from the resume. If not found, return 'No Address listed'.",
-            "link": "Extract any relevant links (e.g., portfolio, LinkedIn) from the resume. If not found, return 'No link listed'."
         }}
     }}
     output:
@@ -281,11 +281,11 @@ def is_valid_contact(contact):
 def validate_contact_email(personal_data):
-    contact = personal_data.get('contact', 'Not found')
-    email = personal_data.get('email', 'Not found')
-    valid_contact = is_valid_contact(contact) if contact != 'Not found' else False
-    valid_email = is_valid_email(email) if email != 'Not found' else False
     invalid_contact = 'Invalid contact' if not valid_contact else 'Valid contact'
     invalid_email = 'Invalid email' if not valid_email else 'Valid email'
@@ -324,39 +324,44 @@ def process_resume_data(file_path):
         # Combine both personal and professional details into a structured output
         result = {
             "personal": {
-                "name": per_data.get('personal', {}).get('name', 'Not found'),
-                "contact": per_data.get('personal', {}).get('contact_number', 'Not found'),
-                "email": per_data.get('personal', {}).get('email', 'Not found'),
-                "location": per_data.get('personal', {}).get('Address', 'Not found'),
                 "linkedin": linkedin_links,
                 "github": github_links,
                 "other_links": hyperlinks  # Store remaining links if needed
             },
             "professional": {
-                "technical_skills": pro_data.get('professional', {}).get('technical_skills', 'Not found'),
-                "non_technical_skills": pro_data.get('professional', {}).get('non_technical_skills', 'Not found'),
-                "tools": pro_data.get('professional', {}).get('tools', 'Not found'),
                 "experience": [
                     {
-                        "company": pro_data.get('professional', {}).get('companies_worked_at', 'Not found'),
-                        "projects": pro_data.get('professional', {}).get('projects', 'Not found'),
-                        "role": pro_data.get('professional', {}).get('worked_as', 'Not found'),
-                        "years": pro_data.get('professional', {}).get('experience', 'Not found'),
-                        "project_experience": pro_data.get('professional', {}).get('projects_experience', 'Not found')
                     }
                 ],
                 "education": [
                     {
-                        "qualification": pro_data.get('professional', {}).get('qualification', 'Not found'),
-                        "university": pro_data.get('professional', {}).get('university', 'Not found'),
-                        "course": pro_data.get('professional', {}).get('course', 'Not found'),
-                        "certificate": pro_data.get('professional', {}).get('certification', 'Not found')
                     }
                 ]
             }
         }
-        # Validate contact and email
         valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
         result['personal']['valid_contact'] = valid_contact
         result['personal']['invalid_contact'] = invalid_contact

 def Model_ProfessionalDetails_Output(resume, client):
     system_role = {
     "role": "system",
+    "content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return []."
     }
     user_prompt = {
     "role": "user",
             "technical_skills": ["List all technical skills, programming languages, frameworks, and technologies mentioned in the resume, ensuring they are not mixed with other skill types."],
             "non_technical_skills": ["Identify and list non-technical skills such as leadership, teamwork, and communication skills, ensuring they are not mixed with technical skills."],
             "tools": ["Enumerate and extract all software tools, platforms, and applications referenced in the resume, distinctly separate from skills."],
+            "companies_worked_at": ["List the names of all companies where employment is mentioned in the resume."],
             "projects": ["Extract all projects names or titles mentioned in the resume."],
             "projects_experience": ["Summarize overall project experiences, providing a brief description of each project as detailed in the resume."],
+            "experience": ["Calculate total professional work experience in years and months based on the resume."],
             "certifications": ["Extract and list all certifications obtained as stated in the resume."],
             "roles": ["Include the names of all job titles or roles held as indicated in the resume."],
+            "qualifications": ["List and Extract all educational qualifications, including degrees (e.g., BBA, MBA), their full forms, and associated levels (e.g., undergraduate, postgraduate) from resume. If none are found, return []."],
+            "university": ["Identify and Extract the name of the University, College, or Institute attended, based on the resume. If not found, return []."],
+            "courses": ["Extract the names of completed courses or based on the resume. If none are found, return []."],
+            "year_of_graduation": ["Extract the year of graduation from the resume. If not found, return []."]
         }}
     }}
     output:
 def Model_PersonalDetails_Output(resume, client):
     system_role = {
     "role": "system",
+    "content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return []."
     }
     user_prompt = {
     "role": "user",
     Extract the text in the following output JSON string as:
     {{
         "personal": {{
+            "name": ["Extract the full name based on the resume. If not found, return []."],
+            "contact_number": ["Extract the contact number from the resume. If not found, return []."],
+            "email": ["Extract the email address from the resume. If not found, return []."],
+            "Address": ["Extract the Address or address from the resume. If not found, return []."],
+            "link": ["Extract any relevant links (e.g., portfolio, LinkedIn) from the resume. If not found, return []."]
         }}
     }}
     output:
 def validate_contact_email(personal_data):
+    contact = personal_data.get('contact', [])
+    email = personal_data.get('email', [])
+    valid_contact = is_valid_contact(contact) if contact != [] else False
+    valid_email = is_valid_email(email) if email != [] else False
     invalid_contact = 'Invalid contact' if not valid_contact else 'Valid contact'
     invalid_email = 'Invalid email' if not valid_email else 'Valid email'
         # Combine both personal and professional details into a structured output
         result = {
             "personal": {
+                "name": per_data.get('personal', {}).get(['name'], ['Not found']),
+                "contact": per_data.get('personal', {}).get(['contact_number'], ['Not found']),
+                "email": per_data.get('personal', {}).get(['email'], ['Not found']),
+                "location": per_data.get('personal', {}).get(['Address'], ['Not found']),
                 "linkedin": linkedin_links,
                 "github": github_links,
                 "other_links": hyperlinks  # Store remaining links if needed
             },
             "professional": {
+                "technical_skills": pro_data.get('professional', {}).get(['technical_skills'], ['Not found']),
+                "non_technical_skills": pro_data.get('professional', {}).get(['non_technical_skills'], ['Not found']),
+                "tools": pro_data.get('professional', {}).get(['tools'], ['Not found']),
                 "experience": [
                     {
+                        "company": pro_data.get('professional', {}).get('companies_worked_at', ['Not found']),
+                        "projects": pro_data.get('professional', {}).get('projects', ['Not found']),
+                        "role": pro_data.get('professional', {}).get('worked_as', ['Not found']),
+                        "years": pro_data.get('professional', {}).get('experience', ['Not found']),
+                        "project_experience": pro_data.get('professional', {}).get('projects_experience', ['Not found'])
                     }
                 ],
                 "education": [
                     {
+                        "qualification": pro_data.get('professional', {}).get('qualification', ['Not found']),
+                        "university": pro_data.get('professional', {}).get('university', ['Not found']),
+                        "course": pro_data.get('professional', {}).get('course', ['Not found']),
+                        "certificate": pro_data.get('professional', {}).get('certification', ['Not found'])
                     }
                 ]
             }
         }
+        #Appending the list if any available as a text
+        result['personal']['other_links'] += per_data.get('personal', {}).get('link', ['Not found'])
+        #Added the validator for details, Validate contact and email
         valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
         result['personal']['valid_contact'] = valid_contact
         result['personal']['invalid_contact'] = invalid_contact