WebashalarForML committed on
Commit
750408e
·
verified ·
1 Parent(s): a199b9f

Update utils/mistral.py

Browse files
Files changed (1) hide show
  1. utils/mistral.py +39 -34
utils/mistral.py CHANGED
@@ -41,7 +41,7 @@ def Data_Cleaner(text):
41
  def Model_ProfessionalDetails_Output(resume, client):
42
  system_role = {
43
  "role": "system",
44
- "content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return 'not found'."
45
  }
46
  user_prompt = {
47
  "role": "user",
@@ -52,16 +52,16 @@ def Model_ProfessionalDetails_Output(resume, client):
52
  "technical_skills": ["List all technical skills, programming languages, frameworks, and technologies mentioned in the resume, ensuring they are not mixed with other skill types."],
53
  "non_technical_skills": ["Identify and list non-technical skills such as leadership, teamwork, and communication skills, ensuring they are not mixed with technical skills."],
54
  "tools": ["Enumerate and extract all software tools, platforms, and applications referenced in the resume, distinctly separate from skills."],
 
55
  "projects": ["Extract all projects names or titles mentioned in the resume."],
56
  "projects_experience": ["Summarize overall project experiences, providing a brief description of each project as detailed in the resume."],
57
- "experience": ["Calculate total professional work experience in years and months based on the resume."],
58
- "companies_worked_at": ["List the names of all companies where employment is mentioned in the resume."],
59
  "certifications": ["Extract and list all certifications obtained as stated in the resume."],
60
  "roles": ["Include the names of all job titles or roles held as indicated in the resume."],
61
- "qualifications": ["List educational qualifications lik from the resume. If none are found, return 'Not found'."],
62
- "courses": ["Extract the names of completed courses based on the resume. If none are found, return 'Not found'."],
63
- "university": ["Identify and Extract the name of the university, college, or institute attended, based on the resume. If not found, return 'Not found'."],
64
- "year_of_graduation": ["Extract the year of graduation from the resume. If not found, return 'Not found'."]
65
  }}
66
  }}
67
  output:
@@ -84,7 +84,7 @@ def Model_ProfessionalDetails_Output(resume, client):
84
  def Model_PersonalDetails_Output(resume, client):
85
  system_role = {
86
  "role": "system",
87
- "content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return 'not found'."
88
  }
89
  user_prompt = {
90
  "role": "user",
@@ -92,11 +92,11 @@ def Model_PersonalDetails_Output(resume, client):
92
  Extract the text in the following output JSON string as:
93
  {{
94
  "personal": {{
95
- "name": "Extract the full name based on the resume. If not found, return 'No name listed'.",
96
- "contact_number": "Extract the contact number from the resume. If not found, return 'No contact number listed'.",
97
- "email": "Extract the email address from the resume. If not found, return 'No email listed'.",
98
- "Address": "Extract the Address or address from the resume. If not found, return 'No Address listed'.",
99
- "link": "Extract any relevant links (e.g., portfolio, LinkedIn) from the resume. If not found, return 'No link listed'."
100
  }}
101
  }}
102
  output:
@@ -281,11 +281,11 @@ def is_valid_contact(contact):
281
 
282
 
283
  def validate_contact_email(personal_data):
284
- contact = personal_data.get('contact', 'Not found')
285
- email = personal_data.get('email', 'Not found')
286
 
287
- valid_contact = is_valid_contact(contact) if contact != 'Not found' else False
288
- valid_email = is_valid_email(email) if email != 'Not found' else False
289
 
290
  invalid_contact = 'Invalid contact' if not valid_contact else 'Valid contact'
291
  invalid_email = 'Invalid email' if not valid_email else 'Valid email'
@@ -324,39 +324,44 @@ def process_resume_data(file_path):
324
  # Combine both personal and professional details into a structured output
325
  result = {
326
  "personal": {
327
- "name": per_data.get('personal', {}).get('name', 'Not found'),
328
- "contact": per_data.get('personal', {}).get('contact_number', 'Not found'),
329
- "email": per_data.get('personal', {}).get('email', 'Not found'),
330
- "location": per_data.get('personal', {}).get('Address', 'Not found'),
331
  "linkedin": linkedin_links,
332
  "github": github_links,
333
  "other_links": hyperlinks # Store remaining links if needed
334
  },
335
  "professional": {
336
- "technical_skills": pro_data.get('professional', {}).get('technical_skills', 'Not found'),
337
- "non_technical_skills": pro_data.get('professional', {}).get('non_technical_skills', 'Not found'),
338
- "tools": pro_data.get('professional', {}).get('tools', 'Not found'),
339
  "experience": [
340
  {
341
- "company": pro_data.get('professional', {}).get('companies_worked_at', 'Not found'),
342
- "projects": pro_data.get('professional', {}).get('projects', 'Not found'),
343
- "role": pro_data.get('professional', {}).get('worked_as', 'Not found'),
344
- "years": pro_data.get('professional', {}).get('experience', 'Not found'),
345
- "project_experience": pro_data.get('professional', {}).get('projects_experience', 'Not found')
346
  }
347
  ],
348
  "education": [
349
  {
350
- "qualification": pro_data.get('professional', {}).get('qualification', 'Not found'),
351
- "university": pro_data.get('professional', {}).get('university', 'Not found'),
352
- "course": pro_data.get('professional', {}).get('course', 'Not found'),
353
- "certificate": pro_data.get('professional', {}).get('certification', 'Not found')
354
  }
355
  ]
356
  }
357
  }
358
 
359
- # Validate contact and email
 
 
 
 
 
360
  valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
361
  result['personal']['valid_contact'] = valid_contact
362
  result['personal']['invalid_contact'] = invalid_contact
 
41
  def Model_ProfessionalDetails_Output(resume, client):
42
  system_role = {
43
  "role": "system",
44
+ "content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return []."
45
  }
46
  user_prompt = {
47
  "role": "user",
 
52
  "technical_skills": ["List all technical skills, programming languages, frameworks, and technologies mentioned in the resume, ensuring they are not mixed with other skill types."],
53
  "non_technical_skills": ["Identify and list non-technical skills such as leadership, teamwork, and communication skills, ensuring they are not mixed with technical skills."],
54
  "tools": ["Enumerate and extract all software tools, platforms, and applications referenced in the resume, distinctly separate from skills."],
55
+ "companies_worked_at": ["List the names of all companies where employment is mentioned in the resume."],
56
  "projects": ["Extract all projects names or titles mentioned in the resume."],
57
  "projects_experience": ["Summarize overall project experiences, providing a brief description of each project as detailed in the resume."],
58
+ "experience": ["Calculate total professional work experience in years and months based on the resume."],
 
59
  "certifications": ["Extract and list all certifications obtained as stated in the resume."],
60
  "roles": ["Include the names of all job titles or roles held as indicated in the resume."],
61
+ "qualifications": ["List and Extract all educational qualifications, including degrees (e.g., BBA, MBA), their full forms, and associated levels (e.g., undergraduate, postgraduate) from resume. If none are found, return []."],
62
+ "university": ["Identify and Extract the name of the University, College, or Institute attended, based on the resume. If not found, return []."],
63
+ "courses": ["Extract the names of completed courses or based on the resume. If none are found, return []."],
64
+ "year_of_graduation": ["Extract the year of graduation from the resume. If not found, return []."]
65
  }}
66
  }}
67
  output:
 
84
  def Model_PersonalDetails_Output(resume, client):
85
  system_role = {
86
  "role": "system",
87
+ "content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return []."
88
  }
89
  user_prompt = {
90
  "role": "user",
 
92
  Extract the text in the following output JSON string as:
93
  {{
94
  "personal": {{
95
+ "name": ["Extract the full name based on the resume. If not found, return []."],
96
+ "contact_number": ["Extract the contact number from the resume. If not found, return []."],
97
+ "email": ["Extract the email address from the resume. If not found, return []."],
98
+ "Address": ["Extract the Address or address from the resume. If not found, return []."],
99
+ "link": ["Extract any relevant links (e.g., portfolio, LinkedIn) from the resume. If not found, return []."]
100
  }}
101
  }}
102
  output:
 
281
 
282
 
283
  def validate_contact_email(personal_data):
284
+ contact = personal_data.get('contact', [])
285
+ email = personal_data.get('email', [])
286
 
287
+ valid_contact = is_valid_contact(contact) if contact != [] else False
288
+ valid_email = is_valid_email(email) if email != [] else False
289
 
290
  invalid_contact = 'Invalid contact' if not valid_contact else 'Valid contact'
291
  invalid_email = 'Invalid email' if not valid_email else 'Valid email'
 
324
  # Combine both personal and professional details into a structured output
325
  result = {
326
  "personal": {
327
+ "name": per_data.get('personal', {}).get(['name'], ['Not found']),
328
+ "contact": per_data.get('personal', {}).get(['contact_number'], ['Not found']),
329
+ "email": per_data.get('personal', {}).get(['email'], ['Not found']),
330
+ "location": per_data.get('personal', {}).get(['Address'], ['Not found']),
331
  "linkedin": linkedin_links,
332
  "github": github_links,
333
  "other_links": hyperlinks # Store remaining links if needed
334
  },
335
  "professional": {
336
+ "technical_skills": pro_data.get('professional', {}).get(['technical_skills'], ['Not found']),
337
+ "non_technical_skills": pro_data.get('professional', {}).get(['non_technical_skills'], ['Not found']),
338
+ "tools": pro_data.get('professional', {}).get(['tools'], ['Not found']),
339
  "experience": [
340
  {
341
+ "company": pro_data.get('professional', {}).get('companies_worked_at', ['Not found']),
342
+ "projects": pro_data.get('professional', {}).get('projects', ['Not found']),
343
+ "role": pro_data.get('professional', {}).get('worked_as', ['Not found']),
344
+ "years": pro_data.get('professional', {}).get('experience', ['Not found']),
345
+ "project_experience": pro_data.get('professional', {}).get('projects_experience', ['Not found'])
346
  }
347
  ],
348
  "education": [
349
  {
350
+ "qualification": pro_data.get('professional', {}).get('qualification', ['Not found']),
351
+ "university": pro_data.get('professional', {}).get('university', ['Not found']),
352
+ "course": pro_data.get('professional', {}).get('course', ['Not found']),
353
+ "certificate": pro_data.get('professional', {}).get('certification', ['Not found'])
354
  }
355
  ]
356
  }
357
  }
358
 
359
+
360
+
361
+ # Append the 'link' entries from per_data to other_links (defaults to ['Not found'] when absent)
362
+ result['personal']['other_links'] += per_data.get('personal', {}).get('link', ['Not found'])
363
+
364
+ # Validate the contact number and email address
365
  valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
366
  result['personal']['valid_contact'] = valid_contact
367
  result['personal']['invalid_contact'] = invalid_contact