Spaces:
Sleeping
Sleeping
Update utils/mistral.py
Browse files- utils/mistral.py +39 -34
utils/mistral.py
CHANGED
@@ -41,7 +41,7 @@ def Data_Cleaner(text):
|
|
41 |
def Model_ProfessionalDetails_Output(resume, client):
|
42 |
system_role = {
|
43 |
"role": "system",
|
44 |
-
"content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return
|
45 |
}
|
46 |
user_prompt = {
|
47 |
"role": "user",
|
@@ -52,16 +52,16 @@ def Model_ProfessionalDetails_Output(resume, client):
|
|
52 |
"technical_skills": ["List all technical skills, programming languages, frameworks, and technologies mentioned in the resume, ensuring they are not mixed with other skill types."],
|
53 |
"non_technical_skills": ["Identify and list non-technical skills such as leadership, teamwork, and communication skills, ensuring they are not mixed with technical skills."],
|
54 |
"tools": ["Enumerate and extract all software tools, platforms, and applications referenced in the resume, distinctly separate from skills."],
|
|
|
55 |
"projects": ["Extract all projects names or titles mentioned in the resume."],
|
56 |
"projects_experience": ["Summarize overall project experiences, providing a brief description of each project as detailed in the resume."],
|
57 |
-
"experience": ["Calculate total professional work experience in years and months based on the resume."],
|
58 |
-
"companies_worked_at": ["List the names of all companies where employment is mentioned in the resume."],
|
59 |
"certifications": ["Extract and list all certifications obtained as stated in the resume."],
|
60 |
"roles": ["Include the names of all job titles or roles held as indicated in the resume."],
|
61 |
-
"qualifications": ["List educational qualifications
|
62 |
-
"
|
63 |
-
"
|
64 |
-
"year_of_graduation": ["Extract the year of graduation from the resume. If not found, return
|
65 |
}}
|
66 |
}}
|
67 |
output:
|
@@ -84,7 +84,7 @@ def Model_ProfessionalDetails_Output(resume, client):
|
|
84 |
def Model_PersonalDetails_Output(resume, client):
|
85 |
system_role = {
|
86 |
"role": "system",
|
87 |
-
"content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return
|
88 |
}
|
89 |
user_prompt = {
|
90 |
"role": "user",
|
@@ -92,11 +92,11 @@ def Model_PersonalDetails_Output(resume, client):
|
|
92 |
Extract the text in the following output JSON string as:
|
93 |
{{
|
94 |
"personal": {{
|
95 |
-
"name": "Extract the full name based on the resume. If not found, return
|
96 |
-
"contact_number": "Extract the contact number from the resume. If not found, return
|
97 |
-
"email": "Extract the email address from the resume. If not found, return
|
98 |
-
"Address": "Extract the Address or address from the resume. If not found, return
|
99 |
-
"link": "Extract any relevant links (e.g., portfolio, LinkedIn) from the resume. If not found, return
|
100 |
}}
|
101 |
}}
|
102 |
output:
|
@@ -281,11 +281,11 @@ def is_valid_contact(contact):
|
|
281 |
|
282 |
|
283 |
def validate_contact_email(personal_data):
|
284 |
-
contact = personal_data.get('contact',
|
285 |
-
email = personal_data.get('email',
|
286 |
|
287 |
-
valid_contact = is_valid_contact(contact) if contact !=
|
288 |
-
valid_email = is_valid_email(email) if email !=
|
289 |
|
290 |
invalid_contact = 'Invalid contact' if not valid_contact else 'Valid contact'
|
291 |
invalid_email = 'Invalid email' if not valid_email else 'Valid email'
|
@@ -324,39 +324,44 @@ def process_resume_data(file_path):
|
|
324 |
# Combine both personal and professional details into a structured output
|
325 |
result = {
|
326 |
"personal": {
|
327 |
-
"name": per_data.get('personal', {}).get('name', 'Not found'),
|
328 |
-
"contact": per_data.get('personal', {}).get('contact_number', 'Not found'),
|
329 |
-
"email": per_data.get('personal', {}).get('email', 'Not found'),
|
330 |
-
"location": per_data.get('personal', {}).get('Address', 'Not found'),
|
331 |
"linkedin": linkedin_links,
|
332 |
"github": github_links,
|
333 |
"other_links": hyperlinks # Store remaining links if needed
|
334 |
},
|
335 |
"professional": {
|
336 |
-
"technical_skills": pro_data.get('professional', {}).get('technical_skills', 'Not found'),
|
337 |
-
"non_technical_skills": pro_data.get('professional', {}).get('non_technical_skills', 'Not found'),
|
338 |
-
"tools": pro_data.get('professional', {}).get('tools', 'Not found'),
|
339 |
"experience": [
|
340 |
{
|
341 |
-
"company": pro_data.get('professional', {}).get('companies_worked_at', 'Not found'),
|
342 |
-
"projects": pro_data.get('professional', {}).get('projects', 'Not found'),
|
343 |
-
"role": pro_data.get('professional', {}).get('worked_as', 'Not found'),
|
344 |
-
"years": pro_data.get('professional', {}).get('experience', 'Not found'),
|
345 |
-
"project_experience": pro_data.get('professional', {}).get('projects_experience', 'Not found')
|
346 |
}
|
347 |
],
|
348 |
"education": [
|
349 |
{
|
350 |
-
"qualification": pro_data.get('professional', {}).get('qualification', 'Not found'),
|
351 |
-
"university": pro_data.get('professional', {}).get('university', 'Not found'),
|
352 |
-
"course": pro_data.get('professional', {}).get('course', 'Not found'),
|
353 |
-
"certificate": pro_data.get('professional', {}).get('certification', 'Not found')
|
354 |
}
|
355 |
]
|
356 |
}
|
357 |
}
|
358 |
|
359 |
-
|
|
|
|
|
|
|
|
|
|
|
360 |
valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
|
361 |
result['personal']['valid_contact'] = valid_contact
|
362 |
result['personal']['invalid_contact'] = invalid_contact
|
|
|
41 |
def Model_ProfessionalDetails_Output(resume, client):
|
42 |
system_role = {
|
43 |
"role": "system",
|
44 |
+
"content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return []."
|
45 |
}
|
46 |
user_prompt = {
|
47 |
"role": "user",
|
|
|
52 |
"technical_skills": ["List all technical skills, programming languages, frameworks, and technologies mentioned in the resume, ensuring they are not mixed with other skill types."],
|
53 |
"non_technical_skills": ["Identify and list non-technical skills such as leadership, teamwork, and communication skills, ensuring they are not mixed with technical skills."],
|
54 |
"tools": ["Enumerate and extract all software tools, platforms, and applications referenced in the resume, distinctly separate from skills."],
|
55 |
+
"companies_worked_at": ["List the names of all companies where employment is mentioned in the resume."],
|
56 |
"projects": ["Extract all projects names or titles mentioned in the resume."],
|
57 |
"projects_experience": ["Summarize overall project experiences, providing a brief description of each project as detailed in the resume."],
|
58 |
+
"experience": ["Calculate total professional work experience in years and months based on the resume."],
|
|
|
59 |
"certifications": ["Extract and list all certifications obtained as stated in the resume."],
|
60 |
"roles": ["Include the names of all job titles or roles held as indicated in the resume."],
|
61 |
+
"qualifications": ["List and Extract all educational qualifications, including degrees (e.g., BBA, MBA), their full forms, and associated levels (e.g., undergraduate, postgraduate) from resume. If none are found, return []."],
|
62 |
+
"university": ["Identify and Extract the name of the University, College, or Institute attended, based on the resume. If not found, return []."],
|
63 |
+
"courses": ["Extract the names of completed courses or based on the resume. If none are found, return []."],
|
64 |
+
"year_of_graduation": ["Extract the year of graduation from the resume. If not found, return []."]
|
65 |
}}
|
66 |
}}
|
67 |
output:
|
|
|
84 |
def Model_PersonalDetails_Output(resume, client):
|
85 |
system_role = {
|
86 |
"role": "system",
|
87 |
+
"content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return []."
|
88 |
}
|
89 |
user_prompt = {
|
90 |
"role": "user",
|
|
|
92 |
Extract the text in the following output JSON string as:
|
93 |
{{
|
94 |
"personal": {{
|
95 |
+
"name": ["Extract the full name based on the resume. If not found, return []."],
|
96 |
+
"contact_number": ["Extract the contact number from the resume. If not found, return []."],
|
97 |
+
"email": ["Extract the email address from the resume. If not found, return []."],
|
98 |
+
"Address": ["Extract the Address or address from the resume. If not found, return []."],
|
99 |
+
"link": ["Extract any relevant links (e.g., portfolio, LinkedIn) from the resume. If not found, return []."]
|
100 |
}}
|
101 |
}}
|
102 |
output:
|
|
|
281 |
|
282 |
|
283 |
def validate_contact_email(personal_data):
|
284 |
+
contact = personal_data.get('contact', [])
|
285 |
+
email = personal_data.get('email', [])
|
286 |
|
287 |
+
valid_contact = is_valid_contact(contact) if contact != [] else False
|
288 |
+
valid_email = is_valid_email(email) if email != [] else False
|
289 |
|
290 |
invalid_contact = 'Invalid contact' if not valid_contact else 'Valid contact'
|
291 |
invalid_email = 'Invalid email' if not valid_email else 'Valid email'
|
|
|
324 |
# Combine both personal and professional details into a structured output
|
325 |
# Fetch the two model-produced sub-dictionaries once. `or {}` also covers the
# case where the key is present but null, so the lookups below never run
# against None.
personal_src = per_data.get('personal') or {}
professional_src = pro_data.get('professional') or {}

# NOTE: dictionary keys passed to .get() must be plain strings. Passing a list
# (e.g. .get(['name'], ...)) raises "TypeError: unhashable type: 'list'".
# Only the *default* is a list, so every field is list-shaped whether or not
# the model supplied it.
result = {
    "personal": {
        "name": personal_src.get('name', ['Not found']),
        "contact": personal_src.get('contact_number', ['Not found']),
        "email": personal_src.get('email', ['Not found']),
        "location": personal_src.get('Address', ['Not found']),
        "linkedin": linkedin_links,
        "github": github_links,
        "other_links": hyperlinks  # Store remaining links if needed
    },
    "professional": {
        "technical_skills": professional_src.get('technical_skills', ['Not found']),
        "non_technical_skills": professional_src.get('non_technical_skills', ['Not found']),
        "tools": professional_src.get('tools', ['Not found']),
        "experience": [
            {
                "company": professional_src.get('companies_worked_at', ['Not found']),
                "projects": professional_src.get('projects', ['Not found']),
                # The professional-details prompt asks the model for "roles";
                # the previous key 'worked_as' is never requested, so it
                # always fell through to the default.
                "role": professional_src.get('roles', ['Not found']),
                "years": professional_src.get('experience', ['Not found']),
                "project_experience": professional_src.get('projects_experience', ['Not found'])
            }
        ],
        "education": [
            {
                # The prompt requests the plural keys "qualifications",
                # "courses" and "certifications"; the singular spellings used
                # before never matched the model output.
                "qualification": professional_src.get('qualifications', ['Not found']),
                "university": professional_src.get('university', ['Not found']),
                "course": professional_src.get('courses', ['Not found']),
                "certificate": professional_src.get('certifications', ['Not found'])
            }
        ]
    }
}
|
358 |
|
359 |
+
|
360 |
+
|
361 |
+
# Merge any links the model extracted from the resume text into other_links.
# A missing or null 'link' contributes nothing, rather than adding the
# 'Not found' placeholder to a list of real links. A bare string is wrapped
# first, because `list += str` would extend the list one character at a time.
extracted_links = (per_data.get('personal') or {}).get('link') or []
if isinstance(extracted_links, str):
    extracted_links = [extracted_links]
result['personal']['other_links'] += extracted_links
|
363 |
+
|
364 |
+
# Validate the extracted contact number and email address
|
365 |
valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
|
366 |
result['personal']['valid_contact'] = valid_contact
|
367 |
result['personal']['invalid_contact'] = invalid_contact
|