Spaces:
Sleeping
Sleeping
Update utils/mistral.py
Browse files- utils/mistral.py +50 -10
utils/mistral.py
CHANGED
@@ -42,7 +42,7 @@ def Data_Cleaner(text):
|
|
42 |
def Model_ProfessionalDetails_Output(resume, client):
|
43 |
system_role = {
|
44 |
"role": "system",
|
45 |
-
"content": "You are a skilled resume parser. Your task is to extract Professional details
|
46 |
}
|
47 |
user_prompt = {
|
48 |
"role": "user",
|
@@ -81,6 +81,41 @@ def Model_ProfessionalDetails_Output(resume, client):
|
|
81 |
|
82 |
return parsed_response
|
83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
def Model_PersonalDetails_Output(resume, client):
|
85 |
system_role = {
|
86 |
"role": "system",
|
@@ -300,7 +335,7 @@ def extract_link_details(text):
|
|
300 |
email_regex = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
|
301 |
|
302 |
# URL and links regex, updated to avoid conflicts with email domains
|
303 |
-
link_regex = re.compile(r'\b(?:https?:\/\/)?(?:www\.)
|
304 |
|
305 |
emails = email_regex.findall(text)
|
306 |
|
@@ -325,15 +360,18 @@ def process_resume_data(file_path):
|
|
325 |
try:
|
326 |
# Extract personal details using Mistral
|
327 |
per_data = Model_PersonalDetails_Output(resume_text, client)
|
328 |
-
print(per_data)
|
329 |
|
330 |
# Extract professional details using Mistral
|
331 |
pro_data = Model_ProfessionalDetails_Output(resume_text, client)
|
332 |
-
print(pro_data)
|
333 |
|
|
|
|
|
|
|
334 |
# Extract link using Regular Expression
|
335 |
links = extract_link_details(resume_text)
|
336 |
-
print(links)
|
337 |
|
338 |
# Check if per_data and pro_data have been populated correctly
|
339 |
if not per_data:
|
@@ -370,10 +408,10 @@ def process_resume_data(file_path):
|
|
370 |
],
|
371 |
"education": [
|
372 |
{
|
373 |
-
"qualification":
|
374 |
-
"university":
|
375 |
-
"course":
|
376 |
-
"certificate":
|
377 |
}
|
378 |
]
|
379 |
}
|
@@ -382,7 +420,7 @@ def process_resume_data(file_path):
|
|
382 |
|
383 |
|
384 |
#Appending the list if any available as a text
|
385 |
-
result['personal']['other_links'] += per_data.get('personal', {}).get('link',
|
386 |
result['personal']['other_links'] += links
|
387 |
#Added the validator for details, Validate contact and email
|
388 |
valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
|
@@ -391,6 +429,8 @@ def process_resume_data(file_path):
|
|
391 |
result['personal']['valid_email'] = valid_email
|
392 |
result['personal']['invalid_email'] = invalid_email
|
393 |
|
|
|
|
|
394 |
# If Mistral produces valid output, return it
|
395 |
if per_data or pro_data:
|
396 |
logging.info("Successfully extracted data using Mistral.")
|
|
|
42 |
def Model_ProfessionalDetails_Output(resume, client):
|
43 |
system_role = {
|
44 |
"role": "system",
|
45 |
+
"content": "You are a skilled resume parser. Your task is to extract Professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return []."
|
46 |
}
|
47 |
user_prompt = {
|
48 |
"role": "user",
|
|
|
81 |
|
82 |
return parsed_response
|
83 |
|
84 |
+
# Function to call Mistral and process output
|
85 |
+
def Model_EducationalDetails_Output(resume, client):
    """Ask the chat model for the educational details found in a resume.

    Sends a system + user message pair to ``client.chat_completion`` in
    streaming mode, concatenates the streamed text, passes it through
    ``Data_Cleaner`` (defined elsewhere in this file) and decodes the result
    as JSON.

    Args:
        resume: Resume text; interpolated verbatim into the user prompt.
        client: Object exposing ``chat_completion(messages=..., max_tokens=...,
            stream=True, temperature=...)`` whose streamed items provide
            ``choices[0].delta.content``.

    Returns:
        The decoded JSON object (a dict). The prompt requests an
        ``"educational"`` object with the keys ``certifications``,
        ``qualifications``, ``university`` and ``courses``. For each plural
        key present, the matching singular key (``certification``,
        ``qualification``, ``course``) is added as an alias when absent,
        because the consumer in this file reads the singular spellings.
        Returns ``{}`` when the reply is not valid JSON or is not a JSON
        object.
    """
    system_role = {
        "role": "system",
        "content": "You are a skilled resume parser. Your task is to Extract All Educational qualifications, including Degrees and Certifications from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return []."
    }
    # The prompt lines are kept flush-left so the text sent to the model is
    # exactly the text shown in the reviewed source.
    user_prompt = {
        "role": "user",
        "content": f'''Act as a resume parser for the following text given in text: {resume}
Extract the text in the following output JSON string as:
{{
"educational": {{
"certifications": ["List and Extract all certifications mentioned in the resume."],
"qualifications": ["List and Extract all educational qualifications, including degrees (e.g., BBA, MBA), their full forms, and associated levels (e.g., undergraduate, postgraduate) from resume. If none are found, return []."],
"university": ["List and Extract the name of the University, College, or Institute attended, based on the resume. If not found, return []."],
"courses": ["List and Extract the names of completed courses or based on the resume. If none are found, return []."]
}}
}}
output:
'''
    }

    # Collect the streamed fragments and join once at the end.
    pieces = []
    for message in client.chat_completion(messages=[system_role, user_prompt], max_tokens=4096, stream=True, temperature=0.35):
        fragment = message.choices[0].delta.content
        # NOTE(review): a chunk that carries no text may expose ``None`` here;
        # concatenating it would raise TypeError, so it is skipped. Confirm
        # against the client library's streaming documentation.
        if fragment:
            pieces.append(fragment)
    response = "".join(pieces)

    try:
        clean_response = Data_Cleaner(response)
        parsed_response = json.loads(clean_response)
    except json.JSONDecodeError as e:
        logging.error(f"JSON Decode Error: {e}")
        return {}

    # Callers use ``.get`` on the result, so anything other than a JSON
    # object is treated like a decode failure.
    if not isinstance(parsed_response, dict):
        logging.error("Educational details response is not a JSON object.")
        return {}

    # The prompt asks for plural keys while the consumer in this file looks
    # up the singular forms; expose both without overwriting anything the
    # model already returned.
    educational = parsed_response.get("educational")
    if isinstance(educational, dict):
        aliases = (
            ("qualifications", "qualification"),
            ("courses", "course"),
            ("certifications", "certification"),
        )
        for plural, singular in aliases:
            if plural in educational:
                educational.setdefault(singular, educational[plural])

    return parsed_response
|
118 |
+
|
119 |
def Model_PersonalDetails_Output(resume, client):
|
120 |
system_role = {
|
121 |
"role": "system",
|
|
|
335 |
email_regex = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
|
336 |
|
337 |
# URL and links regex, updated to avoid conflicts with email domains
|
338 |
+
link_regex = re.compile(r'\b(?:https?:\/\/)?(?:www\.)[a-zA-Z0-9-]+\.(?:com|co\.in|co|io|org|net|edu|gov|mil|int|uk|us|in|de|au|app|tech|xyz|info|biz|fr|dev)\b')
|
339 |
|
340 |
emails = email_regex.findall(text)
|
341 |
|
|
|
360 |
try:
|
361 |
# Extract personal details using Mistral
|
362 |
per_data = Model_PersonalDetails_Output(resume_text, client)
|
363 |
+
print(f"Personal Data -----> {per_data}")
|
364 |
|
365 |
# Extract professional details using Mistral
|
366 |
pro_data = Model_ProfessionalDetails_Output(resume_text, client)
|
367 |
+
print(f"Professional Data -----> {pro_data}")
|
368 |
|
369 |
+
Edu_data=Model_EducationalDetails_Output(resume, client)
|
370 |
+
print(f"Educational Data -----> {Edu_data}")
|
371 |
+
|
372 |
# Extract link using Regular Expression
|
373 |
links = extract_link_details(resume_text)
|
374 |
+
print(f"Links Data -----> {links}")
|
375 |
|
376 |
# Check if per_data and pro_data have been populated correctly
|
377 |
if not per_data:
|
|
|
408 |
],
|
409 |
"education": [
|
410 |
{
|
411 |
+
"qualification": Edu_data.get('educational', {}).get('qualification', 'Not found'),
|
412 |
+
"university": Edu_data.get('educational', {}).get('university', 'Not found'),
|
413 |
+
"course": Edu_data.get('educational', {}).get('course', 'Not found'),
|
414 |
+
"certificate": Edu_data.get('educational', {}).get('certification', 'Not found')
|
415 |
}
|
416 |
]
|
417 |
}
|
|
|
420 |
|
421 |
|
422 |
#Appending the list if any available as a text
|
423 |
+
result['personal']['other_links'] += per_data.get('personal', {}).get('link', [])
|
424 |
result['personal']['other_links'] += links
|
425 |
#Added the validator for details, Validate contact and email
|
426 |
valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
|
|
|
429 |
result['personal']['valid_email'] = valid_email
|
430 |
result['personal']['invalid_email'] = invalid_email
|
431 |
|
432 |
+
#Appending the Educational Details if any available as a text
|
433 |
+
|
434 |
# If Mistral produces valid output, return it
|
435 |
if per_data or pro_data:
|
436 |
logging.info("Successfully extracted data using Mistral.")
|