WebashalarForML committed on
Commit
d12b821
·
verified ·
1 Parent(s): d581df7

Update utils/mistral.py

Browse files
Files changed (1) hide show
  1. utils/mistral.py +50 -10
utils/mistral.py CHANGED
@@ -42,7 +42,7 @@ def Data_Cleaner(text):
42
  def Model_ProfessionalDetails_Output(resume, client):
43
  system_role = {
44
  "role": "system",
45
- "content": "You are a skilled resume parser. Your task is to extract Professional details as well as Academic details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return []."
46
  }
47
  user_prompt = {
48
  "role": "user",
@@ -81,6 +81,41 @@ def Model_ProfessionalDetails_Output(resume, client):
81
 
82
  return parsed_response
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  def Model_PersonalDetails_Output(resume, client):
85
  system_role = {
86
  "role": "system",
@@ -300,7 +335,7 @@ def extract_link_details(text):
300
  email_regex = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
301
 
302
  # URL and links regex, updated to avoid conflicts with email domains
303
- link_regex = re.compile(r'\b(?:https?:\/\/)?(?:www\.)?[a-zA-Z0-9-]+\.(?:com|co\.in|co|io|org|net|edu|gov|mil|int|uk|us|in|de|au|app|tech|xyz|info|biz|fr|dev)\b')
304
 
305
  emails = email_regex.findall(text)
306
 
@@ -325,15 +360,18 @@ def process_resume_data(file_path):
325
  try:
326
  # Extract personal details using Mistral
327
  per_data = Model_PersonalDetails_Output(resume_text, client)
328
- print(per_data)
329
 
330
  # Extract professional details using Mistral
331
  pro_data = Model_ProfessionalDetails_Output(resume_text, client)
332
- print(pro_data)
333
 
 
 
 
334
  # Extract link using Regular Expression
335
  links = extract_link_details(resume_text)
336
- print(links)
337
 
338
  # Check if per_data and pro_data have been populated correctly
339
  if not per_data:
@@ -370,10 +408,10 @@ def process_resume_data(file_path):
370
  ],
371
  "education": [
372
  {
373
- "qualification": pro_data.get('professional', {}).get('qualification', 'Not found'),
374
- "university": pro_data.get('professional', {}).get('university', 'Not found'),
375
- "course": pro_data.get('professional', {}).get('course', 'Not found'),
376
- "certificate": pro_data.get('professional', {}).get('certification', 'Not found')
377
  }
378
  ]
379
  }
@@ -382,7 +420,7 @@ def process_resume_data(file_path):
382
 
383
 
384
  #Appending the list if any available as a text
385
- result['personal']['other_links'] += per_data.get('personal', {}).get('link', 'Not found')
386
  result['personal']['other_links'] += links
387
  #Added the validator for details, Validate contact and email
388
  valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
@@ -391,6 +429,8 @@ def process_resume_data(file_path):
391
  result['personal']['valid_email'] = valid_email
392
  result['personal']['invalid_email'] = invalid_email
393
 
 
 
394
  # If Mistral produces valid output, return it
395
  if per_data or pro_data:
396
  logging.info("Successfully extracted data using Mistral.")
 
42
  def Model_ProfessionalDetails_Output(resume, client):
43
  system_role = {
44
  "role": "system",
45
+ "content": "You are a skilled resume parser. Your task is to extract Professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return []."
46
  }
47
  user_prompt = {
48
  "role": "user",
 
81
 
82
  return parsed_response
83
 
84
# Function to call Mistral and process output
def Model_EducationalDetails_Output(resume, client):
    """Ask the chat model to extract educational details from a resume.

    Builds a system/user message pair, streams the completion from
    ``client.chat_completion``, concatenates the streamed text, passes it
    through ``Data_Cleaner`` and decodes the result as JSON.

    Args:
        resume: Resume text interpolated into the user prompt.
        client: Object exposing ``chat_completion(messages=..., max_tokens=...,
            stream=..., temperature=...)`` and yielding chunks that carry
            ``choices[0].delta.content``.

    Returns:
        The decoded JSON value. The prompt asks the model for a dict of the
        form ``{"educational": {"certifications": [...], "qualifications":
        [...], "university": [...], "courses": [...]}}``. When that shape is
        returned, singular aliases (``qualification``, ``course``,
        ``certification``) are added next to the plural keys. An empty dict
        is returned when the cleaned response is not valid JSON.
    """
    system_role = {
        "role": "system",
        "content": "You are a skilled resume parser. Your task is to Extract All Educational qualifications, including Degrees and Certifications from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return []."
    }
    user_prompt = {
        "role": "user",
        "content": f'''Act as a resume parser for the following text given in text: {resume}
Extract the text in the following output JSON string as:
{{
"educational": {{
"certifications": ["List and Extract all certifications mentioned in the resume."],
"qualifications": ["List and Extract all educational qualifications, including degrees (e.g., BBA, MBA), their full forms, and associated levels (e.g., undergraduate, postgraduate) from resume. If none are found, return []."],
"university": ["List and Extract the name of the University, College, or Institute attended, based on the resume. If not found, return []."],
"courses": ["List and Extract the names of completed courses or based on the resume. If none are found, return []."]
}}
}}
output:
'''
    }

    # Streamed chunks may carry no choices or a None content (for example a
    # role-only or terminating chunk); concatenating None would raise
    # TypeError, so such chunks are skipped.
    pieces = []
    for message in client.chat_completion(messages=[system_role, user_prompt], max_tokens=4096, stream=True, temperature=0.35):
        if not message.choices:
            continue
        chunk_text = message.choices[0].delta.content
        if chunk_text:
            pieces.append(chunk_text)
    response = "".join(pieces)

    try:
        clean_response = Data_Cleaner(response)
        parsed_response = json.loads(clean_response)
    except json.JSONDecodeError as e:
        logging.error(f"JSON Decode Error: {e}")
        return {}

    # The prompt requests plural keys, while the result builder in this file
    # reads 'qualification', 'course' and 'certification'. Expose both
    # spellings so either lookup succeeds; existing keys are never overwritten.
    educational = parsed_response.get("educational") if isinstance(parsed_response, dict) else None
    if isinstance(educational, dict):
        for plural, singular in (
            ("qualifications", "qualification"),
            ("courses", "course"),
            ("certifications", "certification"),
        ):
            if plural in educational:
                educational.setdefault(singular, educational[plural])

    return parsed_response
118
+
119
  def Model_PersonalDetails_Output(resume, client):
120
  system_role = {
121
  "role": "system",
 
335
  email_regex = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
336
 
337
  # URL and links regex, updated to avoid conflicts with email domains
338
+ link_regex = re.compile(r'\b(?:https?:\/\/)?(?:www\.)[a-zA-Z0-9-]+\.(?:com|co\.in|co|io|org|net|edu|gov|mil|int|uk|us|in|de|au|app|tech|xyz|info|biz|fr|dev)\b')
339
 
340
  emails = email_regex.findall(text)
341
 
 
360
  try:
361
  # Extract personal details using Mistral
362
  per_data = Model_PersonalDetails_Output(resume_text, client)
363
+ print(f"Personal Data -----> {per_data}")
364
 
365
  # Extract professional details using Mistral
366
  pro_data = Model_ProfessionalDetails_Output(resume_text, client)
367
+ print(f"Professional Data -----> {pro_data}")
368
 
369
+ Edu_data=Model_EducationalDetails_Output(resume, client)
370
+ print(f"Educational Data -----> {Edu_data}")
371
+
372
  # Extract link using Regular Expression
373
  links = extract_link_details(resume_text)
374
+ print(f"Links Data -----> {links}")
375
 
376
  # Check if per_data and pro_data have been populated correctly
377
  if not per_data:
 
408
  ],
409
  "education": [
410
  {
411
+ "qualification": Edu_data.get('educational', {}).get('qualification', 'Not found'),
412
+ "university": Edu_data.get('educational', {}).get('university', 'Not found'),
413
+ "course": Edu_data.get('educational', {}).get('course', 'Not found'),
414
+ "certificate": Edu_data.get('educational', {}).get('certification', 'Not found')
415
  }
416
  ]
417
  }
 
420
 
421
 
422
  #Appending the list if any available as a text
423
+ result['personal']['other_links'] += per_data.get('personal', {}).get('link', [])
424
  result['personal']['other_links'] += links
425
  #Added the validator for details, Validate contact and email
426
  valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
 
429
  result['personal']['valid_email'] = valid_email
430
  result['personal']['invalid_email'] = invalid_email
431
 
432
+ #Appending the Educational Details if any available as a text
433
+
434
  # If Mistral produces valid output, return it
435
  if per_data or pro_data:
436
  logging.info("Successfully extracted data using Mistral.")