ResumeExtractor3

Sleeping

App Files Files Community

WebashalarForML committed on Oct 17, 2024

Commit

d12b821

verified ·

1 Parent(s): d581df7

Update utils/mistral.py

Browse files

Files changed (1) hide show

utils/mistral.py +50 -10

utils/mistral.py CHANGED Viewed

@@ -42,7 +42,7 @@ def Data_Cleaner(text):
 def Model_ProfessionalDetails_Output(resume, client):
     system_role = {
     "role": "system",
-    "content": "You are a skilled resume parser. Your task is to extract Professional details as well as Academic details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return []."
     }
     user_prompt = {
     "role": "user",
@@ -81,6 +81,41 @@ def Model_ProfessionalDetails_Output(resume, client):
     return parsed_response
 def Model_PersonalDetails_Output(resume, client):
     system_role = {
     "role": "system",
@@ -300,7 +335,7 @@ def extract_link_details(text):
     email_regex = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
     # URL and links regex, updated to avoid conflicts with email domains
-    link_regex = re.compile(r'\b(?:https?:\/\/)?(?:www\.)?[a-zA-Z0-9-]+\.(?:com|co\.in|co|io|org|net|edu|gov|mil|int|uk|us|in|de|au|app|tech|xyz|info|biz|fr|dev)\b')
     emails = email_regex.findall(text)
@@ -325,15 +360,18 @@ def process_resume_data(file_path):
     try:
         # Extract personal details using Mistral
         per_data = Model_PersonalDetails_Output(resume_text, client)
-        print(per_data)
         # Extract professional details using Mistral
         pro_data = Model_ProfessionalDetails_Output(resume_text, client)
-        print(pro_data)
         # Extract link using Regular Expression
         links = extract_link_details(resume_text)
-        print(links)
         # Check if per_data and pro_data have been populated correctly
         if not per_data:
@@ -370,10 +408,10 @@ def process_resume_data(file_path):
                 ],
                 "education": [
                     {
-                        "qualification": pro_data.get('professional', {}).get('qualification', 'Not found'),
-                        "university": pro_data.get('professional', {}).get('university', 'Not found'),
-                        "course": pro_data.get('professional', {}).get('course', 'Not found'),
-                        "certificate": pro_data.get('professional', {}).get('certification', 'Not found')
                     }
                 ]
             }
@@ -382,7 +420,7 @@ def process_resume_data(file_path):
         #Appending the list if any available as a text
-        result['personal']['other_links'] += per_data.get('personal', {}).get('link', 'Not found')
         result['personal']['other_links'] += links
         #Added the validator for details, Validate contact and email
         valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
@@ -391,6 +429,8 @@ def process_resume_data(file_path):
         result['personal']['valid_email'] = valid_email
         result['personal']['invalid_email'] = invalid_email
         # If Mistral produces valid output, return it
         if per_data or pro_data:
             logging.info("Successfully extracted data using Mistral.")

 def Model_ProfessionalDetails_Output(resume, client):
     system_role = {
     "role": "system",
+    "content": "You are a skilled resume parser. Your task is to extract Professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return []."
     }
     user_prompt = {
     "role": "user",
     return parsed_response
+# Function to call Mistral and process output
+def Model_EducationalDetails_Output(resume, client):
+    system_role = {
+    "role": "system",
+    "content": "You are a skilled resume parser. Your task is to Extract All Educational qualifications, including Degrees and Certifications  from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return []."
+    }
+    user_prompt = {
+    "role": "user",
+    "content": f'''Act as a resume parser for the following text given in text: {resume}
+    Extract the text in the following output JSON string as:
+    {{
+        "educational": {{
+            "certifications": ["List and Extract  all certifications mentioned in the resume."],
+            "qualifications": ["List and Extract all educational qualifications, including degrees (e.g., BBA, MBA), their full forms, and associated levels (e.g., undergraduate, postgraduate) from resume. If none are found, return []."],
+            "university": ["List and Extract the name of the University, College, or Institute attended, based on the resume. If not found, return []."],
+            "courses": ["List and Extract the names of completed courses or based on the resume. If none are found, return []."]
+        }}
+    }}
+    output:
+    '''
+    }
+    response = ""
+    for message in client.chat_completion(messages=[system_role, user_prompt], max_tokens=4096, stream=True, temperature=0.35):
+        response += message.choices[0].delta.content
+    try:
+        clean_response = Data_Cleaner(response)
+        parsed_response = json.loads(clean_response)
+    except json.JSONDecodeError as e:
+        logging.error(f"JSON Decode Error: {e}")
+        return {}
+    return parsed_response
 def Model_PersonalDetails_Output(resume, client):
     system_role = {
     "role": "system",
     email_regex = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
     # URL and links regex, updated to avoid conflicts with email domains
+    link_regex = re.compile(r'\b(?:https?:\/\/)?(?:www\.)[a-zA-Z0-9-]+\.(?:com|co\.in|co|io|org|net|edu|gov|mil|int|uk|us|in|de|au|app|tech|xyz|info|biz|fr|dev)\b')
     emails = email_regex.findall(text)
     try:
         # Extract personal details using Mistral
         per_data = Model_PersonalDetails_Output(resume_text, client)
+        print(f"Personal Data -----> {per_data}")
         # Extract professional details using Mistral
         pro_data = Model_ProfessionalDetails_Output(resume_text, client)
+        print(f"Professional Data -----> {pro_data}")
+        Edu_data=Model_EducationalDetails_Output(resume, client)
+        print(f"Educational Data -----> {Edu_data}")
         # Extract link using Regular Expression
         links = extract_link_details(resume_text)
+        print(f"Links Data -----> {links}")
         # Check if per_data and pro_data have been populated correctly
         if not per_data:
                 ],
                 "education": [
                     {
+                        "qualification": Edu_data.get('educational', {}).get('qualification', 'Not found'),
+                        "university": Edu_data.get('educational', {}).get('university', 'Not found'),
+                        "course": Edu_data.get('educational', {}).get('course', 'Not found'),
+                        "certificate": Edu_data.get('educational', {}).get('certification', 'Not found')
                     }
                 ]
             }
         #Appending the list if any available as a text
+        result['personal']['other_links'] += per_data.get('personal', {}).get('link', [])
         result['personal']['other_links'] += links
         #Added the validator for details, Validate contact and email
         valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
         result['personal']['valid_email'] = valid_email
         result['personal']['invalid_email'] = invalid_email
+        #Appending the Educational Details if any available as a text
         # If Mistral produces valid output, return it
         if per_data or pro_data:
             logging.info("Successfully extracted data using Mistral.")