ResumeExtractor3

Sleeping

App Files Files Community

WebashalarForML committed on Oct 18, 2024

Commit

6e27f24

verified ·

1 Parent(s): 254bebc

Update utils/mistral.py

Browse files

Files changed (1) hide show

utils/mistral.py +12 -8

utils/mistral.py CHANGED Viewed

@@ -39,24 +39,28 @@ def Data_Cleaner(text):
         return text  # Return the original text if cleaning goes wrong
 '''
 def Data_Cleaner(text):
-    # Use a regex pattern to extract everything between ```json and ```
     pattern = r"```json\s*(\{.*?\})\s*```"  # Non-greedy matching inside braces
     match = re.search(pattern, text, re.DOTALL)  # DOTALL to match newlines
     if match:
         json_str = match.group(1).strip()  # Extract JSON block
     else:
-        logging.error("JSON block not found in the text")
-        return text  # Return the original text if no match is found
     # Validate and return the cleaned JSON if it's valid
     try:
         json_obj = json.loads(json_str)  # Validate JSON
-        return json_str  # Return the parsed JSON as a string
     except json.JSONDecodeError:
         logging.error("Extracted text is not valid JSON")
-        return text  # Return the original text if JSON decoding fails
 # Function to call Mistral and process output
 def Model_ProfessionalDetails_Output(resume, client):
@@ -425,7 +429,7 @@ def process_resume_data(file_path):
                 "location": normalize_data(per_data.get('personal', {}).get('address', None)),
                 "linkedin": normalize_data(linkedin_links),
                 "github": normalize_data(github_links),
-                "other_links": normalize_data(hyperlinks)
             },
             "professional": {
                 "technical_skills": normalize_data(pro_data.get('professional', {}).get('technical_skills', None)),
@@ -455,7 +459,7 @@ def process_resume_data(file_path):
         #Appending the list if any available as a text
         if result['personal']['other_links'] is not None:
-            result['personal']['other_links'] += links
         #Added the validator for details, Validate contact and email
         #valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])

         return text  # Return the original text if cleaning goes wrong
 '''
 def Data_Cleaner(text):
+    # Use a regex pattern to extract JSON if it exists within ```json and ```
     pattern = r"```json\s*(\{.*?\})\s*```"  # Non-greedy matching inside braces
     match = re.search(pattern, text, re.DOTALL)  # DOTALL to match newlines
     if match:
         json_str = match.group(1).strip()  # Extract JSON block
     else:
+        # If no match, check if text itself is a JSON object
+        try:
+            json_obj = json.loads(text.strip())  # Attempt to load the text as JSON
+            return text  # Return the parsed JSON as a dictionary
+        except json.JSONDecodeError:
+            logging.error("No valid JSON found in the text")
+            return text  # Return the original text if no valid JSON is found
     # Validate and return the cleaned JSON if it's valid
     try:
         json_obj = json.loads(json_str)  # Validate JSON
+        return json_str  # Return the parsed JSON as a dictionary
     except json.JSONDecodeError:
         logging.error("Extracted text is not valid JSON")
+        return text  # Return the original text if JSON decoding fails
 # Function to call Mistral and process output
 def Model_ProfessionalDetails_Output(resume, client):
                 "location": normalize_data(per_data.get('personal', {}).get('address', None)),
                 "linkedin": normalize_data(linkedin_links),
                 "github": normalize_data(github_links),
+                "other_links": hyperlinks
             },
             "professional": {
                 "technical_skills": normalize_data(pro_data.get('professional', {}).get('technical_skills', None)),
         #Appending the list if any available as a text
         if result['personal']['other_links'] is not None:
+            result['personal']['other_links'] += normalize_data(links)
         #Added the validator for details, Validate contact and email
         #valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])