Spaces:
Sleeping
Sleeping
Update utils/mistral.py
Browse files- utils/mistral.py +12 -8
utils/mistral.py
CHANGED
@@ -39,24 +39,28 @@ def Data_Cleaner(text):
|
|
39 |
return text # Return the original text if cleaning goes wrong
|
40 |
'''
|
41 |
def Data_Cleaner(text):
|
42 |
-
# Use a regex pattern to extract
|
43 |
pattern = r"```json\s*(\{.*?\})\s*```" # Non-greedy matching inside braces
|
44 |
match = re.search(pattern, text, re.DOTALL) # DOTALL to match newlines
|
45 |
|
46 |
if match:
|
47 |
json_str = match.group(1).strip() # Extract JSON block
|
48 |
else:
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
# Validate and return the cleaned JSON if it's valid
|
53 |
try:
|
54 |
json_obj = json.loads(json_str) # Validate JSON
|
55 |
-
return json_str # Return the parsed JSON as a
|
56 |
except json.JSONDecodeError:
|
57 |
logging.error("Extracted text is not valid JSON")
|
58 |
-
return text # Return the original text if JSON decoding
|
59 |
-
|
60 |
|
61 |
# Function to call Mistral and process output
|
62 |
def Model_ProfessionalDetails_Output(resume, client):
|
@@ -425,7 +429,7 @@ def process_resume_data(file_path):
|
|
425 |
"location": normalize_data(per_data.get('personal', {}).get('address', None)),
|
426 |
"linkedin": normalize_data(linkedin_links),
|
427 |
"github": normalize_data(github_links),
|
428 |
-
"other_links":
|
429 |
},
|
430 |
"professional": {
|
431 |
"technical_skills": normalize_data(pro_data.get('professional', {}).get('technical_skills', None)),
|
@@ -455,7 +459,7 @@ def process_resume_data(file_path):
|
|
455 |
|
456 |
# Append the collected links to other_links when that field is not None
|
457 |
if result['personal']['other_links'] is not None:
|
458 |
-
result['personal']['other_links'] += links
|
459 |
|
460 |
#Added the validator for details, Validate contact and email
|
461 |
#valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
|
|
|
39 |
return text # Return the original text if cleaning goes wrong
|
40 |
'''
|
41 |
def Data_Cleaner(text):
    """Extract a JSON object string from raw model output.

    The function looks for a JSON object wrapped in a fenced block that
    starts with three backticks followed by ``json`` and ends with three
    backticks. If such a block is found and its content parses as JSON,
    the stripped JSON *string* is returned (not a parsed dictionary).

    In every other case the input is handed back unchanged:

    * no fenced block is present (whether or not ``text`` itself is
      valid JSON; an error is logged only when it is not),
    * the fenced block does not contain valid JSON,
    * ``text`` is not a string at all (for example ``None``).

    Args:
        text: The raw text to clean.

    Returns:
        The extracted JSON string, or ``text`` unchanged when nothing
        valid could be extracted.
    """
    # Non-string input cannot be searched or parsed; keep the function
    # best-effort instead of letting re.search raise TypeError.
    if not isinstance(text, str):
        logging.error("No valid JSON found in the text")
        return text

    # Non-greedy match inside braces; DOTALL lets the object span lines.
    pattern = r"```json\s*(\{.*?\})\s*```"
    match = re.search(pattern, text, re.DOTALL)

    if match is None:
        # No fenced block: the text is returned as-is either way, the
        # parse attempt only decides whether an error is logged.
        try:
            json.loads(text.strip())
        except json.JSONDecodeError:
            logging.error("No valid JSON found in the text")
        return text

    json_str = match.group(1).strip()

    # Validate the extracted block before handing it to callers.
    try:
        json.loads(json_str)
    except json.JSONDecodeError:
        logging.error("Extracted text is not valid JSON")
        return text  # Fall back to the original text if decoding fails
    return json_str
|
|
|
64 |
|
65 |
# Function to call Mistral and process output
|
66 |
def Model_ProfessionalDetails_Output(resume, client):
|
|
|
429 |
"location": normalize_data(per_data.get('personal', {}).get('address', None)),
|
430 |
"linkedin": normalize_data(linkedin_links),
|
431 |
"github": normalize_data(github_links),
|
432 |
+
"other_links": hyperlinks
|
433 |
},
|
434 |
"professional": {
|
435 |
"technical_skills": normalize_data(pro_data.get('professional', {}).get('technical_skills', None)),
|
|
|
459 |
|
460 |
# Append the collected links to other_links when that field is not None
|
461 |
if result['personal']['other_links'] is not None:
|
462 |
+
result['personal']['other_links'] += normalize_data(links)
|
463 |
|
464 |
#Added the validator for details, Validate contact and email
|
465 |
#valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
|