WebashalarForML committed on
Commit
6e27f24
·
verified ·
1 Parent(s): 254bebc

Update utils/mistral.py

Browse files
Files changed (1) hide show
  1. utils/mistral.py +12 -8
utils/mistral.py CHANGED
@@ -39,24 +39,28 @@ def Data_Cleaner(text):
39
  return text # Return the original text if cleaning goes wrong
40
  '''
41
  def Data_Cleaner(text):
42
- # Use a regex pattern to extract everything between ```json and ```
43
  pattern = r"```json\s*(\{.*?\})\s*```" # Non-greedy matching inside braces
44
  match = re.search(pattern, text, re.DOTALL) # DOTALL to match newlines
45
 
46
  if match:
47
  json_str = match.group(1).strip() # Extract JSON block
48
  else:
49
- logging.error("JSON block not found in the text")
50
- return text # Return the original text if no match is found
 
 
 
 
 
51
 
52
  # Validate and return the cleaned JSON if it's valid
53
  try:
54
  json_obj = json.loads(json_str) # Validate JSON
55
- return json_str # Return the parsed JSON as a string
56
  except json.JSONDecodeError:
57
  logging.error("Extracted text is not valid JSON")
58
- return text # Return the original text if JSON decoding fails
59
-
60
 
61
  # Function to call Mistral and process output
62
  def Model_ProfessionalDetails_Output(resume, client):
@@ -425,7 +429,7 @@ def process_resume_data(file_path):
425
  "location": normalize_data(per_data.get('personal', {}).get('address', None)),
426
  "linkedin": normalize_data(linkedin_links),
427
  "github": normalize_data(github_links),
428
- "other_links": normalize_data(hyperlinks)
429
  },
430
  "professional": {
431
  "technical_skills": normalize_data(pro_data.get('professional', {}).get('technical_skills', None)),
@@ -455,7 +459,7 @@ def process_resume_data(file_path):
455
 
456
  #Appending the list if any available as a text
457
  if result['personal']['other_links'] is not None:
458
- result['personal']['other_links'] += links
459
 
460
  #Added the validator for details, Validate contact and email
461
  #valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
 
39
  return text # Return the original text if cleaning goes wrong
40
  '''
41
  def Data_Cleaner(text):
42
+ # Use a regex pattern to extract JSON if it exists within ```json and ```
43
  pattern = r"```json\s*(\{.*?\})\s*```" # Non-greedy matching inside braces
44
  match = re.search(pattern, text, re.DOTALL) # DOTALL to match newlines
45
 
46
  if match:
47
  json_str = match.group(1).strip() # Extract JSON block
48
  else:
49
+ # If no match, check if text itself is a JSON object
50
+ try:
51
+ json_obj = json.loads(text.strip()) # Attempt to load the text as JSON
52
+ return text # Return the parsed JSON as a dictionary
53
+ except json.JSONDecodeError:
54
+ logging.error("No valid JSON found in the text")
55
+ return text # Return the original text if no valid JSON is found
56
 
57
  # Validate and return the cleaned JSON if it's valid
58
  try:
59
  json_obj = json.loads(json_str) # Validate JSON
60
+ return json_str # Return the parsed JSON as a dictionary
61
  except json.JSONDecodeError:
62
  logging.error("Extracted text is not valid JSON")
63
+ return text # Return the original text if JSON decoding fa
 
64
 
65
  # Function to call Mistral and process output
66
  def Model_ProfessionalDetails_Output(resume, client):
 
429
  "location": normalize_data(per_data.get('personal', {}).get('address', None)),
430
  "linkedin": normalize_data(linkedin_links),
431
  "github": normalize_data(github_links),
432
+ "other_links": hyperlinks
433
  },
434
  "professional": {
435
  "technical_skills": normalize_data(pro_data.get('professional', {}).get('technical_skills', None)),
 
459
 
460
  #Appending the list if any available as a text
461
  if result['personal']['other_links'] is not None:
462
+ result['personal']['other_links'] += normalize_data(links)
463
 
464
  #Added the validator for details, Validate contact and email
465
  #valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])