WebashalarForML committed on
Commit
b8b2660
·
verified ·
1 Parent(s): cdd897f

Update utility/utils.py

Browse files
Files changed (1) hide show
  1. utility/utils.py +51 -7
utility/utils.py CHANGED
@@ -402,7 +402,34 @@ def process_extracted_text(extracted_text):
402
 
403
  # Process the model output for parsed result
404
  def process_resume_data(LLMdata,cont_data,extracted_text):
 
 
 
 
 
 
 
 
 
 
 
 
405
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
  # Initialize the processed data dictionary
407
  processed_data = {
408
  "name": [],
@@ -415,15 +442,32 @@ def process_resume_data(LLMdata,cont_data,extracted_text):
415
  "extracted_text": extracted_text
416
  }
417
  #LLM
 
418
  processed_data['name'].extend(LLMdata.get('Name', []))
419
- processed_data['contact_number'].extend(LLMdata.get('Contact', []))
420
  processed_data['Designation'].extend(LLMdata.get('Designation', []))
421
- processed_data['email'].extend(LLMdata.get("Email", []))
422
  processed_data['Location'].extend(LLMdata.get('Address', []))
423
- processed_data['Link'].extend(LLMdata.get('Link', []))
424
  processed_data['Company'].extend(LLMdata.get('Company', []))
 
425
  #Contact
426
- processed_data['email'].extend(cont_data.get("emails", []))
427
- processed_data['contact_number'].extend(cont_data.get("phone_numbers", []))
428
- processed_data['Link'].extend(cont_data.get("links_RE", []))
429
- return processed_data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
 
403
  # Process the model output for parsed result
404
  def process_resume_data(LLMdata,cont_data,extracted_text):
405
+
406
+ # Removing duplicate emails
407
+ unique_emails = []
408
+ for email in cont_data['emails']:
409
+ if not any(email.lower() == existing_email.lower() for existing_email in LLMdata['Email']):
410
+ unique_emails.append(email)
411
+
412
+ # Removing duplicate links (case insensitive)
413
+ unique_links = []
414
+ for link in cont_data['links_RE']:
415
+ if not any(link.lower() == existing_link.lower() for existing_link in LLMdata['Link']):
416
+ unique_links.append(link)
417
 
418
+ # Removing duplicate phone numbers
419
+ normalized_contact = [num[-10:] for num in LLMdata['Contact']]
420
+ unique_numbers = []
421
+ for num in cont_data['phone_numbers']:
422
+ if num[-10:] not in normalized_contact:
423
+ unique_numbers.append(num)
424
+
425
+ # Add unique emails, links, and phone numbers to the original LLMdata
426
+ LLMdata['Email'] += unique_emails
427
+ LLMdata['Link'] += unique_links
428
+ LLMdata['Contact'] += unique_numbers
429
+
430
+ # Apply the function to the data
431
+ LLMdata=remove_duplicates_case_insensitive(LLMdata)
432
+
433
  # Initialize the processed data dictionary
434
  processed_data = {
435
  "name": [],
 
442
  "extracted_text": extracted_text
443
  }
444
  #LLM
445
+
446
  processed_data['name'].extend(LLMdata.get('Name', []))
447
+ #processed_data['contact_number'].extend(LLMdata.get('Contact', []))
448
  processed_data['Designation'].extend(LLMdata.get('Designation', []))
449
+ #processed_data['email'].extend(LLMdata.get("Email", []))
450
  processed_data['Location'].extend(LLMdata.get('Address', []))
451
+ #processed_data['Link'].extend(LLMdata.get('Link', []))
452
  processed_data['Company'].extend(LLMdata.get('Company', []))
453
+
454
  #Contact
455
+ #processed_data['email'].extend(cont_data.get("emails", []))
456
+ #processed_data['contact_number'].extend(cont_data.get("phone_numbers", []))
457
+ #processed_data['Link'].extend(cont_data.get("links_RE", []))
458
+
459
+ #New_merge_data
460
+ processed_data['email'].extend(LLMdata['Email'])
461
+ processed_data['contact_number'].extend(LLMdata['Contact'])
462
+ processed_data['Link'].extend(LLMdata['Link'])
463
+
464
+ #to remove not found fields
465
+ # List of keys to check for 'Not found'
466
+ keys_to_check = ["name", "contact_number", "Designation", "email", "Location", "Link", "Company"]
467
+
468
+ # Remove each key whose value is empty or is only a 'Not found' placeholder
469
+ for key in keys_to_check:
470
+ if processed_data[key] == ['Not found'] or processed_data[key] == ['not found'] or processed_data[key] == []:
471
+ del processed_data[key]
472
+
473
+ return processed_data