Spaces:
Sleeping
Sleeping
from pdfminer.high_level import extract_text | |
import re | |
from datetime import datetime | |
class ResumeParser:
    """Extract basic candidate details from a PDF resume using regex heuristics.

    ``parse`` is the entry point: it pulls the text out of the PDF and hands
    it to ``extract_candidate_info``. Every ``extract_*`` helper is a pure
    function of the resume text and returns a human-readable placeholder
    string (or a one-element list for the tech stack) when nothing matches.
    """

    # "Jan 2020" / "January 2020" style month-year token.
    _MONTH_YEAR = (
        r"\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?"
        r"|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?"
        r"|Dec(?:ember)?) \d{4}"
    )
    # "<start> - <end>" where the separator may be a hyphen, en dash or em
    # dash and <end> may be the literal word "Present" (any letter case).
    _DATE_RANGE_RE = re.compile(
        rf"({_MONTH_YEAR})\s*[-\u2013\u2014]\s*({_MONTH_YEAR}|Present)",
        re.IGNORECASE,
    )
    # Tried in order by parse_date: abbreviated month first, then full name.
    _DATE_FORMATS = ("%b %Y", "%B %Y")

    def parse(self, resume_file):
        """Extract text from a PDF resume and return the candidate details.

        Args:
            resume_file: Whatever ``extract_text`` accepts as its PDF input.

        Returns:
            The dict produced by ``extract_candidate_info``.
        """
        text = extract_text(resume_file)
        return self.extract_candidate_info(text)

    def extract_candidate_info(self, text):
        """Run every extractor over *text* and collect the results in a dict.

        Returns:
            A dict with the keys ``name``, ``email``, ``phone``,
            ``experience``, ``position``, ``location`` and ``tech_stack``.
        """
        return {
            "name": self.extract_name(text),
            "email": self.extract_email(text),
            "phone": self.extract_phone(text),
            "experience": self.extract_experience(text),
            "position": self.extract_position(text),
            "location": self.extract_location(text),
            "tech_stack": self.extract_tech_stack(text),
        }

    @staticmethod
    def extract_name(text):
        """Return the first non-empty line if it looks like a full name.

        Only the first non-empty line is examined. It is accepted when it has
        at least two whitespace-separated words (so a lone heading such as
        "Resume" is rejected); otherwise ``"Name not found"`` is returned.
        """
        for raw_line in text.splitlines():
            line = raw_line.strip()
            if not line:
                continue
            # The first non-empty line decides the outcome either way.
            return line if len(line.split()) >= 2 else "Name not found"
        return "Name not found"

    @staticmethod
    def extract_email(text):
        """Return the first email-like token, or ``"Email not found"``."""
        match = re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", text)
        return match.group(0) if match else "Email not found"

    @staticmethod
    def extract_phone(text):
        """Return the first run of 10-13 digits (optionally ``+``-prefixed).

        Returns ``"Phone number not found"`` when there is no such run.
        """
        match = re.search(r"\+?\d{10,13}", text)
        return match.group(0) if match else "Phone number not found"

    @staticmethod
    def extract_position(text):
        """Return the text following the first "experience" keyword.

        The keyword is matched case-insensitively and must be followed by a
        colon or whitespace; the rest of that line is returned stripped.
        Returns ``"Position not found"`` when the keyword is absent.
        """
        match = re.search(r"(?i)experience(?:\:|\s+)([^\n]+)", text)
        return match.group(1).strip() if match else "Position not found"

    @staticmethod
    def extract_location(text):
        """Return the candidate's location, or ``"Location not found"``.

        An explicit ``Location: <value>`` label (case-insensitive) takes
        precedence. Only when no usable label exists does the method fall back
        to the first ``<City>, <State>`` looking phrase on a single line.
        """
        labelled = re.search(r"(?i)location(?:\:|\s+)([^\n]+)", text)
        if labelled:
            value = labelled.group(1).strip()
            if value:
                return value
        # Fallback is restricted to letters and spaces so it cannot run
        # across line breaks and swallow neighbouring lines.
        city_state = re.search(
            r"\b([A-Za-z][A-Za-z ]*,[ \t]*[A-Za-z][A-Za-z ]*)\b", text
        )
        if city_state:
            return city_state.group(1).strip()
        return "Location not found"

    @staticmethod
    def extract_tech_stack(text):
        """Return the skills listed after a "Skills"/"Technical Skills" label.

        The remainder of the label's line is split on commas, semicolons and
        pipes; entries are stripped, empty entries dropped and duplicates
        removed while keeping first-seen order.

        Returns:
            A list of skill strings, or ``["No tech stack found"]`` when no
            skills label is present.
        """
        match = re.search(r"(?i)(skills|technical skills)(?:\:|\s+)([^\n]+)", text)
        if not match:
            return ["No tech stack found"]
        pieces = re.split(r"[,\;\|]", match.group(2).strip())
        cleaned = (piece.strip() for piece in pieces)
        # dict.fromkeys de-duplicates while preserving insertion order.
        return list(dict.fromkeys(skill for skill in cleaned if skill))

    @staticmethod
    def extract_experience(text):
        """Sum the month spans of every ``<Month Year> - <Month Year>`` range.

        The end of a range may be the word "Present" in any letter case, which
        is treated as the current date. Ranges that cannot be parsed, or whose
        end precedes their start, are ignored.

        Returns:
            ``"<Y> years, <M> months"``, or ``"Experience not found"`` when
            the total is not positive.
        """
        total_months = 0
        for start_str, end_str in ResumeParser._DATE_RANGE_RE.findall(text):
            start = ResumeParser.parse_date(start_str)
            if end_str.lower() == "present":
                end = datetime.now()
            else:
                end = ResumeParser.parse_date(end_str)
            if start is None or end is None:
                continue
            span = (end.year - start.year) * 12 + (end.month - start.month)
            # A reversed range must not subtract from the other ranges.
            if span > 0:
                total_months += span
        if total_months <= 0:
            return "Experience not found"
        years, months = divmod(total_months, 12)
        return f"{years} years, {months} months"

    @staticmethod
    def parse_date(date_str):
        """Parse ``'January 2015'`` or ``'Feb 2024'`` into a ``datetime``.

        Returns:
            The parsed ``datetime`` (first day of that month), or ``None``
            when the string matches neither supported format.
        """
        for fmt in ResumeParser._DATE_FORMATS:
            try:
                return datetime.strptime(date_str, fmt)
            except ValueError:
                continue
        return None