File size: 4,942 Bytes
9fec341
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from pdfminer.high_level import extract_text
import re
from datetime import datetime

class ResumeParser:
    """Extract basic candidate details from the text of a PDF resume.

    Every ``extract_*`` helper is a regex/heuristic lookup over the plain
    resume text. When a field cannot be located, the helper returns a
    human-readable "... not found" placeholder instead of raising.
    """

    # Month name (abbreviated or full) followed by a four-digit year.
    _MONTH_YEAR = (
        r"\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?"
        r"|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?"
        r"|Dec(?:ember)?) \d{4}"
    )
    # "<start> - <end>" where <end> may be the literal word "Present".
    # Hyphen, en dash and em dash are all accepted as the separator because
    # text extracted from PDFs frequently contains typographic dashes.
    _DATE_RANGE_RE = re.compile(
        rf"({_MONTH_YEAR})\s*[-\u2013\u2014]\s*({_MONTH_YEAR}|Present)",
        re.IGNORECASE,
    )
    # Formats tried, in order, by parse_date().
    _DATE_FORMATS = ("%b %Y", "%B %Y")

    def parse(self, resume_file) -> dict:
        """Extract text from a PDF resume and return the candidate details.

        Args:
            resume_file: Whatever ``pdfminer.high_level.extract_text``
                accepts as its first argument.

        Returns:
            The dictionary produced by :meth:`extract_candidate_info`.
        """
        text = extract_text(resume_file)
        return self.extract_candidate_info(text)

    def extract_candidate_info(self, text) -> dict:
        """Run every field extractor over ``text`` and collect the results.

        Args:
            text: Plain text of the resume.

        Returns:
            A dict with the keys ``name``, ``email``, ``phone``,
            ``experience``, ``position``, ``location`` and ``tech_stack``.
        """
        return {
            "name": self.extract_name(text),
            "email": self.extract_email(text),
            "phone": self.extract_phone(text),
            "experience": self.extract_experience(text),
            "position": self.extract_position(text),
            "location": self.extract_location(text),
            "tech_stack": self.extract_tech_stack(text),
        }

    @staticmethod
    def extract_name(text) -> str:
        """Return the first non-empty line if it looks like a full name.

        Only the first non-empty line is inspected. It is accepted when it
        contains at least two whitespace-separated words (so a lone heading
        such as "Resume" is rejected); otherwise "Name not found" is
        returned without looking at later lines.
        """
        stripped_lines = (line.strip() for line in text.splitlines())
        first_line = next((line for line in stripped_lines if line), "")
        if len(first_line.split()) >= 2:
            return first_line
        return "Name not found"

    @staticmethod
    def extract_email(text) -> str:
        """Return the first email-like token in ``text`` or "Email not found"."""
        match = re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", text)
        return match.group(0) if match else "Email not found"

    @staticmethod
    def extract_phone(text) -> str:
        """Return the first run of 10-13 digits (optionally prefixed by ``+``).

        Numbers written with spaces, dashes or parentheses between digit
        groups are not recognised. Returns "Phone number not found" when no
        such run exists.
        """
        match = re.search(r"\+?\d{10,13}", text)
        return match.group(0) if match else "Phone number not found"

    @staticmethod
    def extract_position(text) -> str:
        """Return the text that follows the first "experience" keyword.

        The keyword is matched case-insensitively and must be followed by a
        colon or whitespace; the remainder of that line (stripped) is
        returned. Returns "Position not found" when the keyword is absent.
        """
        match = re.search(r"(?i)experience(?:\:|\s+)([^\n]+)", text)
        return match.group(1).strip() if match else "Position not found"

    @staticmethod
    def extract_location(text) -> str:
        """Return the candidate's location.

        An explicit ``Location: <value>`` label (case-insensitive) always
        wins. Only when no label exists does the method fall back to the
        first single-line ``<City>, <State>`` looking phrase. Returns
        "Location not found" when neither pattern matches.
        """
        # The labelled form is searched on its own first: with a single
        # alternation the leftmost match wins, so any earlier "X, Y" text
        # (e.g. a comma-separated skills list) would shadow the label.
        labelled = re.search(r"(?i)location(?:\:|\s+)([^\n]+)", text)
        if labelled:
            return labelled.group(1).strip()

        # Fallback: "<words>, <words>" confined to one line. Only spaces and
        # tabs are allowed inside each part so that the match cannot swallow
        # neighbouring lines.
        standalone = re.search(r"\b([A-Za-z \t]+,[ \t]*[A-Za-z \t]+)\b", text)
        if standalone:
            return standalone.group(1).strip()
        return "Location not found"

    @staticmethod
    def extract_tech_stack(text) -> list:
        """Return the unique skills listed on the first "Skills" line.

        The rest of the line after ``Skills`` / ``Technical Skills`` is split
        on commas, semicolons and pipes. Entries are stripped, empty entries
        are dropped and duplicates are removed while keeping first-seen
        order. Returns ``["No tech stack found"]`` when no skills line
        exists.
        """
        match = re.search(r"(?i)(skills|technical skills)(?:\:|\s+)([^\n]+)", text)
        if not match:
            return ["No tech stack found"]

        raw_skills = re.split(r"[,\;\|]", match.group(2).strip())
        cleaned = (skill.strip() for skill in raw_skills)
        # dict.fromkeys de-duplicates while preserving insertion order.
        return list(dict.fromkeys(skill for skill in cleaned if skill))

    @staticmethod
    def extract_experience(text) -> str:
        """Sum the lengths of all "<Month YYYY> - <Month YYYY|Present>" ranges.

        Month names may be abbreviated or full and are matched
        case-insensitively, as is the word "Present" (which is treated as
        the current month). Ranges are simply added together, so overlapping
        ranges are counted more than once. Ranges whose end precedes their
        start are ignored.

        Returns:
            A string such as ``"3 years, 4 months"``, or
            "Experience not found" when the total is zero.
        """
        total_months = 0
        for start_raw, end_raw in ResumeParser._DATE_RANGE_RE.findall(text):
            start_date = ResumeParser.parse_date(start_raw)
            # The pattern is case-insensitive, so the comparison must be too.
            if end_raw.lower() == "present":
                end_date = datetime.now()
            else:
                end_date = ResumeParser.parse_date(end_raw)

            if start_date is None or end_date is None:
                continue

            span = (end_date.year - start_date.year) * 12 + (
                end_date.month - start_date.month
            )
            # A reversed range must not subtract from the valid ones.
            if span > 0:
                total_months += span

        if total_months <= 0:
            return "Experience not found"
        years, months = divmod(total_months, 12)
        return f"{years} years, {months} months"

    @staticmethod
    def parse_date(date_str):
        """Parse 'January 2015' or 'Feb 2024' into a ``datetime``.

        Tries the abbreviated month format first, then the full month name.

        Returns:
            The parsed ``datetime`` (first day of the month), or ``None``
            when ``date_str`` matches neither format.
        """
        for fmt in ResumeParser._DATE_FORMATS:
            try:
                return datetime.strptime(date_str, fmt)
            except ValueError:
                continue
        return None