File size: 4,609 Bytes
1c6412d
cb632ff
d728f53
46a124f
 
 
 
1ae1b13
46a124f
 
 
 
 
cb632ff
 
 
 
 
 
8c8e3c8
46a124f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
529c4d0
 
46a124f
 
 
 
 
 
 
 
 
529c4d0
 
 
46a124f
529c4d0
46a124f
d728f53
46a124f
 
d728f53
529c4d0
d728f53
529c4d0
8c8e3c8
1ae1b13
6a3b909
8c8e3c8
6a3b909
 
46a124f
 
 
 
 
 
 
6a3b909
46a124f
529c4d0
 
46a124f
529c4d0
 
d728f53
46a124f
 
d728f53
529c4d0
46a124f
d728f53
529c4d0
 
 
 
46a124f
 
 
 
 
d728f53
46a124f
 
 
6a3b909
46a124f
6a3b909
1ae1b13
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import streamlit as st
from sentence_transformers import SentenceTransformer, util
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Download the NLTK data files this app relies on.
# "stopwords" backs the stop-word set below; "punkt" / "punkt_tab" back
# word_tokenize. NLTK 3.9+ loads its tokenizer tables from "punkt_tab"
# rather than "punkt", so both are requested to stay working across versions.
# nltk.download() reports a failed/unknown package instead of raising, so
# asking for a resource an older index lacks is harmless.
for nltk_resource in ("stopwords", "punkt", "punkt_tab"):
    nltk.download(nltk_resource)

# English stop words as a set, so membership checks in preprocess() are O(1).
stop_words = set(stopwords.words("english"))

@st.cache_resource
def load_model():
    """Build the sentence-embedding model used for semantic similarity.

    Decorated with ``st.cache_resource`` so the model object is shared rather
    than re-created each time the script body runs.
    """
    model_name = 'sentence-transformers/all-mpnet-base-v2'
    return SentenceTransformer(model_name)


# Module-level handle used by the comparison code further down.
model = load_model()

# Skill groups: each canonical term maps to the set of alternative phrasings
# that should count as a mention of that same skill.
synonyms = {
    "data analysis": {
        "data analytics",
        "data analyst",
    },
    "machine learning": {
        "ml",
        "artificial intelligence",
        "ai",
    },
    "programming": {
        "coding",
        "development",
        "software engineering",
    },
    "statistical analysis": {
        "statistics",
        "statistical modeling",
    },
    "visualization": {
        "data viz",
        "tableau",
        "visualizing data",
    },
}

def preprocess(text):
    """Return *text* lower-cased, tokenized, and stripped of noise tokens.

    A token is kept only if it is purely alphanumeric and is not in the
    module-level ``stop_words`` set; tokens containing punctuation are
    therefore dropped. The surviving tokens are joined with single spaces.
    """
    kept_tokens = []
    for token in word_tokenize(text.lower()):
        if not token.isalnum():
            continue
        if token in stop_words:
            continue
        kept_tokens.append(token)
    return " ".join(kept_tokens)

def _mentions_phrase(text, phrase):
    """Return True if *phrase* occurs in *text* as a whole word or phrase."""
    # Lookarounds instead of a plain substring test: short variants such as
    # "ml" or "ai" must not fire inside "html" or "maintain". An optional
    # trailing "s" keeps simple plurals ("visualizations") matching.
    pattern = rf"(?<!\w){re.escape(phrase)}s?(?!\w)"
    return re.search(pattern, text) is not None


def synonym_match(job_desc, resume, synonym_map=None):
    """Score how many skill groups requested by the job appear in the resume.

    A skill group is a canonical term plus its alternative phrasings. A group
    counts as "requested" when the job description mentions the canonical term
    or any variant, and as "matched" when the resume does too.

    Args:
        job_desc: Job description text (the caller passes preprocessed,
            lower-cased text; matching here is case-sensitive).
        resume: Resume text, prepared the same way.
        synonym_map: Optional mapping of canonical term -> iterable of
            variants. Defaults to the module-level ``synonyms`` table.

    Returns:
        A 3-tuple ``(score, matched, missing)``:
        ``score`` is the percentage of requested groups found in the resume
        (0 when the job description mentions none), ``matched`` and
        ``missing`` are alphabetically sorted lists of at most five
        canonical terms each.
    """
    if synonym_map is None:
        synonym_map = synonyms

    matched_keywords = set()
    missing_keywords = set()

    for key, variants in synonym_map.items():
        phrases = [key, *variants]
        if not any(_mentions_phrase(job_desc, phrase) for phrase in phrases):
            # The job does not ask for this skill; it affects neither list.
            continue
        if any(_mentions_phrase(resume, phrase) for phrase in phrases):
            matched_keywords.add(key)
        else:
            missing_keywords.add(key)

    total_keywords = len(matched_keywords) + len(missing_keywords)
    score = (len(matched_keywords) / total_keywords) * 100 if total_keywords else 0

    # Sort before truncating so the same inputs always report the same terms
    # (set iteration order for strings varies between interpreter runs).
    return score, sorted(matched_keywords)[:5], sorted(missing_keywords)[:5]

def keyword_match(job_desc, resume):
    """Measure literal word overlap between a job description and a resume.

    Both texts are split into unique ``\\w+`` words. Matching is
    case-sensitive, so callers should normalize case beforehand.

    Args:
        job_desc: Job description text.
        resume: Resume text.

    Returns:
        A 2-tuple ``(score, common)``: ``score`` is the percentage of the job
        description's unique words that also occur in the resume (0 when the
        job description has no words), and ``common`` is an alphabetically
        sorted list of at most five shared words.
    """
    job_keywords = set(re.findall(r'\b\w+\b', job_desc))
    if not job_keywords:
        return 0, []

    resume_keywords = set(re.findall(r'\b\w+\b', resume))
    common_keywords = job_keywords & resume_keywords
    score = (len(common_keywords) / len(job_keywords)) * 100

    # Sort before truncating: set order for strings changes between runs, so
    # an unsorted slice would show different "top 5" words for the same input.
    return score, sorted(common_keywords)[:5]

# --- Streamlit page: collect both texts, score them, and report the result ---
st.title("Resume and Job Description Similarity Checker")

job_description = st.text_area("Paste any job description here:", height=200)
resume_text = st.text_area("Paste your resume here:", height=200)

if st.button("Compare"):
    if job_description.strip() and resume_text.strip():
        # Normalize both texts the same way before any comparison.
        processed_job_desc = preprocess(job_description)
        processed_resume = preprocess(resume_text)

        # Embedding-based similarity: cosine similarity scaled to a percentage.
        job_description_embedding = model.encode(processed_job_desc)
        resume_embedding = model.encode(processed_resume)
        similarity_score = util.cos_sim(job_description_embedding, resume_embedding).item() * 100

        # Literal word overlap and the words that overlapped.
        keyword_score, matched_keywords = keyword_match(processed_job_desc, processed_resume)

        # Skill-group overlap, plus the groups the resume is missing.
        synonym_score, synonym_matches, synonym_misses = synonym_match(processed_job_desc, processed_resume)

        # Weighted blend of the three signals (weights sum to 1.0).
        overall_score = (similarity_score * 0.5) + (keyword_score * 0.3) + (synonym_score * 0.2)
        # Cosine similarity can be negative, which could push the blend below
        # zero; keep the reported percentage within 0-100.
        overall_score = max(0.0, min(100.0, overall_score))

        # Display the overall similarity score
        st.write(f"**Overall Similarity Score:** {overall_score:.2f}%")

        # Limit the LISTS to five entries before joining. Slicing after the
        # join would cut the rendered string to its first five characters.
        # dict.fromkeys drops terms reported by both matchers, keeping order.
        shown_matches = list(dict.fromkeys(matched_keywords + synonym_matches))[:5]
        shown_misses = synonym_misses[:5]
        st.write("**Matched Keywords:**", ", ".join(shown_matches))
        st.write("**Missing Skills to Consider Adding:**", ", ".join(shown_misses))

        # Feedback tier chosen from the combined score.
        if overall_score > 80:
            st.success("Excellent match! Your resume closely aligns with the job description.")
        elif overall_score > 65:
            st.info("Strong match! Your resume aligns well, but a few minor tweaks could help.")
        elif overall_score > 50:
            st.warning("Moderate match. Your resume has some relevant information, but consider emphasizing key skills.")
        elif overall_score > 35:
            st.error("Low match. Your resume does not align well. Consider revising to highlight key skills.")
        else:
            st.error("Very low match. Your resume is significantly different from the job description. Major revisions may be needed.")
    else:
        st.error("Please paste both the job description and your resume to proceed.")