Spaces:
Sleeping
Sleeping
File size: 4,609 Bytes
1c6412d cb632ff d728f53 46a124f 1ae1b13 46a124f cb632ff 8c8e3c8 46a124f 529c4d0 46a124f 529c4d0 46a124f 529c4d0 46a124f d728f53 46a124f d728f53 529c4d0 d728f53 529c4d0 8c8e3c8 1ae1b13 6a3b909 8c8e3c8 6a3b909 46a124f 6a3b909 46a124f 529c4d0 46a124f 529c4d0 d728f53 46a124f d728f53 529c4d0 46a124f d728f53 529c4d0 46a124f d728f53 46a124f 6a3b909 46a124f 6a3b909 1ae1b13 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
import streamlit as st
from sentence_transformers import SentenceTransformer, util
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Download the NLTK data files this app relies on.
# "punkt_tab" is requested alongside the legacy "punkt" package because recent
# NLTK releases make word_tokenize load the Punkt model from "punkt_tab";
# without it tokenization fails with a LookupError on those versions.
for _nltk_resource in ("stopwords", "punkt", "punkt_tab"):
    nltk.download(_nltk_resource)

# Load English stop words into a set for fast membership checks.
stop_words = set(stopwords.words("english"))
@st.cache_resource
def load_model():
    """Create the sentence-embedding model used for semantic similarity.

    The function is wrapped in ``st.cache_resource`` so the model object can
    be reused instead of being rebuilt on each call.

    Returns:
        A ``SentenceTransformer`` loaded from the
        ``sentence-transformers/all-mpnet-base-v2`` checkpoint.
    """
    checkpoint = 'sentence-transformers/all-mpnet-base-v2'
    encoder = SentenceTransformer(checkpoint)
    return encoder


# Module-level handle to the shared embedding model.
model = load_model()
# Skill vocabulary: each key is a canonical skill name and its value is the
# set of alternative spellings / related terms treated as equivalent to it.
synonyms = {
    "data analysis": {
        "data analytics",
        "data analyst",
    },
    "machine learning": {
        "ml",
        "artificial intelligence",
        "ai",
    },
    "programming": {
        "coding",
        "development",
        "software engineering",
    },
    "statistical analysis": {
        "statistics",
        "statistical modeling",
    },
    "visualization": {
        "data viz",
        "tableau",
        "visualizing data",
    },
}
def preprocess(text):
    """Normalize free text into a space-separated string of content words.

    The text is lower-cased and tokenized with ``word_tokenize``; tokens that
    are not purely alphanumeric, or that appear in ``stop_words``, are dropped.

    Args:
        text: Raw input string.

    Returns:
        The surviving tokens joined by single spaces (possibly empty).
    """
    kept_tokens = []
    for token in word_tokenize(text.lower()):
        # Skip punctuation / mixed-symbol tokens first, then stop words.
        if not token.isalnum():
            continue
        if token in stop_words:
            continue
        kept_tokens.append(token)
    return " ".join(kept_tokens)
def _mentions_any(text, terms):
    """Return True if any of *terms* occurs in *text* as a whole word/phrase."""
    # Lookarounds instead of a plain ``in`` test: a bare substring check lets
    # short terms such as "ai" or "ml" fire inside "maintain" or "html".
    return any(
        re.search(r"(?<!\w)" + re.escape(term) + r"(?!\w)", text)
        for term in terms
    )


def synonym_match(job_desc, resume, synonym_map=None):
    """Score how many skills named in the job description the resume covers.

    A skill counts as "mentioned" in a text when its canonical name or any of
    its variants appears there as a whole word or phrase (case-sensitive, so
    callers should pass lower-cased text when the table is lower-case).

    Args:
        job_desc: Job description text.
        resume: Resume text.
        synonym_map: Optional mapping of canonical skill name to an iterable
            of equivalent terms. Defaults to the module-level ``synonyms``.

    Returns:
        A 3-tuple ``(score, matched, missing)``:
        * ``score`` - percentage (0-100) of skills mentioned in the job
          description that are also mentioned in the resume; ``0`` when the
          job description mentions none of the skills.
        * ``matched`` - up to five covered skill names, sorted alphabetically.
        * ``missing`` - up to five uncovered skill names, sorted alphabetically.
    """
    if synonym_map is None:
        synonym_map = synonyms

    matched_keywords = set()
    missing_keywords = set()
    for key, variants in synonym_map.items():
        terms = (key, *variants)
        # Only skills the job description actually asks for are scored.
        if not _mentions_any(job_desc, terms):
            continue
        if _mentions_any(resume, terms):
            matched_keywords.add(key)
        else:
            missing_keywords.add(key)

    total_keywords = len(matched_keywords) + len(missing_keywords)
    score = (len(matched_keywords) / total_keywords) * 100 if total_keywords else 0
    # Sort before truncating so the reported sample is stable across runs.
    return score, sorted(matched_keywords)[:5], sorted(missing_keywords)[:5]
def keyword_match(job_desc, resume):
    """Measure word overlap between a job description and a resume.

    Both texts are split into sets of distinct ``\\w+`` words and compared
    exactly (case-sensitive).

    Args:
        job_desc: Job description text.
        resume: Resume text.

    Returns:
        A 2-tuple ``(score, sample)`` where ``score`` is the percentage of
        distinct job-description words that also occur in the resume (``0``
        when the job description has no words) and ``sample`` holds up to five
        of the shared words in alphabetical order.
    """
    word_pattern = r'\b\w+\b'
    job_keywords = set(re.findall(word_pattern, job_desc))
    resume_keywords = set(re.findall(word_pattern, resume))
    common_keywords = job_keywords & resume_keywords

    score = (len(common_keywords) / len(job_keywords)) * 100 if job_keywords else 0
    # Sort before truncating: slicing a raw set yields an arbitrary sample
    # whose contents and order can change between interpreter runs.
    return score, sorted(common_keywords)[:5]
# ---- Streamlit page: collect both texts, score them, and report the result ----
st.title("Resume and Job Description Similarity Checker")
job_description = st.text_area("Paste any job description here:", height=200)
resume_text = st.text_area("Paste your resume here:", height=200)

if st.button("Compare"):
    if job_description.strip() and resume_text.strip():
        # Preprocess text
        processed_job_desc = preprocess(job_description)
        processed_resume = preprocess(resume_text)

        # Calculate embeddings-based similarity (cosine similarity scaled to a percentage)
        job_description_embedding = model.encode(processed_job_desc)
        resume_embedding = model.encode(processed_resume)
        similarity_score = util.cos_sim(job_description_embedding, resume_embedding).item() * 100

        # Calculate keyword-based similarity and matched keywords
        keyword_score, matched_keywords = keyword_match(processed_job_desc, processed_resume)

        # Calculate synonym-based similarity and missing skills
        synonym_score, synonym_matches, synonym_misses = synonym_match(processed_job_desc, processed_resume)

        # Combine scores (adjusting weights as needed): 50% semantic,
        # 30% exact keyword overlap, 20% synonym-aware skill coverage.
        overall_score = (similarity_score * 0.5) + (keyword_score * 0.3) + (synonym_score * 0.2)

        # Display the overall similarity score
        st.write(f"**Overall Similarity Score:** {overall_score:.2f}%")

        # Display matched keywords and missing skills, at most five of each.
        # The cap is applied to the list *before* joining; slicing the joined
        # string would keep only its first five characters.
        # dict.fromkeys drops words reported by both matchers while keeping order.
        shown_matches = list(dict.fromkeys(matched_keywords + synonym_matches))[:5]
        shown_misses = synonym_misses[:5]
        st.write("**Matched Keywords:**", ", ".join(shown_matches))
        st.write("**Missing Skills to Consider Adding:**", ", ".join(shown_misses))

        # Adjusted feedback based on combined score
        if overall_score > 80:
            st.success("Excellent match! Your resume closely aligns with the job description.")
        elif overall_score > 65:
            st.info("Strong match! Your resume aligns well, but a few minor tweaks could help.")
        elif overall_score > 50:
            st.warning("Moderate match. Your resume has some relevant information, but consider emphasizing key skills.")
        elif overall_score > 35:
            st.error("Low match. Your resume does not align well. Consider revising to highlight key skills.")
        else:
            st.error("Very low match. Your resume is significantly different from the job description. Major revisions may be needed.")
    else:
        # At least one of the two text areas is empty or whitespace-only.
        st.error("Please paste both the job description and your resume to proceed.")