|
import os |
|
import streamlit as st |
|
import spacy |
|
import PyPDF2 |
|
import pandas as pd |
|
import time |
|
from datetime import datetime |
|
import openai |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
from sentence_transformers import SentenceTransformer |
|
from collections import defaultdict |
|
|
|
class ResumeProcessor:
    """Extract, normalise and score resume text against a job description.

    ``__init__`` loads three models: the spaCy ``en_core_web_lg`` pipeline,
    a TF-IDF vectorizer with English stop words, and the
    ``all-MiniLM-L6-v2`` sentence-transformer.
    """

    # Entity labels retained by extract_entities.
    # NOTE(review): 'EDU' and 'SKILL' do not look like labels produced by the
    # stock en_core_web_lg NER -- confirm a custom component supplies them,
    # otherwise they never match.
    ENTITY_LABELS = frozenset({'ORG', 'PERSON', 'GPE', 'EDU', 'SKILL'})

    def __init__(self):
        self.nlp = spacy.load("en_core_web_lg")
        self.vectorizer = TfidfVectorizer(stop_words='english')
        self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

    def extract_text_from_pdf(self, file):
        """Return the text of every page of a PDF, joined by single spaces.

        Args:
            file: Anything ``PyPDF2.PdfReader`` accepts (path or binary stream).
        """
        reader = PyPDF2.PdfReader(file)
        # A page with nothing extractable contributes an empty string, so the
        # join cannot fail on a non-string value.
        return ' '.join(page.extract_text() or '' for page in reader.pages)

    def preprocess_text(self, text):
        """Return ``text`` as space-joined lowercase lemmas.

        Stop words and punctuation tokens are dropped.
        """
        doc = self.nlp(text)
        tokens = [
            token.lemma_.lower()
            for token in doc
            if not token.is_stop and not token.is_punct
        ]
        return ' '.join(tokens)

    def extract_entities(self, text):
        """Group the named entities found in ``text`` by label.

        Returns:
            A ``defaultdict(set)`` mapping each label in ``ENTITY_LABELS``
            that occurs in ``text`` to the set of lowercased entity strings.
        """
        doc = self.nlp(text)
        entities = defaultdict(set)
        for ent in doc.ents:
            if ent.label_ in self.ENTITY_LABELS:
                entities[ent.label_].add(ent.text.lower())
        return entities

    @staticmethod
    def _entity_overlap(jd_entities, resume_entities):
        """Return the fraction (0.0-1.0) of JD entities also found in a resume.

        The denominator is the total number of JD entities across all labels,
        so the result stays within [0, 1] regardless of how many entities a
        single label holds. An entity-free JD scores 0.0.
        """
        total = sum(len(values) for values in jd_entities.values())
        if not total:
            return 0.0
        matched = sum(
            len(values & resume_entities.get(label, set()))
            for label, values in jd_entities.items()
        )
        return matched / total

    def calculate_similarity(self, jd_text, resumes):
        """Score each resume against the job description.

        Three signals are averaged with equal weight: TF-IDF cosine
        similarity, sentence-embedding cosine similarity (both computed on
        the preprocessed text), and the named-entity overlap fraction
        (computed on the raw text).

        Args:
            jd_text: Raw job-description text.
            resumes: List of raw resume texts.

        Returns:
            ``(combined_scores, tfidf_matrix, jd_entities)`` where
            ``combined_scores`` has one entry per resume, ``tfidf_matrix``
            has the JD in row 0 followed by one row per resume, and
            ``jd_entities`` is the result of ``extract_entities(jd_text)``.
        """
        processed_jd = self.preprocess_text(jd_text)
        processed_resumes = [self.preprocess_text(resume) for resume in resumes]

        # The vectorizer is refit on every call; row 0 is the JD.
        tfidf_matrix = self.vectorizer.fit_transform([processed_jd] + processed_resumes)
        tfidf_scores = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])[0]

        jd_embedding = self.sentence_model.encode([processed_jd])
        resume_embeddings = self.sentence_model.encode(processed_resumes)
        semantic_scores = cosine_similarity(jd_embedding, resume_embeddings)[0]

        jd_entities = self.extract_entities(jd_text)
        entity_scores = [
            self._entity_overlap(jd_entities, self.extract_entities(resume))
            for resume in resumes
        ]

        combined_scores = (tfidf_scores + semantic_scores + entity_scores) / 3
        return combined_scores, tfidf_matrix, jd_entities
|
|
|
def get_top_terms(vector, feature_names, top_n=10):
    """Return the names of the highest-weighted terms in a sparse row.

    Args:
        vector: A sparse row exposing ``nnz``, ``indices`` and ``data``
            (e.g. one row of a TF-IDF matrix).
        feature_names: Sequence mapping a column index to its term.
        top_n: Maximum number of terms to return; values <= 0 yield ``[]``.

    Returns:
        Up to ``top_n`` terms ordered by descending weight. Terms with equal
        weight keep the order in which they are stored in ``vector``.
    """
    # A negative top_n would otherwise slice "all but the last N" terms.
    if top_n <= 0 or vector.nnz == 0:
        return []
    weighted = sorted(
        zip(vector.indices, vector.data),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [feature_names[idx] for idx, _ in weighted[:top_n]]
|
|
|
def generate_llm_feedback(jd, resume):
    """Ask ``gpt-3.5-turbo`` for brief feedback on a resume's suitability.

    Works with both generations of the ``openai`` package: the module-level
    ``openai.chat.completions`` client when it exists, otherwise the legacy
    ``openai.ChatCompletion`` interface. Both use the module-level
    ``openai.api_key``.

    Args:
        jd: Job-description text.
        resume: Resume text.

    Returns:
        The model's reply, or a string starting with
        ``"Error generating feedback: "`` if anything raises. The function
        never raises, so one failed request does not abort the caller.
    """
    try:
        request = {
            "model": "gpt-3.5-turbo",
            "messages": [{
                "role": "user",
                "content": f"Job Description:\n{jd}\n\nResume:\n{resume}\n\nProvide brief feedback on resume suitability."
            }],
        }
        if hasattr(openai, "chat"):
            # openai>=1.0: ChatCompletion.create was removed in favour of this.
            response = openai.chat.completions.create(**request)
        else:
            response = openai.ChatCompletion.create(**request)
        return response.choices[0].message.content
    except Exception as e:
        return f"Error generating feedback: {str(e)}"
|
|
|
def _read_uploaded_text(processor, file):
    """Return the text of one uploaded resume (PDF via ``processor``, else UTF-8)."""
    if file.type == "application/pdf":
        file.seek(0)  # do not depend on where an earlier read left the stream
        return processor.extract_text_from_pdf(file)
    # getvalue() returns the full payload regardless of the stream position;
    # undecodable bytes are replaced rather than failing the whole batch.
    return file.getvalue().decode("utf-8", errors="replace")


def _record_run(resume_count, elapsed):
    """Fold one processed batch into the session's monitoring metrics."""
    metrics = st.session_state.metrics
    previous_total = metrics['total_processed']
    metrics['total_processed'] = previous_total + resume_count
    # avg_time is seconds per resume: (time spent so far + this run) / resumes.
    metrics['avg_time'] = (
        metrics['avg_time'] * previous_total + elapsed
    ) / metrics['total_processed']
    metrics['last_processed'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")


def main():
    """Render the Streamlit resume-ranking page.

    The sidebar collects a job description (TXT), resumes (PDF/TXT) and the
    optional LLM-feedback setting. When both uploads are present, every
    resume is scored with ``ResumeProcessor.calculate_similarity`` and shown
    in a table sorted by score; optional per-resume LLM feedback follows.
    Any exception during processing is shown with ``st.error`` and appended
    to ``st.session_state.metrics['errors']``.
    """
    # NOTE(review): several headings below begin with characters that look
    # like mis-decoded emoji; the strings are kept exactly as found.
    st.set_page_config(page_title="Resume Ranker Pro", layout="wide")
    st.title("π AI-Powered Resume Ranking System 2.0")

    if 'metrics' not in st.session_state:
        st.session_state.metrics = {
            'total_processed': 0,
            'avg_time': 0,
            'last_processed': None,
            'errors': []
        }

    # Streamlit re-executes this function on every widget interaction; keep
    # the processor in session state so its models are loaded once per session.
    if 'processor' not in st.session_state:
        st.session_state.processor = ResumeProcessor()
    processor = st.session_state.processor

    with st.sidebar:
        st.header("βοΈ Configuration")
        jd_file = st.file_uploader("Job Description (TXT)", type="txt")
        resume_files = st.file_uploader("Resumes (PDF/TXT)",
                                        type=["pdf", "txt"],
                                        accept_multiple_files=True)

        st.divider()
        # These values are rendered before the current run's processing, so
        # they reflect the state as of the previous run.
        st.header("π AIOPS Monitoring")
        st.metric("Total Processed", st.session_state.metrics['total_processed'])
        st.metric("Avg Processing Time", f"{st.session_state.metrics['avg_time']:.2f}s")
        st.metric("Last Processed", st.session_state.metrics['last_processed'] or "Never")

        st.divider()
        st.header("π§ MLOps Settings")
        st.write("Model Version: 1.1.0")
        if st.button("Retrain Model (Mock)"):
            with st.spinner("Simulating retraining..."):
                time.sleep(2)
            st.success("Model updated to v1.1.1")

        st.divider()
        llm_enabled = st.checkbox("Enable LLM Feedback")

        # Prefer the environment variable; only prompt when it is missing.
        openai_key = os.environ.get("OPENAI_API_KEY")
        if not openai_key and llm_enabled:
            openai_key = st.text_input("OpenAI API Key", type="password")
        if llm_enabled:
            openai.api_key = openai_key

    if not (jd_file and resume_files):
        return

    start_time = time.time()
    try:
        jd_text = jd_file.getvalue().decode("utf-8", errors="replace")
        resume_texts = [_read_uploaded_text(processor, file) for file in resume_files]

        scores, tfidf_matrix, jd_entities = processor.calculate_similarity(jd_text, resume_texts)
        feature_names = processor.vectorizer.get_feature_names_out()
        jd_top_terms = set(get_top_terms(tfidf_matrix[0], feature_names))

        results = []
        # Row 0 of tfidf_matrix is the JD, so resume rows start at 1.
        for row_idx, (uploaded, score, text) in enumerate(
                zip(resume_files, scores, resume_texts), start=1):
            resume_terms = get_top_terms(tfidf_matrix[row_idx], feature_names)
            common_terms = jd_top_terms.intersection(resume_terms)
            resume_entities = processor.extract_entities(text)
            matched_entities = []
            for key in jd_entities:
                # Sorted so the displayed order is stable between reruns.
                matched_entities.extend(
                    sorted(jd_entities[key] & resume_entities.get(key, set()))
                )

            results.append({
                "Filename": uploaded.name,
                "Score": score,
                "Top Terms": ", ".join(sorted(common_terms)),
                "Matched Entities": ", ".join(matched_entities),
                "Resume Text": text
            })

        df = pd.DataFrame(results).sort_values("Score", ascending=False)

        st.subheader("π Ranking Results")
        st.dataframe(
            df[["Filename", "Score", "Top Terms", "Matched Entities"]],
            column_config={
                "Score": st.column_config.ProgressColumn(
                    format="%.4f",
                    min_value=0,
                    max_value=1.0
                )
            },
            use_container_width=True,
            hide_index=True
        )

        if llm_enabled and openai_key:
            st.subheader("π§ LLM Feedback")
            for _, row in df.iterrows():
                with st.expander(f"Feedback for {row['Filename']}"):
                    feedback = generate_llm_feedback(jd_text, row['Resume Text'])
                    st.write(feedback)

        # Count each distinct upload batch once: without this guard every
        # widget-triggered rerun would inflate the totals.
        batch_key = (
            jd_file.name,
            jd_file.size,
            tuple((file.name, file.size) for file in resume_files),
        )
        if st.session_state.get('last_batch_key') != batch_key:
            _record_run(len(resume_files), time.time() - start_time)
            st.session_state['last_batch_key'] = batch_key

    except Exception as e:
        st.error(f"Error processing files: {str(e)}")
        st.session_state.metrics['errors'].append(str(e))
|
|
|
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()