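"""Streamlit app that ranks uploaded resumes against a job description.

Each resume is scored by averaging three signals: TF-IDF cosine similarity,
sentence-transformer embedding similarity, and spaCy named-entity overlap.
Optional per-resume feedback can be generated through the OpenAI API.

To run locally, install the imported packages (streamlit, spacy, PyPDF2,
pandas, openai, scikit-learn, sentence-transformers), download the spaCy
model with `python -m spacy download en_core_web_lg`, and start the app with
`streamlit run app.py`. The OpenAI key is read from the OPENAI_API_KEY
environment variable or entered in the sidebar.
"""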
import os
import streamlit as st
import spacy
import PyPDF2
import pandas as pd
import time
from datetime import datetime
import openai
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from collections import defaultdict


class ResumeProcessor:
    def __init__(self):
        self.nlp = spacy.load("en_core_web_lg")
        self.vectorizer = TfidfVectorizer(stop_words='english')
        self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

    def extract_text_from_pdf(self, file):
        reader = PyPDF2.PdfReader(file)
        # extract_text() may return None for image-only pages, so substitute an empty string
        return ' '.join([page.extract_text() or '' for page in reader.pages])

    def preprocess_text(self, text):
        doc = self.nlp(text)
        tokens = [token.lemma_.lower() for token in doc
                  if not token.is_stop and not token.is_punct]
        return ' '.join(tokens)

    def extract_entities(self, text):
        doc = self.nlp(text)
        entities = defaultdict(set)
        for ent in doc.ents:
            # en_core_web_lg only emits standard OntoNotes labels such as ORG, PERSON and GPE;
            # EDU and SKILL would need a custom NER component to ever match
            if ent.label_ in ['ORG', 'PERSON', 'GPE', 'EDU', 'SKILL']:
                entities[ent.label_].add(ent.text.lower())
        return entities

    def calculate_similarity(self, jd_text, resumes):
        processed_jd = self.preprocess_text(jd_text)
        processed_resumes = [self.preprocess_text(resume) for resume in resumes]

        # Lexical similarity: TF-IDF cosine between the job description and each resume
        tfidf_matrix = self.vectorizer.fit_transform([processed_jd] + processed_resumes)
        tfidf_scores = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])[0]

        # Semantic similarity: cosine between sentence-transformer embeddings
        jd_embedding = self.sentence_model.encode([processed_jd])
        resume_embeddings = self.sentence_model.encode(processed_resumes)
        semantic_scores = cosine_similarity(jd_embedding, resume_embeddings)[0]

        # Entity overlap: average number of shared entities per JD entity label (can exceed 1.0)
        jd_entities = self.extract_entities(jd_text)
        entity_scores = []
        for resume in resumes:
            resume_entities = self.extract_entities(resume)
            score = sum(len(jd_entities[key] & resume_entities[key])
                        for key in jd_entities) / max(len(jd_entities), 1)
            entity_scores.append(score)

        # Equal-weight average of the three signals
        combined_scores = (tfidf_scores + semantic_scores + entity_scores) / 3
        return combined_scores, tfidf_matrix, jd_entities


def get_top_terms(vector, feature_names, top_n=10):
    # Return the top_n highest-weighted TF-IDF terms of a sparse row vector
    if vector.nnz == 0:
        return []
    indices = vector.indices
    data = vector.data
    sorted_terms = sorted(zip(indices, data), key=lambda x: -x[1])
    return [feature_names[idx] for idx, _ in sorted_terms[:top_n]]


def generate_llm_feedback(jd, resume):
    # Uses the legacy ChatCompletion interface from openai<1.0
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{
                "role": "user",
                "content": f"Job Description:\n{jd}\n\nResume:\n{resume}\n\nProvide brief feedback on resume suitability."
            }]
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error generating feedback: {str(e)}"


def main():
    st.set_page_config(page_title="Resume Ranker Pro", layout="wide")
    st.title("πŸš€ AI-Powered Resume Ranking System 2.0")

    if 'metrics' not in st.session_state:
        st.session_state.metrics = {
            'total_processed': 0,
            'avg_time': 0,
            'last_processed': None,
            'errors': []
        }

    processor = ResumeProcessor()

    with st.sidebar:
        st.header("βš™οΈ Configuration")
        jd_file = st.file_uploader("Job Description (TXT)", type="txt")
        resume_files = st.file_uploader("Resumes (PDF/TXT)",
                                        type=["pdf", "txt"],
                                        accept_multiple_files=True)
        st.divider()

        st.header("πŸ“Š AIOPS Monitoring")
        st.metric("Total Processed", st.session_state.metrics['total_processed'])
        st.metric("Avg Processing Time", f"{st.session_state.metrics['avg_time']:.2f}s")
        st.metric("Last Processed", st.session_state.metrics['last_processed'] or "Never")
        st.divider()

        st.header("πŸ”§ MLOps Settings")
        st.write("Model Version: 1.1.0")
        if st.button("Retrain Model (Mock)"):
            with st.spinner("Simulating retraining..."):
                time.sleep(2)
            st.success("Model updated to v1.1.1")
        st.divider()

        llm_enabled = st.checkbox("Enable LLM Feedback")
        # Prefer the OPENAI_API_KEY environment variable; only prompt for a key
        # when it is not set and LLM feedback is enabled
        openai_key = os.environ.get("OPENAI_API_KEY")
        if not openai_key and llm_enabled:
            openai_key = st.text_input("OpenAI API Key", type="password")
        if llm_enabled:
            openai.api_key = openai_key

    if jd_file and resume_files:
        start_time = time.time()
        try:
            jd_text = jd_file.read().decode()
            resume_texts = []
            for file in resume_files:
                if file.type == "application/pdf":
                    text = processor.extract_text_from_pdf(file)
                else:
                    text = file.read().decode()
                resume_texts.append(text)

            scores, tfidf_matrix, jd_entities = processor.calculate_similarity(jd_text, resume_texts)
            feature_names = processor.vectorizer.get_feature_names_out()
            jd_top_terms = get_top_terms(tfidf_matrix[0], feature_names)

            results = []
            for i, (score, text) in enumerate(zip(scores, resume_texts)):
                resume_vector = tfidf_matrix[i + 1]
                resume_terms = get_top_terms(resume_vector, feature_names)
                common_terms = set(jd_top_terms) & set(resume_terms)
                resume_entities = processor.extract_entities(text)
                matched_entities = []
                for key in jd_entities:
                    matched_entities.extend(jd_entities[key] & resume_entities.get(key, set()))
                results.append({
                    "Filename": resume_files[i].name,
                    "Score": score,
                    "Top Terms": ", ".join(common_terms),
                    "Matched Entities": ", ".join(matched_entities),
                    "Resume Text": text
                })

            df = pd.DataFrame(results).sort_values("Score", ascending=False)

            st.subheader("πŸ“Š Ranking Results")
            st.dataframe(
                df[["Filename", "Score", "Top Terms", "Matched Entities"]],
                column_config={
                    "Score": st.column_config.ProgressColumn(
                        format="%.4f",
                        min_value=0,
                        max_value=1.0
                    )
                },
                use_container_width=True,
                hide_index=True
            )

            if llm_enabled and openai_key:
                st.subheader("🧠 LLM Feedback")
                for idx, row in df.iterrows():
                    with st.expander(f"Feedback for {row['Filename']}"):
                        feedback = generate_llm_feedback(jd_text, row['Resume Text'])
                        st.write(feedback)

            # Update the running AIOps metrics shown in the sidebar
            processing_time = time.time() - start_time
            st.session_state.metrics['total_processed'] += len(resume_files)
            # Running average of processing time per resume across all batches
            st.session_state.metrics['avg_time'] = (
                st.session_state.metrics['avg_time'] * (st.session_state.metrics['total_processed'] - len(resume_files)) +
                processing_time
            ) / st.session_state.metrics['total_processed']
            st.session_state.metrics['last_processed'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        except Exception as e:
            st.error(f"Error processing files: {str(e)}")
            st.session_state.metrics['errors'].append(str(e))


if __name__ == "__main__":
    main()