# Uploaded by: gauri-sharan
# Commit: "Create app.py" (17d7bf7, verified)
# File size: 6.86 kB
import streamlit as st
import spacy
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
from sentence_transformers import SentenceTransformer
from pyhealth.metrics import binary_metrics
import mlflow
import logging
from system_monitor import SystemMonitor # Custom AIOPS module
import torch
from transformers import pipeline
class AdvancedResumeProcessor:
    """Turn a job description and resumes into ranking features.

    On construction this loads a spaCy pipeline, a sentence-embedding model,
    a ``SystemMonitor``, a logger and (only when CUDA is available) a
    text-generation pipeline, and attaches to an MLflow run.

    Attributes:
        nlp: spaCy pipeline used for entity extraction.
        sentence_model: ``SentenceTransformer`` used for text embeddings.
        system_monitor: ``SystemMonitor`` instance (project-local module).
        logger: ``logging.Logger`` named ``'mlops'``.
        llm: text-generation pipeline, or ``None`` without CUDA.
        experiment: the MLflow run this processor is attached to.
    """

    def __init__(self):
        self.nlp = spacy.load("en_core_web_trf")
        self.sentence_model = SentenceTransformer('all-mpnet-base-v2')
        self.system_monitor = SystemMonitor()
        self.logger = logging.getLogger('mlops')
        self.llm = pipeline('text-generation', model='gpt2-xl') if torch.cuda.is_available() else None
        # MLOps setup
        mlflow.set_tracking_uri("http://localhost:5000")
        # Reuse the run that is already active instead of always starting a
        # new one: the app builds a processor on every script rerun, and
        # starting a second un-nested run while one is active raises.
        self.experiment = mlflow.active_run() or mlflow.start_run()

    def extract_text(self, uploaded_file):
        """Return the text content of an uploaded plain-text file.

        Args:
            uploaded_file: file-like object exposing ``getvalue()`` or
                ``read()`` and, optionally, a ``name`` attribute.

        Returns:
            The decoded text. Bytes are decoded as UTF-8; undecodable bytes
            are replaced rather than raising.

        Raises:
            NotImplementedError: if the file name ends in ``.pdf``. No PDF
                parser is imported by this module, so PDFs cannot be read.
        """
        name = str(getattr(uploaded_file, "name", "") or "")
        if name.lower().endswith(".pdf"):
            raise NotImplementedError(
                f"Cannot extract text from {name!r}: PDF parsing is not "
                "available in this build; please upload a TXT file."
            )
        # Prefer getvalue(): it returns the whole buffer regardless of the
        # current read position, so repeated calls give the same result.
        getvalue = getattr(uploaded_file, "getvalue", None)
        payload = getvalue() if callable(getvalue) else uploaded_file.read()
        if isinstance(payload, (bytes, bytearray)):
            payload = bytes(payload).decode("utf-8", errors="replace")
        return payload

    def _extract_entities(self, text):
        """Run the spaCy pipeline on ``text`` and bucket entities by label.

        Args:
            text: raw document text.

        Returns:
            dict with list-valued keys ``'skills'``, ``'education'`` and
            ``'experience'`` (entity texts whose label is ``SKILL``,
            ``DEGREE`` or ``EXPERIENCE`` respectively), plus ``'text'``
            holding the input text so later steps can embed the document.
        """
        doc = self.nlp(text)
        # NOTE(review): these labels presumably require a custom NER
        # component; confirm the loaded pipeline actually emits them,
        # otherwise every list below stays empty.
        return {
            'skills': [ent.text for ent in doc.ents if ent.label_ == 'SKILL'],
            'education': [ent.text for ent in doc.ents if ent.label_ == 'DEGREE'],
            'experience': [ent.text for ent in doc.ents if ent.label_ == 'EXPERIENCE'],
            'text': text,
        }

    @staticmethod
    def _normalize_terms(terms):
        """Return the case-insensitive, whitespace-trimmed set of ``terms``."""
        return {str(term).strip().casefold() for term in terms}

    @staticmethod
    def _cosine(vec_a, vec_b):
        """Return the cosine similarity of two vectors (0.0 if either is zero)."""
        a = np.asarray(vec_a, dtype=float).ravel()
        b = np.asarray(vec_b, dtype=float).ravel()
        denom = np.linalg.norm(a) * np.linalg.norm(b)
        if denom == 0.0:
            return 0.0
        return float(np.dot(a, b) / denom)

    def _generate_features(self, jd_entities, resume_text, jd_text=None):
        """Compute similarity and entity-overlap features for one resume.

        Args:
            jd_entities: dict as returned by ``_extract_entities`` for the
                job description.
            resume_text: raw resume text.
            jd_text: job-description text to embed. When omitted, the
                ``'text'`` entry of ``jd_entities`` is used.

        Returns:
            dict with ``'semantic_similarity'`` (float cosine similarity
            between job-description and resume embeddings, or 0.0 when no
            job-description text is available), ``'skill_match'`` (number of
            skills in common, compared case-insensitively) and
            ``'education_match'`` (1 if any degree is shared, else 0).
        """
        resume_entities = self._extract_entities(resume_text)

        # Semantic similarity: embed the job description and the resume.
        if jd_text is None:
            jd_text = jd_entities.get('text', '')
        if jd_text:
            jd_embed, resume_embed = self.sentence_model.encode([jd_text, resume_text])
            semantic_sim = self._cosine(jd_embed, resume_embed)
        else:
            self.logger.warning(
                "No job-description text available; semantic similarity set to 0.0"
            )
            semantic_sim = 0.0

        # Entity matching scores
        shared_skills = (self._normalize_terms(jd_entities['skills'])
                         & self._normalize_terms(resume_entities['skills']))
        shared_degrees = (self._normalize_terms(jd_entities['education'])
                          & self._normalize_terms(resume_entities['education']))
        return {
            'semantic_similarity': semantic_sim,
            'skill_match': len(shared_skills),
            'education_match': int(bool(shared_degrees)),
        }

    def train_model(self, X, y):
        """Fit an embedding + gradient-boosting regressor and log it to MLflow.

        Args:
            X: tabular input with a ``'resume_text'`` column (selected by
                name, so presumably a pandas DataFrame; verify against the
                caller).
            y: regression targets aligned with ``X``.

        Returns:
            The fitted scikit-learn ``Pipeline``.
        """
        # Local import: only this method needs it, and it comes from a
        # package the module already depends on.
        from sklearn.preprocessing import FunctionTransformer

        encoder = self.sentence_model

        def embed(column):
            # The embedding model is not a scikit-learn transformer, so it is
            # adapted here; the closure captures only the encoder.
            return encoder.encode([str(text) for text in column])

        # nested=True lets this run start while the run opened in __init__
        # is still active.
        with mlflow.start_run(nested=True):
            preprocessor = ColumnTransformer([
                ('text', Pipeline([
                    ('embed', FunctionTransformer(embed)),
                    ('scaler', StandardScaler())
                ]), 'resume_text')
            ])
            model = Pipeline([
                ('preproc', preprocessor),
                ('regressor', GradientBoostingRegressor())
            ])
            model.fit(X, y)
            mlflow.sklearn.log_model(model, "model")
        return model
class MLOpsDashboard:
    """In-memory history of metric samples, one list per metric category."""

    def __init__(self):
        # One append-only history per category, in this fixed order.
        self.metrics = dict(
            model_performance=[],
            system_health=[],
            data_quality=[],
        )

    def update_metrics(self, new_metrics):
        """Append each sample in ``new_metrics`` to its category's history.

        Args:
            new_metrics: mapping of category name to a single new sample.

        Raises:
            KeyError: if a category is not one of the known categories.
        """
        for category, sample in new_metrics.items():
            history = self.metrics[category]
            history.append(sample)
# Feature columns produced for each resume, in display order.
_FEATURE_COLUMNS = ("semantic_similarity", "skill_match", "education_match")


def _session_resource(key, factory):
    """Return the object stored under ``key`` in session state, building it once.

    Streamlit re-executes the script on every interaction; keeping the object
    in ``st.session_state`` avoids rebuilding it on each rerun.
    """
    if key not in st.session_state:
        st.session_state[key] = factory()
    return st.session_state[key]


def _llm_feedback(processor, jd_text, resume_text):
    """Return generated feedback for one resume, or a notice when no LLM is loaded."""
    if not processor.llm:
        return "LLM unavailable"
    prompt = f"Compare this resume to the job description: {jd_text[:1000]}... RESUME: {resume_text[:1000]}"
    generated = processor.llm(prompt)[0]['generated_text']
    # If the generated text echoes the prompt, drop it so the truncated
    # preview shows the model's continuation rather than the prompt itself.
    if generated.startswith(prompt):
        generated = generated[len(prompt):]
    return generated.strip()


def _rank_resumes(processor, jd_file, resume_files):
    """Score every resume against the job description; return a sorted DataFrame."""
    jd_text = processor.extract_text(jd_file)
    jd_entities = processor._extract_entities(jd_text)

    rows = []
    for file in resume_files:
        resume_text = processor.extract_text(file)
        features = processor._generate_features(jd_entities, resume_text)
        llm_feedback = _llm_feedback(processor, jd_text, resume_text)
        rows.append({
            "Filename": file.name,
            **features,
            "LLM Feedback": llm_feedback[:200] + "..."
        })
    return pd.DataFrame(rows).sort_values("semantic_similarity", ascending=False)


def _render_explainability(df):
    """Render the explainability chart and the report download for ``df``.

    ``create_shap_plot`` and ``generate_report`` are optional module-level
    hooks: they are used when defined, otherwise a feature bar chart and a
    CSV export are shown so the page never fails on a missing name.
    """
    shap_plot = globals().get("create_shap_plot")
    report_builder = globals().get("generate_report")

    if callable(shap_plot):
        st.plotly_chart(shap_plot(df))
    else:
        st.bar_chart(df.set_index("Filename")[list(_FEATURE_COLUMNS)])

    if callable(report_builder):
        st.download_button("Export Evaluation Report",
                           report_builder(df),
                           file_name="ranking_report.pdf")
    else:
        st.download_button("Export Evaluation Report",
                           df.to_csv(index=False),
                           file_name="ranking_report.csv",
                           mime="text/csv")


def main():
    """Render the resume-ranking Streamlit page.

    Builds the sidebar (system metrics and retrain button), accepts a job
    description and resumes, ranks the resumes, and shows the results table,
    explainability view and LLM feedback.
    """
    st.set_page_config(page_title="Enterprise Resume Ranker", layout="wide")
    # The rocket emoji is written as an escape so it survives re-encoding.
    st.title("\U0001F680 Next-Gen Resume Ranking System with AIOPs/MLOps")

    # Built once per session: the processor loads several models, and the
    # dashboard would otherwise lose its history on every rerun.
    processor = _session_resource("processor", AdvancedResumeProcessor)
    dashboard = _session_resource("dashboard", MLOpsDashboard)

    with st.sidebar:
        st.header("AIOPs Dashboard")
        processor.system_monitor.display_metrics()
        st.metric("Current Load", f"{processor.system_monitor.cpu_usage}% CPU")
        st.header("MLOps Controls")
        retrain = st.button("Retrain Production Model")
        if retrain:
            with st.spinner("Retraining model..."):
                # TODO: add retraining logic here; nothing is retrained yet.
                st.success("Model updated in production!")

    # Stays None until a ranking has been computed in this run.
    df = None

    main_col1, main_col2 = st.columns([3, 2])
    with main_col1:
        st.header("Upload Files")
        jd_file = st.file_uploader("Job Description (TXT/PDF)", type=["txt", "pdf"])
        resume_files = st.file_uploader("Resumes (PDF/TXT)",
                                        type=["pdf", "txt"],
                                        accept_multiple_files=True)
        if jd_file and resume_files:
            try:
                df = _rank_resumes(processor, jd_file, resume_files)

                # Display results
                st.subheader("Ranking Results with Explainability")
                st.dataframe(
                    df,
                    column_config={
                        "semantic_similarity": "Semantic Match",
                        "skill_match": "Skill Matches",
                        "education_match": "Education Match"
                    },
                    use_container_width=True
                )

                # MLOps logging
                dashboard.update_metrics({
                    'model_performance': df['semantic_similarity'].mean(),
                    'data_quality': len(resume_files)
                })
            except Exception as e:
                # Top-level UI boundary: log with traceback and surface the
                # message to the user instead of crashing the page.
                processor.logger.exception("Processing error: %s", e)
                st.error(f"System error: {str(e)}")
                df = None

    with main_col2:
        st.header("Model Explainability")
        if df is not None:
            _render_explainability(df)
        st.header("LLM Feedback Analysis")
        if df is not None:
            st.table(df[["Filename", "LLM Feedback"]].set_index("Filename"))


if __name__ == "__main__":
    main()