import streamlit as st
import spacy
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler, FunctionTransformer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import mlflow
import logging
from system_monitor import SystemMonitor  # custom AIOps module
import torch
from transformers import pipeline
class AdvancedResumeProcessor:
    def __init__(self):
        self.nlp = spacy.load("en_core_web_trf")
        self.sentence_model = SentenceTransformer('all-mpnet-base-v2')
        self.system_monitor = SystemMonitor()
        self.logger = logging.getLogger('mlops')
        self.llm = (pipeline('text-generation', model='gpt2-xl', device=0)
                    if torch.cuda.is_available() else None)

        # MLOps setup: register the tracking server and experiment up front;
        # individual runs are opened inside train_model().
        mlflow.set_tracking_uri("http://localhost:5000")
        mlflow.set_experiment("resume-ranker")  # assumed experiment name
    def _extract_entities(self, text):
        """Enhanced entity extraction with custom categories.

        Note: SKILL/DEGREE/EXPERIENCE are custom labels; the stock
        en_core_web_trf model does not emit them, so this assumes a
        fine-tuned NER component in the loaded pipeline.
        """
        doc = self.nlp(text)
        return {
            'skills': [ent.text for ent in doc.ents if ent.label_ == 'SKILL'],
            'education': [ent.text for ent in doc.ents if ent.label_ == 'DEGREE'],
            'experience': [ent.text for ent in doc.ents if ent.label_ == 'EXPERIENCE']
        }
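
    def extract_text(self, uploaded_file):
        """Return raw text from an uploaded TXT or PDF file.

        Minimal sketch of a helper that main() calls but the file never
        defined; assumes the `pypdf` package for PDF parsing.
        """
        if uploaded_file.name.lower().endswith(".pdf"):
            from pypdf import PdfReader  # assumed dependency
            reader = PdfReader(uploaded_file)
            return "\n".join(page.extract_text() or "" for page in reader.pages)
        return uploaded_file.read().decode("utf-8", errors="ignore")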
    def _generate_features(self, jd_text, jd_entities, resume_text):
        """Generate multi-modal features for one (job description, resume) pair."""
        resume_entities = self._extract_entities(resume_text)

        # Semantic similarity between the job description and the resume
        jd_embed = self.sentence_model.encode([jd_text])[0]
        resume_embed = self.sentence_model.encode([resume_text])[0]
        semantic_sim = cosine_similarity([jd_embed], [resume_embed])[0][0]

        # Entity matching scores
        skill_match = len(set(jd_entities['skills']) & set(resume_entities['skills']))

        return {
            'semantic_similarity': semantic_sim,
            'skill_match': skill_match,
            'education_match': int(any(deg in resume_entities['education']
                                       for deg in jd_entities['education']))
        }
    def train_model(self, X, y):
        """MLOps-enabled training pipeline."""
        with mlflow.start_run():
            # SentenceTransformer is not a scikit-learn transformer, so wrap
            # the shared encoder in a FunctionTransformer before scaling.
            embedder = FunctionTransformer(
                lambda texts: self.sentence_model.encode(list(texts))
            )
            preprocessor = ColumnTransformer([
                ('text', Pipeline([
                    ('embed', embedder),
                    ('scaler', StandardScaler())
                ]), 'resume_text')
            ])
            model = Pipeline([
                ('preproc', preprocessor),
                ('regressor', GradientBoostingRegressor())
            ])
            model.fit(X, y)
            mlflow.sklearn.log_model(model, "model")
            return model
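
    # Example usage (assumed shapes): train_model expects a DataFrame with a
    # 'resume_text' column plus a numeric relevance target, e.g.
    #   X = pd.DataFrame({"resume_text": ["...resume one...", "...resume two..."]})
    #   y = [0.9, 0.2]
    #   model = processor.train_model(X, y)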
class MLOpsDashboard:
    def __init__(self):
        self.metrics = {
            'model_performance': [],
            'system_health': [],
            'data_quality': []
        }

    def update_metrics(self, new_metrics):
        for k, v in new_metrics.items():
            self.metrics[k].append(v)
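
# Helpers referenced by main() but not defined anywhere in this file. Both are
# assumed sketches: a real SHAP plot needs the trained model object, so
# create_shap_plot() substitutes a Plotly bar chart of the engineered feature
# scores, and generate_report() exports CSV bytes rather than a rendered PDF.
def create_shap_plot(df):
    import plotly.express as px  # assumed dependency
    feature_cols = ["semantic_similarity", "skill_match", "education_match"]
    melted = df.melt(id_vars="Filename", value_vars=feature_cols,
                     var_name="feature", value_name="score")
    return px.bar(melted, x="Filename", y="score", color="feature",
                  barmode="group", title="Per-resume feature scores")

def generate_report(df):
    return df.to_csv(index=False).encode("utf-8")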
def main():
    st.set_page_config(page_title="Enterprise Resume Ranker", layout="wide")
    st.title("Next-Gen Resume Ranking System with AIOps/MLOps")

    processor = AdvancedResumeProcessor()
    dashboard = MLOpsDashboard()

    with st.sidebar:
        st.header("AIOps Dashboard")
        processor.system_monitor.display_metrics()
        st.metric("Current Load", f"{processor.system_monitor.cpu_usage}% CPU")

        st.header("MLOps Controls")
        retrain = st.button("Retrain Production Model")
        if retrain:
            with st.spinner("Retraining model..."):
                # Add retraining logic here
                st.success("Model updated in production!")

    main_col1, main_col2 = st.columns([3, 2])

    with main_col1:
        st.header("Upload Files")
        jd_file = st.file_uploader("Job Description (TXT/PDF)", type=["txt", "pdf"])
        resume_files = st.file_uploader("Resumes (PDF/TXT)",
                                        type=["pdf", "txt"],
                                        accept_multiple_files=True)

        if jd_file and resume_files:
            try:
                # Process job description
                jd_text = processor.extract_text(jd_file)
                jd_entities = processor._extract_entities(jd_text)

                # Process resumes and generate features
                results = []
                for file in resume_files:
                    resume_text = processor.extract_text(file)
                    features = processor._generate_features(jd_text, jd_entities, resume_text)

                    # Generate LLM feedback (bounded so gpt2's context is not exceeded)
                    llm_feedback = processor.llm(
                        f"Compare this resume to the job description: {jd_text[:1000]}... RESUME: {resume_text[:1000]}",
                        max_new_tokens=120
                    )[0]['generated_text'] if processor.llm else "LLM unavailable"

                    results.append({
                        "Filename": file.name,
                        **features,
                        "LLM Feedback": llm_feedback[:200] + "..."
                    })

                # Display results
                df = pd.DataFrame(results).sort_values("semantic_similarity", ascending=False)
                st.subheader("Ranking Results with Explainability")
                st.dataframe(
                    df,
                    column_config={
                        "semantic_similarity": "Semantic Match",
                        "skill_match": "Skill Matches",
                        "education_match": "Education Match"
                    },
                    use_container_width=True
                )

                # MLOps logging
                dashboard.update_metrics({
                    'model_performance': df['semantic_similarity'].mean(),
                    'data_quality': len(resume_files)
                })
            except Exception as e:
                processor.logger.error(f"Processing error: {str(e)}")
                st.error(f"System error: {str(e)}")

    with main_col2:
        st.header("Model Explainability")
        if 'df' in locals():
            st.plotly_chart(create_shap_plot(df))  # sketch above stands in for SHAP
            st.download_button("Export Evaluation Report",
                               generate_report(df),
                               file_name="ranking_report.csv")

        st.header("LLM Feedback Analysis")
        if 'df' in locals():
            st.table(df[["Filename", "LLM Feedback"]].set_index("Filename"))

if __name__ == "__main__":
    main()