|
import logging

import mlflow
import numpy as np
import pandas as pd
import spacy
import streamlit as st
import torch
from pyhealth.metrics import binary_metrics
from sentence_transformers import SentenceTransformer
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer, StandardScaler
from transformers import pipeline

from system_monitor import SystemMonitor
|
|
|
class AdvancedResumeProcessor:
    """Extract resume/job-description features and train a scoring model.

    Construction loads a spaCy pipeline, a sentence-embedding model and
    (only when CUDA is available) a text-generation pipeline, then points
    MLflow at ``tracking_uri`` and makes sure a run is active.
    """

    # spaCy entity label -> key used in the dictionaries returned by
    # ``_extract_entities``.
    _ENTITY_BUCKETS = {
        'SKILL': 'skills',
        'DEGREE': 'education',
        'EXPERIENCE': 'experience',
    }

    def __init__(self, tracking_uri="http://localhost:5000"):
        """Load the models and attach to the MLflow tracking server.

        Args:
            tracking_uri: MLflow tracking server URI. Defaults to the value
                that used to be hard-coded.
        """
        self.nlp = spacy.load("en_core_web_trf")
        self.sentence_model = SentenceTransformer('all-mpnet-base-v2')
        self.system_monitor = SystemMonitor()
        self.logger = logging.getLogger('mlops')
        self.llm = pipeline('text-generation', model='gpt2-xl') if torch.cuda.is_available() else None

        mlflow.set_tracking_uri(tracking_uri)
        # Reuse a run that is already active: calling start_run() a second
        # time in the same process (e.g. when a second processor is built)
        # raises because MLflow refuses to start a non-nested run on top of
        # an active one.
        self.experiment = mlflow.active_run() or mlflow.start_run()

    def extract_text(self, uploaded_file):
        """Return the text of an uploaded plain-text file.

        Args:
            uploaded_file: File-like object exposing ``getvalue()`` or
                ``read()`` and, optionally, a ``name`` attribute.

        Returns:
            The decoded text. Bytes are decoded as UTF-8 with undecodable
            bytes replaced.

        Raises:
            ValueError: If the file name ends in ``.pdf``; this module
                imports no PDF parser, so decoding the raw bytes would only
                produce garbage.
        """
        filename = getattr(uploaded_file, "name", "") or ""
        if filename.lower().endswith(".pdf"):
            raise ValueError(
                f"Cannot extract text from PDF file {filename!r}: "
                "no PDF parser is available"
            )

        # getvalue() returns the whole buffer regardless of the current
        # read position; fall back to read() for plain file objects.
        reader = getattr(uploaded_file, "getvalue", None) or uploaded_file.read
        raw = reader()
        if isinstance(raw, (bytes, bytearray)):
            return bytes(raw).decode("utf-8", errors="replace")
        return raw

    def _extract_entities(self, text):
        """Enhanced entity extraction with custom categories.

        Runs ``self.nlp`` on ``text`` and groups the entities labelled
        ``SKILL``, ``DEGREE`` and ``EXPERIENCE``.

        NOTE(review): these labels presumably require a custom NER
        component; confirm the loaded pipeline actually emits them.

        Returns:
            A dict with list-valued keys ``'skills'``, ``'education'`` and
            ``'experience'``, plus ``'text'`` holding the original input so
            that ``_generate_features`` can embed the source document.
        """
        doc = self.nlp(text)
        entities = {bucket: [] for bucket in self._ENTITY_BUCKETS.values()}
        for ent in doc.ents:
            bucket = self._ENTITY_BUCKETS.get(ent.label_)
            if bucket is not None:
                entities[bucket].append(ent.text)
        entities['text'] = text
        return entities

    def _generate_features(self, jd_entities, resume_text):
        """Generate multi-modal features.

        Args:
            jd_entities: Entity dict for the job description, as returned by
                ``_extract_entities``. When it has no ``'text'`` entry the
                job description is approximated by joining its entity
                strings.
            resume_text: Raw text of the resume.

        Returns:
            A dict with ``'semantic_similarity'`` (cosine similarity of the
            two sentence embeddings, ``0.0`` if either has zero norm),
            ``'skill_match'`` (number of distinct shared skill strings) and
            ``'education_match'`` (1 if any education entity is shared,
            else 0).
        """
        resume_entities = self._extract_entities(resume_text)

        # The job-description side must be embedded from the job description
        # itself; embedding the resume twice always yields similarity 1.0.
        jd_text = jd_entities.get('text')
        if jd_text is None:
            jd_text = " ".join(
                term
                for key in ('skills', 'education', 'experience')
                for term in jd_entities.get(key, ())
            )

        jd_embed, resume_embed = self.sentence_model.encode([jd_text, resume_text])
        jd_vec = np.asarray(jd_embed, dtype=float).ravel()
        resume_vec = np.asarray(resume_embed, dtype=float).ravel()
        norm_product = float(np.linalg.norm(jd_vec) * np.linalg.norm(resume_vec))
        if norm_product:
            semantic_sim = float(np.dot(jd_vec, resume_vec)) / norm_product
        else:
            semantic_sim = 0.0

        shared_skills = set(jd_entities['skills']) & set(resume_entities['skills'])
        shares_education = not set(jd_entities['education']).isdisjoint(
            resume_entities['education']
        )

        return {
            'semantic_similarity': semantic_sim,
            'skill_match': len(shared_skills),
            'education_match': int(shares_education),
        }

    def train_model(self, X, y):
        """MLOps enabled training pipeline.

        Embeds the ``'resume_text'`` column of ``X`` with the sentence
        model, standardizes the embeddings, fits a gradient-boosting
        regressor on ``y`` and logs the fitted pipeline to MLflow.

        Args:
            X: Table selectable by column name; ``'resume_text'`` is the
                only column used.
            y: Regression targets aligned with ``X``.

        Returns:
            The fitted scikit-learn ``Pipeline``.
        """
        encoder = self.sentence_model

        def embed_texts(texts):
            # ColumnTransformer hands over the selected column as a 1-D
            # collection of strings; encode() returns one row per string.
            return encoder.encode(list(texts))

        # nested=True: __init__ already left a run active, and a plain
        # start_run() would raise in that situation.
        with mlflow.start_run(nested=True):
            # A SentenceTransformer has no transform() method, so it cannot
            # be a Pipeline step directly; FunctionTransformer adapts it.
            text_features = Pipeline([
                ('embed', FunctionTransformer(embed_texts)),
                ('scaler', StandardScaler()),
            ])
            preprocessor = ColumnTransformer([
                ('text', text_features, 'resume_text'),
            ])
            model = Pipeline([
                ('preproc', preprocessor),
                ('regressor', GradientBoostingRegressor()),
            ])

            model.fit(X, y)
            # NOTE(review): logging serializes the embedding closure together
            # with the sentence model; confirm the configured serialization
            # format supports that.
            mlflow.sklearn.log_model(model, "model")
        return model
|
|
|
class MLOpsDashboard:
    """In-memory history of dashboard metrics, grouped by category."""

    def __init__(self):
        """Start with empty histories for the three built-in categories."""
        self.metrics = {
            'model_performance': [],
            'system_health': [],
            'data_quality': [],
        }

    def update_metrics(self, new_metrics):
        """Append each value in ``new_metrics`` to its category's history.

        Args:
            new_metrics: Mapping of category name to the newly observed
                value. A category that has not been seen before gets a
                fresh history instead of raising ``KeyError``.
        """
        for category, value in new_metrics.items():
            self.metrics.setdefault(category, []).append(value)
|
|
|
def _load_processor():
    """Build the resume processor (loads every model it needs)."""
    return AdvancedResumeProcessor()


def _llm_feedback(llm, jd_text, resume_text, max_new_tokens=100, preview_chars=200):
    """Return a short LLM comparison of a resume against a job description.

    Returns ``"LLM unavailable"`` when ``llm`` is falsy. Otherwise only the
    newly generated text is requested (not the echoed prompt), and it is cut
    to ``preview_chars`` characters with an ellipsis when longer.
    """
    if not llm:
        return "LLM unavailable"

    prompt = f"Compare this resume to the job description: {jd_text[:1000]}... RESUME: {resume_text[:1000]}"
    # return_full_text=False: the default output starts with the prompt, so a
    # 200-character preview would show nothing but the job description.
    # max_new_tokens: gives generation its own budget independent of the
    # (long) prompt length.
    generated = llm(
        prompt,
        max_new_tokens=max_new_tokens,
        return_full_text=False,
    )[0]['generated_text']

    if len(generated) > preview_chars:
        return generated[:preview_chars] + "..."
    return generated


def main():
    """Render the Streamlit resume-ranking page.

    The sidebar shows system metrics and a retrain button. The left column
    takes a job description plus resumes and ranks the resumes by semantic
    similarity; the right column shows explainability output and the LLM
    feedback for the ranked resumes.
    """
    st.set_page_config(page_title="Enterprise Resume Ranker", layout="wide")
    st.title("π Next-Gen Resume Ranking System with AIOPs/MLOps")

    # Streamlit re-executes main() on every interaction. Cache the processor
    # so the models are loaded once per server process, and keep the
    # dashboard in session state so metric history survives reruns.
    # NOTE(review): assumes SystemMonitor reports live values on access
    # rather than sampling once at construction - confirm.
    processor = st.cache_resource(_load_processor)()
    if "dashboard" not in st.session_state:
        st.session_state["dashboard"] = MLOpsDashboard()
    dashboard = st.session_state["dashboard"]

    with st.sidebar:
        st.header("AIOPs Dashboard")
        processor.system_monitor.display_metrics()
        st.metric("Current Load", f"{processor.system_monitor.cpu_usage}% CPU")

        st.header("MLOps Controls")
        retrain = st.button("Retrain Production Model")
        if retrain:
            with st.spinner("Retraining model..."):
                # NOTE(review): no training is triggered here; the success
                # message is shown unconditionally.
                st.success("Model updated in production!")

    main_col1, main_col2 = st.columns([3, 2])

    # Ranked results; stays None until a ranking has been computed.
    df = None

    with main_col1:
        st.header("Upload Files")
        jd_file = st.file_uploader("Job Description (TXT/PDF)", type=["txt", "pdf"])
        resume_files = st.file_uploader(
            "Resumes (PDF/TXT)",
            type=["pdf", "txt"],
            accept_multiple_files=True,
        )

        if jd_file and resume_files:
            try:
                jd_text = processor.extract_text(jd_file)
                jd_entities = processor._extract_entities(jd_text)

                results = []
                for file in resume_files:
                    resume_text = processor.extract_text(file)
                    features = processor._generate_features(jd_entities, resume_text)
                    results.append({
                        "Filename": file.name,
                        **features,
                        "LLM Feedback": _llm_feedback(processor.llm, jd_text, resume_text),
                    })

                df = pd.DataFrame(results).sort_values("semantic_similarity", ascending=False)
                st.subheader("Ranking Results with Explainability")
                st.dataframe(
                    df,
                    column_config={
                        "semantic_similarity": "Semantic Match",
                        "skill_match": "Skill Matches",
                        "education_match": "Education Match",
                    },
                    use_container_width=True,
                )

                dashboard.update_metrics({
                    'model_performance': df['semantic_similarity'].mean(),
                    'data_quality': len(resume_files),
                })
            except Exception as e:
                # Top-level UI boundary: log with traceback, show the message.
                processor.logger.exception("Processing error: %s", e)
                st.error(f"System error: {str(e)}")

    with main_col2:
        st.header("Model Explainability")
        if df is not None:
            # NOTE(review): create_shap_plot and generate_report are not
            # defined in the visible part of this file - confirm they exist.
            st.plotly_chart(create_shap_plot(df))
            st.download_button(
                "Export Evaluation Report",
                generate_report(df),
                file_name="ranking_report.pdf",
            )

        st.header("LLM Feedback Analysis")
        if df is not None:
            st.table(df[["Filename", "LLM Feedback"]].set_index("Filename"))
|
|
|
# Script entry point: render the app only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|
|