import streamlit as st
import spacy
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import FunctionTransformer, StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import mlflow
import mlflow.sklearn
import logging
from system_monitor import SystemMonitor  # Custom AIOps module
import torch
from transformers import pipeline
from pypdf import PdfReader  # assumption: pypdf is installed for PDF text extraction

class AdvancedResumeProcessor:
    def __init__(self):
        self.nlp = spacy.load("en_core_web_trf")
        self.sentence_model = SentenceTransformer('all-mpnet-base-v2')
        self.system_monitor = SystemMonitor()
        self.logger = logging.getLogger('mlops')
        # Load the (large) generation model only when a GPU is available
        self.llm = pipeline('text-generation', model='gpt2-xl') if torch.cuda.is_available() else None

        # MLOps setup: register the tracking server and experiment here;
        # individual runs are opened per training call in train_model()
        mlflow.set_tracking_uri("http://localhost:5000")
        mlflow.set_experiment("resume-ranker")  # experiment name is illustrative
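
    def extract_text(self, file):
        """Read raw text from an uploaded TXT or PDF file.

        Minimal sketch, assuming Streamlit's UploadedFile interface and
        the pypdf package; this method is called from main() below.
        """
        if file.name.lower().endswith('.pdf'):
            reader = PdfReader(file)
            return "\n".join(page.extract_text() or "" for page in reader.pages)
        return file.getvalue().decode('utf-8', errors='ignore')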
        
    def _extract_entities(self, text):
        """Enhanced entity extraction with custom categories"""
        doc = self.nlp(text)
        return {
            'skills': [ent.text for ent in doc.ents if ent.label_ == 'SKILL'],
            'education': [ent.text for ent in doc.ents if ent.label_ == 'DEGREE'],
            'experience': [ent.text for ent in doc.ents if ent.label_ == 'EXPERIENCE']
        }

    def _generate_features(self, jd_text, jd_entities, resume_text):
        """Generate multi-modal features comparing a resume against the JD"""
        resume_entities = self._extract_entities(resume_text)

        # Semantic similarity between the job description and the resume
        jd_embed = self.sentence_model.encode([jd_text])[0]
        resume_embed = self.sentence_model.encode([resume_text])[0]
        semantic_sim = cosine_similarity([jd_embed], [resume_embed])[0][0]

        # Entity matching scores
        skill_match = len(set(jd_entities['skills']) & set(resume_entities['skills']))

        return {
            'semantic_similarity': semantic_sim,
            'skill_match': skill_match,
            'education_match': int(any(deg in resume_entities['education'] for deg in jd_entities['education']))
        }

    def train_model(self, X, y):
        """MLOps-enabled training pipeline"""
        with mlflow.start_run():
            # SentenceTransformer is not a scikit-learn transformer, so wrap
            # the encoder in a FunctionTransformer to make it Pipeline-compatible
            embed = FunctionTransformer(
                lambda texts: self.sentence_model.encode(list(texts)),
                validate=False
            )
            preprocessor = ColumnTransformer([
                ('text', Pipeline([
                    ('embed', embed),
                    ('scaler', StandardScaler())
                ]), 'resume_text')
            ])

            model = Pipeline([
                ('preproc', preprocessor),
                ('regressor', GradientBoostingRegressor())
            ])

            model.fit(X, y)
            mlflow.sklearn.log_model(model, "model")
            return model

class MLOpsDashboard:
    def __init__(self):
        self.metrics = {
            'model_performance': [],
            'system_health': [],
            'data_quality': []
        }
    
    def update_metrics(self, new_metrics):
        for k, v in new_metrics.items():
            self.metrics[k].append(v)
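

# Placeholder sketches for the two helpers referenced in main(), so the app
# runs end-to-end. A real implementation would compute SHAP values for the
# trained model and render a proper report; plotly is an assumed dependency.
def create_shap_plot(df):
    """Placeholder explainability chart: grouped bars of per-resume scores."""
    import plotly.express as px
    return px.bar(
        df,
        x="Filename",
        y=["semantic_similarity", "skill_match", "education_match"],
        barmode="group",
        title="Per-resume feature scores",
    )


def generate_report(df):
    """Placeholder report export: the ranking table as CSV bytes."""
    return df.to_csv(index=False).encode("utf-8")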

def main():
    st.set_page_config(page_title="Enterprise Resume Ranker", layout="wide")
    st.title("πŸš€ Next-Gen Resume Ranking System with AIOPs/MLOps")
    
    processor = AdvancedResumeProcessor()
    dashboard = MLOpsDashboard()
    
    with st.sidebar:
        st.header("AIOPs Dashboard")
        processor.system_monitor.display_metrics()
        st.metric("Current Load", f"{processor.system_monitor.cpu_usage}% CPU")
        
        st.header("MLOps Controls")
        retrain = st.button("Retrain Production Model")
        if retrain:
            with st.spinner("Retraining model..."):
                # Add retraining logic here
                st.success("Model updated in production!")

    main_col1, main_col2 = st.columns([3, 2])
    
    with main_col1:
        st.header("Upload Files")
        jd_file = st.file_uploader("Job Description (TXT/PDF)", type=["txt", "pdf"])
        resume_files = st.file_uploader("Resumes (PDF/TXT)", 
                                      type=["pdf", "txt"],
                                      accept_multiple_files=True)
        
        if jd_file and resume_files:
            try:
                # Process job description
                jd_text = processor.extract_text(jd_file)
                jd_entities = processor._extract_entities(jd_text)
                
                # Process resumes and generate features
                results = []
                for file in resume_files:
                    resume_text = processor.extract_text(file)
                    features = processor._generate_features(jd_text, jd_entities, resume_text)
                    
                    # Generate LLM feedback (gpt2-xl has a 1024-token context,
                    # so both texts are truncated and the continuation kept short)
                    llm_feedback = processor.llm(
                        f"Compare this resume to the job description: {jd_text[:1000]}... RESUME: {resume_text[:1000]}",
                        max_new_tokens=60
                    )[0]['generated_text'] if processor.llm else "LLM unavailable"
                    
                    results.append({
                        "Filename": file.name,
                        **features,
                        "LLM Feedback": llm_feedback[:200] + "..."
                    })
                
                # Display results
                df = pd.DataFrame(results).sort_values("semantic_similarity", ascending=False)
                st.subheader("Ranking Results with Explainability")
                st.dataframe(
                    df,
                    column_config={
                        "semantic_similarity": "Semantic Match",
                        "skill_match": "Skill Matches",
                        "education_match": "Education Match"
                    },
                    use_container_width=True
                )
                
                # MLOps logging
                dashboard.update_metrics({
                    'model_performance': df['semantic_similarity'].mean(),
                    'data_quality': len(resume_files)
                })
                
            except Exception as e:
                processor.logger.error(f"Processing error: {str(e)}")
                st.error(f"System error: {str(e)}")

    with main_col2:
        st.header("Model Explainability")
        if 'df' in locals():
            st.plotly_chart(create_shap_plot(df))  # placeholder SHAP-style chart (stub above)
            st.download_button("Export Evaluation Report",
                               generate_report(df),
                               file_name="ranking_report.csv",
                               mime="text/csv")
            
        st.header("LLM Feedback Analysis")
        if 'df' in locals():
            st.table(df[["Filename", "LLM Feedback"]].set_index("Filename"))

if __name__ == "__main__":
    main()