gauri-sharan committed on
Commit
17d7bf7
·
verified ·
1 Parent(s): 1b2f373

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +174 -0
app.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import spacy
3
+ import pandas as pd
4
+ import numpy as np
5
+ from sklearn.pipeline import Pipeline
6
+ from sklearn.compose import ColumnTransformer
7
+ from sklearn.ensemble import GradientBoostingRegressor
8
+ from sklearn.preprocessing import StandardScaler
9
+ from sentence_transformers import SentenceTransformer
10
+ from pyhealth.metrics import binary_metrics
11
+ import mlflow
12
+ import logging
13
+ from system_monitor import SystemMonitor # Custom AIOPS module
14
+ import torch
15
+ from transformers import pipeline
16
+
17
class AdvancedResumeProcessor:
    """Score resumes against a job description with NER and sentence embeddings.

    The instance owns the spaCy pipeline, the sentence-embedding model, the
    system monitor, a logger, an optional text-generation pipeline and the
    MLflow run that is active after construction.
    """

    def __init__(self):
        self.nlp = spacy.load("en_core_web_trf")
        self.sentence_model = SentenceTransformer('all-mpnet-base-v2')
        self.system_monitor = SystemMonitor()
        self.logger = logging.getLogger('mlops')
        # The text-generation model is only loaded when CUDA is available.
        self.llm = pipeline('text-generation', model='gpt2-xl') if torch.cuda.is_available() else None

        # MLOps setup
        mlflow.set_tracking_uri("http://localhost:5000")
        # Reuse a run that is already active: calling start_run() a second
        # time in the same process (without nested=True) raises.
        self.experiment = mlflow.active_run() or mlflow.start_run()

    def extract_text(self, uploaded_file):
        """Return the text content of an uploaded file.

        Args:
            uploaded_file: File-like object exposing ``getvalue()`` or
                ``read()`` and, optionally, a ``name`` attribute
                (presumably a Streamlit upload; verify against caller).

        Returns:
            The content as ``str``; bytes are decoded as UTF-8 with
            undecodable bytes replaced.

        Raises:
            ValueError: If the file name ends in ``.pdf``. This module
                imports no PDF parser, so PDFs cannot be read here.
        """
        name = getattr(uploaded_file, 'name', '') or ''
        if name.lower().endswith('.pdf'):
            raise ValueError(f"PDF text extraction is not supported: {name}")
        # getvalue() is independent of the current stream position, so a
        # previously consumed buffer still yields its full content.
        if hasattr(uploaded_file, 'getvalue'):
            raw = uploaded_file.getvalue()
        else:
            raw = uploaded_file.read()
        if isinstance(raw, (bytes, bytearray)):
            return bytes(raw).decode('utf-8', errors='replace')
        return raw

    def _extract_entities(self, text):
        """Enhanced entity extraction with custom categories.

        Returns:
            A dict with ``skills``, ``education`` and ``experience`` lists
            (texts of entities labelled SKILL / DEGREE / EXPERIENCE) plus
            ``text``, the input itself, so it can be embedded later.
        """
        # NOTE(review): these labels look custom -- confirm the loaded spaCy
        # model actually emits them, otherwise every list stays empty.
        doc = self.nlp(text)
        by_label = {}
        for ent in doc.ents:
            by_label.setdefault(ent.label_, []).append(ent.text)
        return {
            'skills': by_label.get('SKILL', []),
            'education': by_label.get('DEGREE', []),
            'experience': by_label.get('EXPERIENCE', []),
            'text': text,
        }

    def _generate_features(self, jd_entities, resume_text, jd_text=None):
        """Generate multi-modal features for one resume.

        Args:
            jd_entities: Entity dict of the job description, as returned by
                ``_extract_entities``.
            resume_text: Plain text of the resume.
            jd_text: Job-description text to embed. When omitted, the
                ``text`` entry of ``jd_entities`` is used, falling back to
                the joined entity strings.

        Returns:
            A dict with ``semantic_similarity`` (cosine similarity of the
            two embeddings, 0.0 if either has zero norm), ``skill_match``
            (number of shared skill strings) and ``education_match``
            (1 if any JD degree appears among the resume degrees, else 0).
        """
        resume_entities = self._extract_entities(resume_text)

        if jd_text is None:
            jd_text = jd_entities.get('text') or " ".join(
                term
                for key in ('skills', 'education', 'experience')
                for term in jd_entities.get(key, [])
            )

        # Semantic similarity between the JD and the resume. Computed with
        # numpy directly; one encode() call covers both texts.
        jd_embed, resume_embed = (
            np.asarray(vec, dtype=float)
            for vec in self.sentence_model.encode([jd_text, resume_text])
        )
        denom = np.linalg.norm(jd_embed) * np.linalg.norm(resume_embed)
        semantic_sim = float(jd_embed @ resume_embed / denom) if denom else 0.0

        # Entity matching scores
        skill_match = len(set(jd_entities['skills']) & set(resume_entities['skills']))
        resume_degrees = set(resume_entities['education'])

        return {
            'semantic_similarity': semantic_sim,
            'skill_match': skill_match,
            'education_match': int(any(deg in resume_degrees for deg in jd_entities['education']))
        }

    def train_model(self, X, y):
        """MLOps enabled training pipeline.

        Fits a gradient-boosting regressor on scaled sentence embeddings of
        the ``resume_text`` column of ``X`` and logs the fitted pipeline to
        MLflow under the artifact path ``"model"``.

        Args:
            X: Tabular input; the ColumnTransformer selects the column
                named ``resume_text`` from it.
            y: Regression targets aligned with ``X``.

        Returns:
            The fitted sklearn ``Pipeline``.
        """
        from sklearn.preprocessing import FunctionTransformer

        # SentenceTransformer is not an sklearn transformer, so wrap its
        # encode() call; this also reuses the model loaded in __init__.
        encoder = self.sentence_model
        embed_step = FunctionTransformer(lambda texts: encoder.encode(list(texts)))

        # nested=True: __init__ normally leaves a run active, and opening a
        # second non-nested run would raise.
        with mlflow.start_run(nested=True):
            preprocessor = ColumnTransformer([
                ('text', Pipeline([
                    ('embed', embed_step),
                    ('scaler', StandardScaler())
                ]), 'resume_text')
            ])

            model = Pipeline([
                ('preproc', preprocessor),
                ('regressor', GradientBoostingRegressor())
            ])

            model.fit(X, y)
            mlflow.sklearn.log_model(model, "model")
            return model
74
+
75
class MLOpsDashboard:
    """In-memory store of metric histories, keyed by metric name."""

    def __init__(self):
        # Each metric name maps to the list of values recorded so far.
        self.metrics = {
            'model_performance': [],
            'system_health': [],
            'data_quality': []
        }

    def update_metrics(self, new_metrics):
        """Append each value in ``new_metrics`` to that metric's history.

        Args:
            new_metrics: Mapping of metric name to the value to record.
                Names not seen before get a fresh history instead of
                raising ``KeyError``.
        """
        for name, value in new_metrics.items():
            self.metrics.setdefault(name, []).append(value)
86
+
87
def main():
    """Render the Streamlit resume-ranking page.

    Builds the sidebar (system metrics and the retrain control), lets the
    user upload one job description and several resumes, ranks the resumes
    by semantic similarity, and shows the explainability and LLM-feedback
    panels once a ranking exists.
    """
    st.set_page_config(page_title="Enterprise Resume Ranker", layout="wide")
    st.title("🚀 Next-Gen Resume Ranking System with AIOPs/MLOps")

    processor = AdvancedResumeProcessor()
    dashboard = MLOpsDashboard()
    # Stays None until a ranking has been computed during this run.
    df = None

    with st.sidebar:
        st.header("AIOPs Dashboard")
        processor.system_monitor.display_metrics()
        st.metric("Current Load", f"{processor.system_monitor.cpu_usage}% CPU")

        st.header("MLOps Controls")
        retrain = st.button("Retrain Production Model")
        if retrain:
            # No retraining logic is wired in yet, so do not report success.
            st.warning("Retraining is not implemented yet; the production model is unchanged.")

    main_col1, main_col2 = st.columns([3, 2])

    with main_col1:
        st.header("Upload Files")
        jd_file = st.file_uploader("Job Description (TXT/PDF)", type=["txt", "pdf"])
        resume_files = st.file_uploader("Resumes (PDF/TXT)",
                                        type=["pdf", "txt"],
                                        accept_multiple_files=True)

        if jd_file and resume_files:
            try:
                # Process job description
                jd_text = processor.extract_text(jd_file)
                jd_entities = processor._extract_entities(jd_text)

                # Process resumes and generate features
                results = []
                for file in resume_files:
                    resume_text = processor.extract_text(file)
                    features = processor._generate_features(jd_entities, resume_text)

                    # Generate LLM feedback
                    if processor.llm:
                        llm_feedback = processor.llm(
                            f"Compare this resume to the job description: {jd_text[:1000]}... RESUME: {resume_text[:1000]}"
                        )[0]['generated_text']
                    else:
                        llm_feedback = "LLM unavailable"
                    # Only mark the text as truncated when it really was.
                    if len(llm_feedback) > 200:
                        llm_feedback = llm_feedback[:200] + "..."

                    results.append({
                        "Filename": file.name,
                        **features,
                        "LLM Feedback": llm_feedback
                    })

                # Display results
                df = pd.DataFrame(results).sort_values("semantic_similarity", ascending=False)
                st.subheader("Ranking Results with Explainability")
                st.dataframe(
                    df,
                    column_config={
                        "semantic_similarity": "Semantic Match",
                        "skill_match": "Skill Matches",
                        "education_match": "Education Match"
                    },
                    use_container_width=True
                )

                # MLOps logging
                dashboard.update_metrics({
                    'model_performance': df['semantic_similarity'].mean(),
                    'data_quality': len(resume_files)
                })

            except Exception as e:
                # Top-level UI boundary: keep the traceback in the log and
                # show the message to the user instead of crashing the page.
                processor.logger.exception("Processing error: %s", e)
                st.error(f"System error: {str(e)}")

    with main_col2:
        st.header("Model Explainability")
        if df is not None:
            # create_shap_plot / generate_report are not defined in this
            # module; use them when present, otherwise fall back to
            # built-in views so the page does not fail with NameError.
            shap_plot = globals().get("create_shap_plot")
            if callable(shap_plot):
                st.plotly_chart(shap_plot(df))
            else:
                st.bar_chart(
                    df.set_index("Filename")[
                        ["semantic_similarity", "skill_match", "education_match"]
                    ]
                )

            build_report = globals().get("generate_report")
            if callable(build_report):
                st.download_button("Export Evaluation Report",
                                   build_report(df),
                                   file_name="ranking_report.pdf")
            else:
                st.download_button("Export Evaluation Report",
                                   df.to_csv(index=False),
                                   file_name="ranking_report.csv",
                                   mime="text/csv")

        st.header("LLM Feedback Analysis")
        if df is not None:
            st.table(df[["Filename", "LLM Feedback"]].set_index("Filename"))
172
+
173
# Script entry point: build the page only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()