Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
@@ -1,125 +1,125 @@
|
|
1 |
-
from flask import Flask, render_template, request, jsonify
|
2 |
-
from flask_cors import CORS
|
3 |
-
import fitz # PyMuPDF for PDF text extraction
|
4 |
-
import spacy
|
5 |
-
from transformers import T5Tokenizer, T5ForConditionalGeneration
|
6 |
-
import torch
|
7 |
-
import os
|
8 |
-
|
9 |
-
app = Flask(__name__)
|
10 |
-
CORS(app)
|
11 |
-
|
12 |
-
# ===== Load Custom NER Model =====
|
13 |
-
try:
|
14 |
-
nlp = spacy.load("custom_ner_model") # Load your custom-trained NER model
|
15 |
-
print("Custom NER model loaded successfully.")
|
16 |
-
except Exception as e:
|
17 |
-
print(f"Error loading custom NER model: {e}")
|
18 |
-
exit()
|
19 |
-
|
20 |
-
# ===== Load T5 Model for Job Title Prediction =====
|
21 |
-
tokenizer = T5Tokenizer.from_pretrained("t5-
|
22 |
-
model = T5ForConditionalGeneration.from_pretrained("t5-
|
23 |
-
|
24 |
-
# Load model weights
|
25 |
-
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
26 |
-
model.load_state_dict(torch.load("best.pth", map_location=device))
|
27 |
-
model.eval()
|
28 |
-
model.to(device)
|
29 |
-
|
30 |
-
print("T5 model for job title prediction loaded successfully.")
|
31 |
-
|
32 |
-
# ===== Helper Functions =====
|
33 |
-
|
34 |
-
# Extract text from PDF
|
35 |
-
def extract_text_from_pdf(pdf_file):
|
36 |
-
doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
|
37 |
-
text = ""
|
38 |
-
for page in doc:
|
39 |
-
text += page.get_text()
|
40 |
-
return text
|
41 |
-
|
42 |
-
# Extract entities using Custom NER
|
43 |
-
def extract_entities(text):
|
44 |
-
text=text.replace("\\n","\n")
|
45 |
-
doc = nlp(text) # Process text with custom NER
|
46 |
-
extracted_data = {}
|
47 |
-
|
48 |
-
for ent in doc.ents:
|
49 |
-
# Use only relevant labels
|
50 |
-
if ent.label_ in ["SKILL", "ROLE", "LOCATION", "AREA", "INDUSTRY"]:
|
51 |
-
if ent.label_ not in extracted_data:
|
52 |
-
extracted_data[ent.label_] = []
|
53 |
-
if ent.text not in extracted_data[ent.label_]:
|
54 |
-
extracted_data[ent.label_].append(ent.text)
|
55 |
-
|
56 |
-
# Format results as comma-separated strings
|
57 |
-
for key in extracted_data:
|
58 |
-
extracted_data[key] = ", ".join(extracted_data[key])
|
59 |
-
return extracted_data
|
60 |
-
|
61 |
-
# Predict job title using T5 model
|
62 |
-
def predict_job_title(skills, area,roles,location,industry):
|
63 |
-
input_text = f"Skills: {skills}; \nRole: {roles}; \nLocation: {location}; \nArea: {area}; \nIndustry: {industry}"
|
64 |
-
inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True).to(device)
|
65 |
-
|
66 |
-
with torch.no_grad():
|
67 |
-
outputs = model.generate(inputs["input_ids"], max_length=50, num_beams=4, early_stopping=True)
|
68 |
-
|
69 |
-
predicted_job_title = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
70 |
-
return predicted_job_title
|
71 |
-
|
72 |
-
# ===== Flask Routes =====
|
73 |
-
|
74 |
-
@app.route('/')
|
75 |
-
def home():
|
76 |
-
return render_template('index.html') # Default home page
|
77 |
-
|
78 |
-
@app.route('/predict', methods=['POST'])
|
79 |
-
def predict():
|
80 |
-
if 'resume' not in request.files:
|
81 |
-
return jsonify({'error': 'No file uploaded'}), 400
|
82 |
-
|
83 |
-
file = request.files['resume']
|
84 |
-
if file.filename == '':
|
85 |
-
return jsonify({'error': 'No file selected'}), 400
|
86 |
-
|
87 |
-
if not file.filename.endswith('.pdf'):
|
88 |
-
return jsonify({'error': 'Please upload a PDF file'}), 400
|
89 |
-
|
90 |
-
try:
|
91 |
-
# Step 1: Extract text from PDF
|
92 |
-
resume_text = extract_text_from_pdf(file)
|
93 |
-
|
94 |
-
# Step 2: Extract entities using Custom NER
|
95 |
-
extracted_data = extract_entities(resume_text)
|
96 |
-
|
97 |
-
# Step 3: Prepare input for T5 prediction
|
98 |
-
skills = extracted_data.get("SKILL", "")
|
99 |
-
area = extracted_data.get("AREA", "")
|
100 |
-
roles = extracted_data.get("ROLE", "")
|
101 |
-
location = extracted_data.get("LOCATION", "")
|
102 |
-
industry = extracted_data.get("INDUSTRY", "")
|
103 |
-
# Step 4: Predict job title
|
104 |
-
predicted_title = predict_job_title(skills, area,roles,location,industry)
|
105 |
-
|
106 |
-
# Step 5: Return response
|
107 |
-
response = {
|
108 |
-
'success': True,
|
109 |
-
'predicted_title': predicted_title.split(";")[0],
|
110 |
-
'extracted_skills': extracted_data.get("SKILL", ""),
|
111 |
-
'roles': extracted_data.get("ROLE", ""),
|
112 |
-
'locations': extracted_data.get("LOCATION", ""),
|
113 |
-
'area': extracted_data.get("AREA", ""),
|
114 |
-
'industry': extracted_data.get("INDUSTRY", "")
|
115 |
-
}
|
116 |
-
|
117 |
-
return jsonify(response)
|
118 |
-
|
119 |
-
except Exception as e:
|
120 |
-
return jsonify({'error': str(e)}), 500
|
121 |
-
|
122 |
-
if __name__ == '__main__':
|
123 |
-
from waitress import serve
|
124 |
-
print("Starting Flask app...")
|
125 |
-
serve(app, host="0.0.0.0", port=7860)
|
|
|
1 |
+
from flask import Flask, render_template, request, jsonify
|
2 |
+
from flask_cors import CORS
|
3 |
+
import fitz # PyMuPDF for PDF text extraction
|
4 |
+
import spacy
|
5 |
+
from transformers import T5Tokenizer, T5ForConditionalGeneration
|
6 |
+
import torch
|
7 |
+
import os
|
8 |
+
|
9 |
+
app = Flask(__name__)
# Enable CORS on the whole app using flask_cors' defaults.
CORS(app)

# ===== Load Custom NER Model =====
# extract_entities() depends on `nlp`, so a failed load aborts start-up.
try:
    nlp = spacy.load("custom_ner_model")  # Load your custom-trained NER model
except Exception as e:
    print(f"Error loading custom NER model: {e}")
    # Exit with a non-zero status so a supervisor/container sees the failure.
    # The bare exit() is a `site` convenience that may be absent (python -S,
    # embedded interpreters) and, called without arguments, reports success.
    raise SystemExit(1) from e
else:
    print("Custom NER model loaded successfully.")
|
19 |
+
|
20 |
+
# ===== Load T5 Model for Job Title Prediction =====
# Choose the inference device up front; the checkpoint below is mapped onto it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the stock "t5-base" tokenizer and network ...
tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("t5-base")

# ... then replace its parameters with the state dict stored in best.pth.
model.load_state_dict(torch.load("best.pth", map_location=device))
model.to(device)
model.eval()  # switch the module to evaluation mode for serving

print("T5 model for job title prediction loaded successfully.")
|
31 |
+
|
32 |
+
# ===== Helper Functions =====
|
33 |
+
|
34 |
+
# Extract text from PDF
|
35 |
+
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Object with a ``read()`` method that returns the PDF bytes
            (the caller in this file passes the uploaded ``resume`` file).

    Returns:
        str: ``page.get_text()`` of each page joined with no separator;
        an empty string when the document has no pages.

    Raises:
        Any exception raised by ``fitz.open`` or ``page.get_text`` is
        propagated unchanged.
    """
    # The context manager closes the document even when a page fails to
    # extract; previously the document was never closed.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)
|
41 |
+
|
42 |
+
# Extract entities using Custom NER
|
43 |
+
def extract_entities(text):
    """Group the entities found by the custom NER pipeline by label.

    Only SKILL, ROLE, LOCATION, AREA and INDUSTRY entities are kept.

    Args:
        text: Raw text to analyse.

    Returns:
        dict: Maps each label that occurred at least once to a single string
        of its distinct entity texts, in first-seen order, joined by ", ".
        Labels with no entities are absent from the dict.
    """
    wanted_labels = ("SKILL", "ROLE", "LOCATION", "AREA", "INDUSTRY")

    # Literal backslash-n sequences become real newlines before parsing.
    parsed = nlp(text.replace("\\n", "\n"))

    # label -> {entity text: None}; dict keys act as an insertion-ordered set.
    grouped = {}
    for ent in parsed.ents:
        if ent.label_ in wanted_labels:
            grouped.setdefault(ent.label_, {})[ent.text] = None

    # Format results as comma-separated strings
    return {label: ", ".join(texts) for label, texts in grouped.items()}
|
60 |
+
|
61 |
+
# Predict job title using T5 model
|
62 |
+
def predict_job_title(skills, area, roles, location, industry):
    """Generate a job title from the extracted resume fields with the T5 model.

    Args:
        skills, area, roles, location, industry: Values interpolated as-is
            into the prompt (the caller passes comma-separated strings).

    Returns:
        str: The first generated sequence, decoded with special tokens
        skipped.
    """
    prompt = f"Skills: {skills}; \nRole: {roles}; \nLocation: {location}; \nArea: {area}; \nIndustry: {industry}"
    encoded = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
    encoded = encoded.to(device)

    generation_options = {"max_length": 50, "num_beams": 4, "early_stopping": True}
    # No gradients are needed for generation.
    with torch.no_grad():
        sequences = model.generate(encoded["input_ids"], **generation_options)

    return tokenizer.decode(sequences[0], skip_special_tokens=True)
|
71 |
+
|
72 |
+
# ===== Flask Routes =====
|
73 |
+
|
74 |
+
@app.route('/')
def home():
    """Serve the default home page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
|
77 |
+
|
78 |
+
@app.route('/predict', methods=['POST'])
def predict():
    """Handle a resume upload and return the predicted job title as JSON.

    Expects a multipart POST with a PDF in the ``resume`` file field.

    Returns:
        * 400 with ``{'error': ...}`` when the field is missing, no file was
          selected, or the filename does not end in ``.pdf`` (any case).
        * 200 with ``success``, ``predicted_title``, ``extracted_skills``,
          ``roles``, ``locations``, ``area`` and ``industry`` on success.
        * 500 with ``{'error': str(exc)}`` when extraction or prediction
          raises.
    """
    if 'resume' not in request.files:
        return jsonify({'error': 'No file uploaded'}), 400

    file = request.files['resume']
    # A falsy check also covers a missing (None) filename, which would
    # otherwise raise AttributeError on the extension test below.
    if not file.filename:
        return jsonify({'error': 'No file selected'}), 400

    # Compare case-insensitively so names such as "CV.PDF" are accepted.
    if not file.filename.lower().endswith('.pdf'):
        return jsonify({'error': 'Please upload a PDF file'}), 400

    try:
        # Step 1: Extract text from PDF
        resume_text = extract_text_from_pdf(file)

        # Step 2: Extract entities using Custom NER
        extracted_data = extract_entities(resume_text)

        # Step 3: Prepare input for T5 prediction (missing labels -> "")
        skills = extracted_data.get("SKILL", "")
        area = extracted_data.get("AREA", "")
        roles = extracted_data.get("ROLE", "")
        location = extracted_data.get("LOCATION", "")
        industry = extracted_data.get("INDUSTRY", "")

        # Step 4: Predict job title
        predicted_title = predict_job_title(skills, area, roles, location, industry)

        # Step 5: Return response; only the text before the first ";" of the
        # generated output is reported as the title.
        return jsonify({
            'success': True,
            'predicted_title': predicted_title.split(";")[0],
            'extracted_skills': skills,
            'roles': roles,
            'locations': location,
            'area': area,
            'industry': industry,
        })

    except Exception as e:
        # Record the traceback server-side; previously the failure left no log.
        app.logger.exception("Resume prediction failed")
        # NOTE(review): str(e) is still sent to the client to keep the existing
        # response contract; consider a generic message to avoid leaking
        # internal details.
        return jsonify({'error': str(e)}), 500
|
121 |
+
|
122 |
+
if __name__ == '__main__':
    # waitress is imported only when the file is executed as a script, so
    # importing this module elsewhere does not require it.
    from waitress import serve

    print("Starting Flask app...")
    # Listen on all interfaces, port 7860.
    serve(app, port=7860, host="0.0.0.0")
|