thejagstudio committed on
Commit 032b18b · verified · 1 Parent(s): 89f8667

Update main.py

Files changed (1)
  1. main.py +125 -125
main.py CHANGED
@@ -1,125 +1,125 @@
-from flask import Flask, render_template, request, jsonify
-from flask_cors import CORS
-import fitz # PyMuPDF for PDF text extraction
-import spacy
-from transformers import T5Tokenizer, T5ForConditionalGeneration
-import torch
-import os
-
-app = Flask(__name__)
-CORS(app)
-
-# ===== Load Custom NER Model =====
-try:
-    nlp = spacy.load("custom_ner_model") # Load your custom-trained NER model
-    print("Custom NER model loaded successfully.")
-except Exception as e:
-    print(f"Error loading custom NER model: {e}")
-    exit()
-
-# ===== Load T5 Model for Job Title Prediction =====
-tokenizer = T5Tokenizer.from_pretrained("t5-small")
-model = T5ForConditionalGeneration.from_pretrained("t5-small")
-
-# Load model weights
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-model.load_state_dict(torch.load("best.pth", map_location=device))
-model.eval()
-model.to(device)
-
-print("T5 model for job title prediction loaded successfully.")
-
-# ===== Helper Functions =====
-
-# Extract text from PDF
-def extract_text_from_pdf(pdf_file):
-    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
-    text = ""
-    for page in doc:
-        text += page.get_text()
-    return text
-
-# Extract entities using Custom NER
-def extract_entities(text):
-    text=text.replace("\\n","\n")
-    doc = nlp(text) # Process text with custom NER
-    extracted_data = {}
-
-    for ent in doc.ents:
-        # Use only relevant labels
-        if ent.label_ in ["SKILL", "ROLE", "LOCATION", "AREA", "INDUSTRY"]:
-            if ent.label_ not in extracted_data:
-                extracted_data[ent.label_] = []
-            if ent.text not in extracted_data[ent.label_]:
-                extracted_data[ent.label_].append(ent.text)
-
-    # Format results as comma-separated strings
-    for key in extracted_data:
-        extracted_data[key] = ", ".join(extracted_data[key])
-    return extracted_data
-
-# Predict job title using T5 model
-def predict_job_title(skills, area,roles,location,industry):
-    input_text = f"Skills: {skills}; \nRole: {roles}; \nLocation: {location}; \nArea: {area}; \nIndustry: {industry}"
-    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True).to(device)
-
-    with torch.no_grad():
-        outputs = model.generate(inputs["input_ids"], max_length=50, num_beams=4, early_stopping=True)
-
-    predicted_job_title = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return predicted_job_title
-
-# ===== Flask Routes =====
-
-@app.route('/')
-def home():
-    return render_template('index.html') # Default home page
-
-@app.route('/predict', methods=['POST'])
-def predict():
-    if 'resume' not in request.files:
-        return jsonify({'error': 'No file uploaded'}), 400
-
-    file = request.files['resume']
-    if file.filename == '':
-        return jsonify({'error': 'No file selected'}), 400
-
-    if not file.filename.endswith('.pdf'):
-        return jsonify({'error': 'Please upload a PDF file'}), 400
-
-    try:
-        # Step 1: Extract text from PDF
-        resume_text = extract_text_from_pdf(file)
-
-        # Step 2: Extract entities using Custom NER
-        extracted_data = extract_entities(resume_text)
-
-        # Step 3: Prepare input for T5 prediction
-        skills = extracted_data.get("SKILL", "")
-        area = extracted_data.get("AREA", "")
-        roles = extracted_data.get("ROLE", "")
-        location = extracted_data.get("LOCATION", "")
-        industry = extracted_data.get("INDUSTRY", "")
-        # Step 4: Predict job title
-        predicted_title = predict_job_title(skills, area,roles,location,industry)
-
-        # Step 5: Return response
-        response = {
-            'success': True,
-            'predicted_title': predicted_title.split(";")[0],
-            'extracted_skills': extracted_data.get("SKILL", ""),
-            'roles': extracted_data.get("ROLE", ""),
-            'locations': extracted_data.get("LOCATION", ""),
-            'area': extracted_data.get("AREA", ""),
-            'industry': extracted_data.get("INDUSTRY", "")
-        }
-
-        return jsonify(response)
-
-    except Exception as e:
-        return jsonify({'error': str(e)}), 500
-
-if __name__ == '__main__':
-    from waitress import serve
-    print("Starting Flask app...")
-    serve(app, host="0.0.0.0", port=7860)
+from flask import Flask, render_template, request, jsonify
+from flask_cors import CORS
+import fitz # PyMuPDF for PDF text extraction
+import spacy
+from transformers import T5Tokenizer, T5ForConditionalGeneration
+import torch
+import os
+
+app = Flask(__name__)
+CORS(app)
+
+# ===== Load Custom NER Model =====
+try:
+    nlp = spacy.load("custom_ner_model") # Load your custom-trained NER model
+    print("Custom NER model loaded successfully.")
+except Exception as e:
+    print(f"Error loading custom NER model: {e}")
+    exit()
+
+# ===== Load T5 Model for Job Title Prediction =====
+tokenizer = T5Tokenizer.from_pretrained("t5-base")
+model = T5ForConditionalGeneration.from_pretrained("t5-base")
+
+# Load model weights
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+model.load_state_dict(torch.load("best.pth", map_location=device))
+model.eval()
+model.to(device)
+
+print("T5 model for job title prediction loaded successfully.")
+
+# ===== Helper Functions =====
+
+# Extract text from PDF
+def extract_text_from_pdf(pdf_file):
+    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
+    text = ""
+    for page in doc:
+        text += page.get_text()
+    return text
+
+# Extract entities using Custom NER
+def extract_entities(text):
+    text=text.replace("\\n","\n")
+    doc = nlp(text) # Process text with custom NER
+    extracted_data = {}
+
+    for ent in doc.ents:
+        # Use only relevant labels
+        if ent.label_ in ["SKILL", "ROLE", "LOCATION", "AREA", "INDUSTRY"]:
+            if ent.label_ not in extracted_data:
+                extracted_data[ent.label_] = []
+            if ent.text not in extracted_data[ent.label_]:
+                extracted_data[ent.label_].append(ent.text)
+
+    # Format results as comma-separated strings
+    for key in extracted_data:
+        extracted_data[key] = ", ".join(extracted_data[key])
+    return extracted_data
+
+# Predict job title using T5 model
+def predict_job_title(skills, area,roles,location,industry):
+    input_text = f"Skills: {skills}; \nRole: {roles}; \nLocation: {location}; \nArea: {area}; \nIndustry: {industry}"
+    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True).to(device)
+
+    with torch.no_grad():
+        outputs = model.generate(inputs["input_ids"], max_length=50, num_beams=4, early_stopping=True)
+
+    predicted_job_title = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return predicted_job_title
+
+# ===== Flask Routes =====
+
+@app.route('/')
+def home():
+    return render_template('index.html') # Default home page
+
+@app.route('/predict', methods=['POST'])
+def predict():
+    if 'resume' not in request.files:
+        return jsonify({'error': 'No file uploaded'}), 400
+
+    file = request.files['resume']
+    if file.filename == '':
+        return jsonify({'error': 'No file selected'}), 400
+
+    if not file.filename.endswith('.pdf'):
+        return jsonify({'error': 'Please upload a PDF file'}), 400
+
+    try:
+        # Step 1: Extract text from PDF
+        resume_text = extract_text_from_pdf(file)
+
+        # Step 2: Extract entities using Custom NER
+        extracted_data = extract_entities(resume_text)
+
+        # Step 3: Prepare input for T5 prediction
+        skills = extracted_data.get("SKILL", "")
+        area = extracted_data.get("AREA", "")
+        roles = extracted_data.get("ROLE", "")
+        location = extracted_data.get("LOCATION", "")
+        industry = extracted_data.get("INDUSTRY", "")
+        # Step 4: Predict job title
+        predicted_title = predict_job_title(skills, area,roles,location,industry)
+
+        # Step 5: Return response
+        response = {
+            'success': True,
+            'predicted_title': predicted_title.split(";")[0],
+            'extracted_skills': extracted_data.get("SKILL", ""),
+            'roles': extracted_data.get("ROLE", ""),
+            'locations': extracted_data.get("LOCATION", ""),
+            'area': extracted_data.get("AREA", ""),
+            'industry': extracted_data.get("INDUSTRY", "")
+        }
+
+        return jsonify(response)
+
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+
+if __name__ == '__main__':
+    from waitress import serve
+    print("Starting Flask app...")
+    serve(app, host="0.0.0.0", port=7860)
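
The only substantive change visible in this diff is the swap from the t5-small checkpoints to t5-base, while the fine-tuned weights are still loaded from the same best.pth file via load_state_dict(). A small sanity check one might run before deploying, as a sketch: it assumes best.pth is available locally and is meant to have been exported from a t5-base model; the paths are placeholders.

import torch
from transformers import T5ForConditionalGeneration

# Instantiate the new base architecture and try to load the saved weights.
model = T5ForConditionalGeneration.from_pretrained("t5-base")
state_dict = torch.load("best.pth", map_location="cpu")  # assumed local checkpoint path

try:
    # strict=True (the default) raises a RuntimeError on missing or size-mismatched
    # tensors, e.g. if best.pth still holds t5-small weights (d_model 512 vs. 768).
    model.load_state_dict(state_dict)
    print("best.pth matches the t5-base architecture.")
except RuntimeError as e:
    print(f"Checkpoint/architecture mismatch: {e}")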
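
For reference, a minimal client call against the /predict route defined in main.py, as a sketch: it assumes the app is served locally on port 7860 (as in the serve() call) and uses a placeholder resume.pdf path.

import requests

# Post a PDF under the 'resume' field expected by the /predict route.
with open("resume.pdf", "rb") as f:
    resp = requests.post(
        "http://localhost:7860/predict",
        files={"resume": ("resume.pdf", f, "application/pdf")},
    )

data = resp.json()
print(data.get("predicted_title"))
print(data.get("extracted_skills"))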