File size: 4,319 Bytes
032b18b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
from flask import Flask, render_template, request, jsonify
from flask_cors import CORS
import fitz  # PyMuPDF for PDF text extraction
import spacy
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
import os

app = Flask(__name__)
CORS(app)  # allow cross-origin requests from the browser front-end

# ===== Load Custom NER Model =====
# The NER model is mandatory for the /predict pipeline, so startup aborts
# if it cannot be loaded from the "custom_ner_model" directory.
try:
    nlp = spacy.load("custom_ner_model")  # Load your custom-trained NER model
    print("Custom NER model loaded successfully.")
except Exception as e:
    print(f"Error loading custom NER model: {e}")
    exit()

# ===== Load T5 Model for Job Title Prediction =====
tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("t5-base")

# Load model weights
# NOTE(review): "best.pth" is presumably a fine-tuned state_dict for t5-base
# saved during training — confirm. torch.load on an untrusted checkpoint is
# unsafe (arbitrary code execution via pickle); keep this file trusted.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.load_state_dict(torch.load("best.pth", map_location=device))
model.eval()
model.to(device)

print("T5 model for job title prediction loaded successfully.")

# ===== Helper Functions =====

# Extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Extract all text from an uploaded PDF file object.

    Args:
        pdf_file: File-like object (e.g. a werkzeug ``FileStorage``) whose
            ``read()`` returns the raw PDF bytes.

    Returns:
        str: Concatenated text of every page, in page order.
    """
    # Open from an in-memory stream; `with` guarantees the document handle
    # is closed even if extraction raises (the original leaked it).
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        # join() avoids quadratic string concatenation on long documents
        return "".join(page.get_text() for page in doc)

# Extract entities using Custom NER
def extract_entities(text):
    """Run the custom NER model over *text* and bucket relevant entities.

    Returns a dict mapping each label in {SKILL, ROLE, LOCATION, AREA,
    INDUSTRY} that actually occurs to a comma-separated string of the
    unique entity texts, in first-seen order. Labels with no hits are
    omitted from the result.
    """
    # Literal backslash-n sequences become real newlines before tagging.
    doc = nlp(text.replace("\\n", "\n"))

    relevant = {"SKILL", "ROLE", "LOCATION", "AREA", "INDUSTRY"}
    buckets = {}
    for ent in doc.ents:
        if ent.label_ not in relevant:
            continue
        values = buckets.setdefault(ent.label_, [])
        if ent.text not in values:  # keep one copy of each entity per label
            values.append(ent.text)

    # Collapse each list into a single comma-separated string.
    return {label: ", ".join(values) for label, values in buckets.items()}

# Predict job title using T5 model
def predict_job_title(skills, area, roles, location, industry):
    """Generate a job-title string with the fine-tuned T5 model.

    Each argument is a comma-separated entity string extracted from the
    resume; they are serialized into a single prompt whose field order
    (Skills, Role, Location, Area, Industry) matches the training format.
    """
    prompt = (
        f"Skills: {skills}; \n"
        f"Role: {roles}; \n"
        f"Location: {location}; \n"
        f"Area: {area}; \n"
        f"Industry: {industry}"
    )
    encoded = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(device)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        generated = model.generate(
            encoded["input_ids"],
            max_length=50,
            num_beams=4,
            early_stopping=True,
        )

    return tokenizer.decode(generated[0], skip_special_tokens=True)

# ===== Flask Routes =====

@app.route('/')
def home():
    """Serve the default landing page."""
    return render_template('index.html')

@app.route('/predict', methods=['POST'])
def predict():
    """Handle a resume upload and return the predicted job title as JSON.

    Expects a multipart/form-data POST with the PDF under the ``resume``
    field. Responds 400 for a missing/empty/non-PDF upload, 500 if any
    pipeline stage fails, and 200 with the prediction otherwise.
    """
    if 'resume' not in request.files:
        return jsonify({'error': 'No file uploaded'}), 400

    file = request.files['resume']
    if file.filename == '':
        return jsonify({'error': 'No file selected'}), 400

    # Case-insensitive extension check: the original rejected e.g. ".PDF".
    if not file.filename.lower().endswith('.pdf'):
        return jsonify({'error': 'Please upload a PDF file'}), 400

    try:
        # Step 1: Extract raw text from the uploaded PDF.
        resume_text = extract_text_from_pdf(file)

        # Step 2: Extract entities using the custom NER model.
        extracted_data = extract_entities(resume_text)

        # Step 3: Pull each field once; missing labels default to "".
        skills = extracted_data.get("SKILL", "")
        area = extracted_data.get("AREA", "")
        roles = extracted_data.get("ROLE", "")
        location = extracted_data.get("LOCATION", "")
        industry = extracted_data.get("INDUSTRY", "")

        # Step 4: Predict the job title with the T5 model.
        predicted_title = predict_job_title(skills, area, roles, location, industry)

        # Step 5: Build the response, reusing the values fetched in Step 3.
        # Only the first ';'-separated segment of the generation is returned.
        return jsonify({
            'success': True,
            'predicted_title': predicted_title.split(";")[0],
            'extracted_skills': skills,
            'roles': roles,
            'locations': location,
            'area': area,
            'industry': industry,
        })

    except Exception as e:
        # Surface the failure reason to the client. NOTE(review): in
        # production, log the traceback and return a generic message
        # instead of leaking internal error details.
        return jsonify({'error': str(e)}), 500

# Entry point: serve the app with the production-grade waitress WSGI
# server (rather than Flask's development server), listening on all
# interfaces at port 7860.
if __name__ == '__main__':
    from waitress import serve
    print("Starting Flask app...")
    serve(app, host="0.0.0.0", port=7860)