Spaces:
Sleeping
Sleeping
from flask import Flask, render_template, request, jsonify | |
from flask_cors import CORS | |
import fitz # PyMuPDF for PDF text extraction | |
import spacy | |
from transformers import T5Tokenizer, T5ForConditionalGeneration | |
import torch | |
import os | |
# ===== Flask Application =====
app = Flask(__name__)
CORS(app)  # enable CORS on the app using the flask-cors defaults

# ===== Load Custom NER Model =====
# Every prediction needs the NER pipeline, so a load failure aborts startup.
try:
    nlp = spacy.load("custom_ner_model")  # Load your custom-trained NER model
except Exception as e:
    print(f"Error loading custom NER model: {e}")
    # Terminate with a non-zero status so supervisors and containers see the
    # failure. A bare `exit()` is a helper injected by the `site` module and
    # ends the process with status 0 (success).
    raise SystemExit(1) from e
else:
    print("Custom NER model loaded successfully.")
# ===== Load T5 Model for Job Title Prediction =====
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Start from the stock "t5-base" tokenizer and model ...
tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("t5-base")

# ... then replace the weights with the state dict stored in best.pth.
# NOTE(review): torch.load deserializes via pickle; only load checkpoints
# that come from a trusted source.
model.load_state_dict(torch.load("best.pth", map_location=device))
model.to(device)
model.eval()  # inference only: switch off training-time behaviour
print("T5 model for job title prediction loaded successfully.")
# ===== Helper Functions ===== | |
# Extract text from PDF | |
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Binary file-like object; the bytes returned by its
            ``read()`` are handed to PyMuPDF as the PDF stream.

    Returns:
        The page texts joined into a single string (empty when the
        document has no extractable text).
    """
    # The context manager closes the document even when a page fails to
    # parse, so the underlying PyMuPDF resources are not leaked.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)
# Extract entities using Custom NER | |
def extract_entities(text):
    """Group the entities the custom NER pipeline finds in *text* by label.

    Literal backslash-n sequences in *text* are turned into real newlines
    before the pipeline runs. Only entities labelled SKILL, ROLE, LOCATION,
    AREA or INDUSTRY are kept.

    Returns:
        A dict mapping each label that occurred to a ", "-joined string of
        its distinct entity texts, in first-seen order.
    """
    wanted_labels = ("SKILL", "ROLE", "LOCATION", "AREA", "INDUSTRY")
    parsed = nlp(text.replace("\\n", "\n"))  # Process text with custom NER

    # The inner dicts serve as insertion-ordered sets: duplicates collapse
    # while the order of first appearance is preserved.
    grouped = {}
    for entity in parsed.ents:
        if entity.label_ in wanted_labels:
            grouped.setdefault(entity.label_, {})[entity.text] = None

    # Format results as comma-separated strings
    return {label: ", ".join(texts) for label, texts in grouped.items()}
# Predict job title using T5 model | |
def predict_job_title(skills, area, roles, location, industry):
    """Generate a job title from the extracted resume fields.

    The five fields are formatted into a single prompt, tokenized, and fed
    to the module-level T5 model using beam search.

    Args:
        skills, area, roles, location, industry: Values interpolated into
            the prompt (the caller passes comma-separated strings).

    Returns:
        The decoded text of the first generated sequence, with special
        tokens removed.
    """
    prompt = f"Skills: {skills}; \nRole: {roles}; \nLocation: {location}; \nArea: {area}; \nIndustry: {industry}"
    encoded = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
    encoded = encoded.to(device)

    generation_options = {"max_length": 50, "num_beams": 4, "early_stopping": True}
    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        generated = model.generate(encoded["input_ids"], **generation_options)

    return tokenizer.decode(generated[0], skip_special_tokens=True)
# ===== Flask Routes ===== | |
# NOTE(review): no route decorator was present on this view, which left it
# unregistered (every request would 404). "/" is assumed from the
# "Default home page" comment; confirm.
@app.route('/')
def home():
    """Render the landing page from the ``index.html`` template."""
    return render_template('index.html')  # Default home page
# NOTE(review): no route decorator was present on this view, which left it
# unregistered. POST follows from the use of request.files; the "/predict"
# path is an assumption; confirm against the page that submits the form.
@app.route('/predict', methods=['POST'])
def predict():
    """Predict a job title from an uploaded resume PDF.

    Expects a multipart upload whose file field is named ``resume``.

    Returns:
        A JSON response. On success it holds ``success``, the predicted
        title and the extracted entity strings. On failure it holds an
        ``error`` message, with status 400 for a missing, unnamed or non-PDF
        upload and status 500 when extraction or prediction raises.
    """
    if 'resume' not in request.files:
        return jsonify({'error': 'No file uploaded'}), 400

    file = request.files['resume']
    # A missing filename is treated like an empty one instead of crashing
    # on the extension check below.
    filename = file.filename or ''
    if filename == '':
        return jsonify({'error': 'No file selected'}), 400
    # Case-insensitive so that e.g. "resume.PDF" is accepted as well.
    if not filename.lower().endswith('.pdf'):
        return jsonify({'error': 'Please upload a PDF file'}), 400

    try:
        # Step 1: Extract text from PDF
        resume_text = extract_text_from_pdf(file)

        # Step 2: Extract entities using Custom NER
        extracted_data = extract_entities(resume_text)

        # Step 3: Prepare input for T5 prediction
        skills = extracted_data.get("SKILL", "")
        area = extracted_data.get("AREA", "")
        roles = extracted_data.get("ROLE", "")
        location = extracted_data.get("LOCATION", "")
        industry = extracted_data.get("INDUSTRY", "")

        # Step 4: Predict job title
        predicted_title = predict_job_title(skills, area, roles, location, industry)

        # Step 5: Return response
        response = {
            'success': True,
            # Keep only the text before the first ';' of the generated output.
            'predicted_title': predicted_title.split(";")[0],
            'extracted_skills': skills,
            'roles': roles,
            'locations': location,
            'area': area,
            'industry': industry,
        }
        return jsonify(response)
    except Exception as e:
        # Request boundary: record the traceback server-side, then report
        # the failure to the client.
        app.logger.exception("Resume prediction failed")
        return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
    # waitress is imported only when the file is run as a script, so
    # importing this module elsewhere does not require it.
    import waitress

    print("Starting Flask app...")
    # Listen on all interfaces, port 7860.
    waitress.serve(app, host="0.0.0.0", port=7860)