Spaces:

thejagstudio
/

Resume-Title-Predictor

Sleeping

App Files Community

thejagstudio committed on Dec 14, 2024

Commit

032b18b

verified ·

1 Parent(s): 89f8667

Update main.py

Browse files

Files changed (1) hide show

main.py +125 -125

main.py CHANGED Viewed

@@ -1,125 +1,125 @@
-from flask import Flask, render_template, request, jsonify
-from flask_cors import CORS
-import fitz  # PyMuPDF for PDF text extraction
-import spacy
-from transformers import T5Tokenizer, T5ForConditionalGeneration
-import torch
-import os
-app = Flask(__name__)
-CORS(app)
-# ===== Load Custom NER Model =====
-try:
-    nlp = spacy.load("custom_ner_model")  # Load your custom-trained NER model
-    print("Custom NER model loaded successfully.")
-except Exception as e:
-    print(f"Error loading custom NER model: {e}")
-    exit()
-# ===== Load T5 Model for Job Title Prediction =====
-tokenizer = T5Tokenizer.from_pretrained("t5-small")
-model = T5ForConditionalGeneration.from_pretrained("t5-small")
-# Load model weights
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-model.load_state_dict(torch.load("best.pth", map_location=device))
-model.eval()
-model.to(device)
-print("T5 model for job title prediction loaded successfully.")
-# ===== Helper Functions =====
-# Extract text from PDF
-def extract_text_from_pdf(pdf_file):
-    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
-    text = ""
-    for page in doc:
-        text += page.get_text()
-    return text
-# Extract entities using Custom NER
-def extract_entities(text):
-    text=text.replace("\\n","\n")
-    doc = nlp(text)  # Process text with custom NER
-    extracted_data = {}
-    for ent in doc.ents:
-        # Use only relevant labels
-        if ent.label_ in ["SKILL", "ROLE", "LOCATION", "AREA", "INDUSTRY"]:
-            if ent.label_ not in extracted_data:
-                extracted_data[ent.label_] = []
-            if ent.text not in extracted_data[ent.label_]:
-                extracted_data[ent.label_].append(ent.text)
-    # Format results as comma-separated strings
-    for key in extracted_data:
-        extracted_data[key] = ", ".join(extracted_data[key])
-    return extracted_data
-# Predict job title using T5 model
-def predict_job_title(skills, area,roles,location,industry):
-    input_text = f"Skills: {skills}; \nRole: {roles}; \nLocation: {location}; \nArea: {area}; \nIndustry: {industry}"
-    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True).to(device)
-    with torch.no_grad():
-        outputs = model.generate(inputs["input_ids"], max_length=50, num_beams=4, early_stopping=True)
-    predicted_job_title = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return predicted_job_title
-# ===== Flask Routes =====
-@app.route('/')
-def home():
-    return render_template('index.html')  # Default home page
-@app.route('/predict', methods=['POST'])
-def predict():
-    if 'resume' not in request.files:
-        return jsonify({'error': 'No file uploaded'}), 400
-    file = request.files['resume']
-    if file.filename == '':
-        return jsonify({'error': 'No file selected'}), 400
-    if not file.filename.endswith('.pdf'):
-        return jsonify({'error': 'Please upload a PDF file'}), 400
-    try:
-        # Step 1: Extract text from PDF
-        resume_text = extract_text_from_pdf(file)
-        # Step 2: Extract entities using Custom NER
-        extracted_data = extract_entities(resume_text)
-        # Step 3: Prepare input for T5 prediction
-        skills = extracted_data.get("SKILL", "")
-        area = extracted_data.get("AREA", "")
-        roles = extracted_data.get("ROLE", "")
-        location = extracted_data.get("LOCATION", "")
-        industry = extracted_data.get("INDUSTRY", "")
-        # Step 4: Predict job title
-        predicted_title = predict_job_title(skills, area,roles,location,industry)
-        # Step 5: Return response
-        response = {
-            'success': True,
-            'predicted_title': predicted_title.split(";")[0],
-            'extracted_skills': extracted_data.get("SKILL", ""),
-            'roles': extracted_data.get("ROLE", ""),
-            'locations': extracted_data.get("LOCATION", ""),
-            'area': extracted_data.get("AREA", ""),
-            'industry': extracted_data.get("INDUSTRY", "")
-        }
-        return jsonify(response)
-    except Exception as e:
-        return jsonify({'error': str(e)}), 500
-if __name__ == '__main__':
-    from waitress import serve
-    print("Starting Flask app...")
-    serve(app, host="0.0.0.0", port=7860)

+from flask import Flask, render_template, request, jsonify
+from flask_cors import CORS
+import fitz  # PyMuPDF for PDF text extraction
+import spacy
+from transformers import T5Tokenizer, T5ForConditionalGeneration
+import torch
+import os
+# Flask application object, wrapped by flask_cors with its default settings.
+app = Flask(__name__)
+CORS(app)
+# ===== Load Custom NER Model =====
+# The spaCy pipeline is loaded once, at import time, and shared by all requests.
+try:
+    nlp = spacy.load("custom_ner_model")  # Load your custom-trained NER model
+    print("Custom NER model loaded successfully.")
+except Exception as e:
+    print(f"Error loading custom NER model: {e}")
+    # Terminate with a non-zero status so the failure is visible to whatever
+    # launched the process. A bare exit() would report status 0 (success) and
+    # is only available when the `site` module has been loaded.
+    raise SystemExit(1)
+# ===== Load T5 Model for Job Title Prediction =====
+# The tokenizer and architecture come from the "t5-base" checkpoint; the
+# fine-tuned weights are then loaded over it from the local "best.pth" file.
+tokenizer = T5Tokenizer.from_pretrained("t5-base")
+model = T5ForConditionalGeneration.from_pretrained("t5-base")
+# Load model weights
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# NOTE(review): best.pth presumably holds a state dict saved from a t5-base
+# model; a checkpoint from a different T5 size would not match - confirm.
+# NOTE(review): depending on the torch version, torch.load may unpickle
+# arbitrary objects, so only load checkpoints from a trusted source.
+model.load_state_dict(torch.load("best.pth", map_location=device))
+model.eval()
+model.to(device)
+print("T5 model for job title prediction loaded successfully.")
+# ===== Helper Functions =====
+# Extract text from PDF
+def extract_text_from_pdf(pdf_file):
+    """Return the text of every page of a PDF, concatenated in page order.
+
+    Args:
+        pdf_file: Binary file-like object exposing ``read()``; its whole
+            content is read into memory and parsed as a PDF.
+
+    Returns:
+        A single string made of each page's ``get_text()`` output, joined
+        without any separator.
+
+    Raises:
+        Whatever PyMuPDF raises when the bytes cannot be opened as a PDF.
+    """
+    # Open the document as a context manager so it is closed even when
+    # extracting a page fails; previously it was never closed.
+    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
+        return "".join(page.get_text() for page in doc)
+# Extract entities using Custom NER
+def extract_entities(text):
+    """Run the custom NER pipeline on *text* and group the entities by label.
+
+    Literal backslash-n sequences in the input are turned into real newlines
+    before the text is processed. Only entities labelled SKILL, ROLE,
+    LOCATION, AREA or INDUSTRY are kept; within a label, repeated entity
+    texts are dropped and first-seen order is preserved.
+
+    Returns:
+        A dict mapping each label that occurred to a ", "-joined string of
+        its distinct entity texts. Labels with no entities are absent.
+    """
+    wanted_labels = ("SKILL", "ROLE", "LOCATION", "AREA", "INDUSTRY")
+    parsed = nlp(text.replace("\\n", "\n"))
+    grouped = {}
+    for span in parsed.ents:
+        if span.label_ not in wanted_labels:
+            continue
+        bucket = grouped.setdefault(span.label_, [])
+        if span.text not in bucket:
+            bucket.append(span.text)
+    # Flatten each list of entity texts into one comma-separated string.
+    return {label: ", ".join(values) for label, values in grouped.items()}
+# Predict job title using T5 model
+def predict_job_title(skills, area, roles, location, industry):
+    """Generate a job title with the T5 model from the extracted fields.
+
+    The five values are formatted into one prompt string, tokenized
+    (with truncation), moved to the module-level ``device`` and passed to
+    ``model.generate`` using 4 beams and a maximum length of 50. Only the
+    ``input_ids`` are handed to ``generate``.
+
+    Returns:
+        The first generated sequence decoded with special tokens skipped.
+    """
+    prompt = f"Skills: {skills}; \nRole: {roles}; \nLocation: {location}; \nArea: {area}; \nIndustry: {industry}"
+    encoded = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
+    encoded = encoded.to(device)
+    # Inference only: no gradients are needed.
+    with torch.no_grad():
+        generated = model.generate(
+            encoded["input_ids"],
+            max_length=50,
+            num_beams=4,
+            early_stopping=True,
+        )
+    return tokenizer.decode(generated[0], skip_special_tokens=True)
+# ===== Flask Routes =====
+@app.route('/')
+def home():
+    """Serve the default home page by rendering the ``index.html`` template."""
+    landing_page = render_template('index.html')
+    return landing_page
+@app.route('/predict', methods=['POST'])
+def predict():
+    """Predict a job title from an uploaded PDF resume.
+
+    Expects the request to carry a file under the ``resume`` field.
+
+    Returns:
+        On success, a JSON object with ``success``, ``predicted_title`` (the
+        model output up to the first ";"), ``extracted_skills``, ``roles``,
+        ``locations``, ``area`` and ``industry``.
+        On a missing file, an empty filename or a non-PDF extension, a JSON
+        ``error`` message with status 400.
+        On any exception during processing, a JSON ``error`` message holding
+        the exception text with status 500.
+    """
+    if 'resume' not in request.files:
+        return jsonify({'error': 'No file uploaded'}), 400
+    file = request.files['resume']
+    # Covers both an empty filename and a missing (None) one, which would
+    # otherwise fail on the extension check below.
+    if not file.filename:
+        return jsonify({'error': 'No file selected'}), 400
+    # Compare case-insensitively so "resume.PDF" is accepted like "resume.pdf".
+    if not file.filename.lower().endswith('.pdf'):
+        return jsonify({'error': 'Please upload a PDF file'}), 400
+    try:
+        # Step 1: Extract text from PDF
+        resume_text = extract_text_from_pdf(file)
+        # Step 2: Extract entities using Custom NER
+        extracted_data = extract_entities(resume_text)
+        # Step 3: Prepare input for T5 prediction (missing labels become "")
+        skills = extracted_data.get("SKILL", "")
+        area = extracted_data.get("AREA", "")
+        roles = extracted_data.get("ROLE", "")
+        location = extracted_data.get("LOCATION", "")
+        industry = extracted_data.get("INDUSTRY", "")
+        # Step 4: Predict job title
+        predicted_title = predict_job_title(skills, area, roles, location, industry)
+        # Step 5: Return response
+        response = {
+            'success': True,
+            'predicted_title': predicted_title.split(";")[0],
+            'extracted_skills': skills,
+            'roles': roles,
+            'locations': location,
+            'area': area,
+            'industry': industry,
+        }
+        return jsonify(response)
+    except Exception as e:
+        # Record the traceback server-side; the client still receives the
+        # same error payload as before.
+        app.logger.exception("Resume prediction failed")
+        return jsonify({'error': str(e)}), 500
+if __name__ == '__main__':
+    # waitress is imported here, so it is only required when this file is
+    # run as a script rather than imported as a module.
+    from waitress import serve
+    listen_host, listen_port = "0.0.0.0", 7860
+    print("Starting Flask app...")
+    serve(app, host=listen_host, port=listen_port)