ak0601 committed on
Commit
86b9caf
·
verified ·
1 Parent(s): 142f88b

Update reccomendation.py

Files changed (1)
  1. reccomendation.py +1096 -11
reccomendation.py CHANGED
@@ -1,3 +1,1088 @@
+ # import pandas as pd
+ # import requests
+ # from pydantic import BaseModel, Field
+ # from typing import List, Tuple, Optional
+ # from langchain_openai import ChatOpenAI
+ # from langchain_core.prompts import ChatPromptTemplate
+ # import os
+ # from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Depends, Header, Request
+ # from fastapi.responses import JSONResponse
+ # from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+ # from fastapi.middleware.cors import CORSMiddleware
+ # import json
+ # import tempfile
+ # import shutil
+ # import PyPDF2
+ # from dotenv import load_dotenv
+ # import pdfplumber
+ # import re
+ # from db import *
+ # import time
+ # import asyncio
+ # from contextlib import asynccontextmanager
+ # import logging
+ # from sqlalchemy.pool import NullPool
+ # from cloud_config import *
+ # import uuid
+
+ # # Load environment variables
+ # load_dotenv()
+
+ # # Configure logging for Cloud Run
+ # logging.basicConfig(
+ # level=getattr(logging, LOG_LEVEL),
+ # format=LOG_FORMAT
+ # )
+ # logger = logging.getLogger(__name__)
+
+ # # Global variable to store access token
+ # access_token = None
+
+ # # Startup/shutdown events
+ # @asynccontextmanager
+ # async def lifespan(app: FastAPI):
+ # # Startup
+ # logger.info("Starting up Job Recommendation API...")
+ # # You can initialize connection pools here if needed
+ # yield
+ # # Shutdown
+ # logger.info("Shutting down Job Recommendation API...")
+ # # Close any open connections here
+
+ # # Initialize FastAPI app with lifespan
+ # app = FastAPI(
+ # title="Job Recommendation API",
+ # description="API for processing resumes and recommending jobs",
+ # lifespan=lifespan
+ # )
+
+ # # Add CORS middleware for cloud deployment
+ # app.add_middleware(
+ # CORSMiddleware,
+ # allow_origins=["*"], # Configure based on your needs
+ # allow_credentials=True,
+ # allow_methods=["*"],
+ # allow_headers=["*"],
+ # )
+
+ # # Add request ID middleware for better tracing
+ # @app.middleware("http")
+ # async def add_request_id(request: Request, call_next):
+ # request_id = f"{time.time()}-{request.client.host}"
+ # request.state.request_id = request_id
+
+ # # Log the request
+ # logger.info(f"Request ID: {request_id} - {request.method} {request.url.path}")
+
+ # try:
+ # response = await call_next(request)
+ # response.headers["X-Request-ID"] = request_id
+ # return response
+ # except Exception as e:
+ # logger.error(f"Request ID: {request_id} - Error: {str(e)}")
+ # raise
+
+ # # Security configuration
+ # API_KEY = os.getenv("API_KEY")
+ # security = HTTPBearer()
+
+ # def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
+ # """
+ # Verify the API key from the Authorization header
+ # """
+ # if not API_KEY:
+ # logger.error("API key not configured")
+ # raise HTTPException(
+ # status_code=500,
+ # detail="API key not configured",
+ # )
+
+ # if credentials.credentials != API_KEY:
+ # logger.warning("Invalid API key attempt")
+ # raise HTTPException(
+ # status_code=401,
+ # detail="Invalid API key",
+ # headers={"WWW-Authenticate": "Bearer"},
+ # )
+ # return credentials.credentials
+
+ # # Initialize OpenAI client with error handling
+ # try:
+ # llm = ChatOpenAI(
+ # model="gpt-4o-mini",
+ # temperature=0,
+ # api_key=os.getenv("OPENAI_API_KEY")
+ # )
+ # logger.info("OpenAI client initialized successfully")
+ # except Exception as e:
+ # logger.error(f"Failed to initialize OpenAI client: {e}")
+ # raise
+
+ # # Initialize database engine with connection pooling suitable for Cloud Run
+ # def get_engine():
+ # """
+ # Get database engine with NullPool for Cloud Run
+ # """
+ # try:
+ # conn_string = f"postgresql://{DB_PARAMS['user']}:{DB_PARAMS['password']}@{DB_PARAMS['host']}:{DB_PARAMS['port']}/{DB_PARAMS['dbname']}"
+ # # Use NullPool for Cloud Run to avoid connection issues
+ # engine = create_engine(conn_string, poolclass=NullPool, pool_pre_ping=True)
+ # logger.info("Database engine created successfully")
+ # return engine
+ # except Exception as e:
+ # logger.error(f"Failed to create database engine: {e}")
+ # raise
+
+ # # Initialize database engine
+ # engine = get_engine()
+
+ # def get_access_token():
+ # """
+ # Get access token for the external API with better error handling
+ # """
+ # global access_token
+
+ # # If we already have a token, return it
+ # if access_token:
+ # return access_token
+
+ # try:
+ # login_url = "https://fitscore-agent-535960463668.us-central1.run.app/auth/login"
+ # login_data = {
+ # "email": "johnrecruiter@example.com",
+ # "password": "Password@123"
+ # }
+ # login_headers = {
+ # 'accept': 'application/json',
+ # 'Content-Type': 'application/json'
+ # }
+
+ # # Add timeout to prevent hanging
+ # login_response = requests.post(login_url, headers=login_headers, json=login_data, timeout=LOGIN_TIMEOUT)
+
+ # if login_response.status_code == 200:
+ # login_result = login_response.json()
+ # access_token = login_result.get('data', {}).get('tokens', {}).get('accessToken')
+ # if access_token:
+ # logger.info("Successfully obtained access token")
+ # return access_token
+ # else:
+ # logger.error("Login successful but no access token found in response")
+ # return None
+ # else:
+ # logger.error(f"Login failed with status {login_response.status_code}: {login_response.text}")
+ # return None
+ # except requests.exceptions.Timeout:
+ # logger.error("Login request timed out")
+ # return None
+ # except requests.exceptions.RequestException as e:
+ # logger.error(f"Network error during login: {e}")
+ # return None
+ # except Exception as e:
+ # logger.error(f"Unexpected error getting access token: {e}")
+ # return None
+
+ # def generate_smart_hiring_collateral(job_description_text: str) -> tuple[str, str]:
+ # """
+ # Generate collateral using the smart-hiring/generate endpoint
+ # Returns a tuple of (collateral, job_id)
+ # """
+ # try:
+ # url = "https://fitscore-agent-535960463668.us-central1.run.app/smart-hiring/generate"
+
+ # # Generate a unique job ID using UUID
+ # job_id = str(uuid.uuid4())
+
+ # # Prepare headers with authentication
+ # headers = {
+ # 'accept': 'application/json',
+ # 'Authorization': f'Bearer {get_access_token()}'
+ # }
+
+ # # Prepare payload
+ # payload = {
+ # 'job_id': job_id,
+ # 'job_description_text': job_description_text
+ # }
+
+ # # Make the API request
+ # response = requests.post(url, headers=headers, data=payload, timeout=EXTERNAL_API_TIMEOUT)
+
+ # if response.status_code == 200:
+ # logger.info("Smart hiring collateral generated successfully")
+ # # Parse the response to extract smart_hiring_criteria
+ # try:
+ # response_data = response.json()
+ # if response_data.get('success') and 'data' in response_data:
+ # smart_hiring_criteria = response_data['data'].get('smart_hiring_criteria', '')
+ # if smart_hiring_criteria:
+ # logger.info("Successfully extracted smart hiring criteria")
+ # return smart_hiring_criteria, job_id
+ # else:
+ # logger.warning("No smart_hiring_criteria found in response")
+ # return "", job_id
+ # else:
+ # logger.warning("Invalid response format from smart hiring API")
+ # return "", job_id
+ # except json.JSONDecodeError as e:
+ # logger.error(f"Failed to parse smart hiring response as JSON: {e}")
+ # return "", job_id
+ # elif response.status_code == 401:
+ # logger.warning("Authentication failed for smart hiring, getting fresh token...")
+ # global access_token
+ # access_token = None # Reset the token
+ # new_token = get_access_token()
+ # if new_token:
+ # headers['Authorization'] = f'Bearer {new_token}'
+ # response = requests.post(url, headers=headers, data=payload, timeout=EXTERNAL_API_TIMEOUT)
+ # if response.status_code == 200:
+ # logger.info("Smart hiring collateral generated successfully with fresh token")
+ # # Parse the response to extract smart_hiring_criteria
+ # try:
+ # response_data = response.json()
+ # if response_data.get('success') and 'data' in response_data:
+ # smart_hiring_criteria = response_data['data'].get('smart_hiring_criteria', '')
+ # if smart_hiring_criteria:
+ # logger.info("Successfully extracted smart hiring criteria with fresh token")
+ # return smart_hiring_criteria, job_id
+ # else:
+ # logger.warning("No smart_hiring_criteria found in response with fresh token")
+ # return "", job_id
+ # else:
+ # logger.warning("Invalid response format from smart hiring API with fresh token")
+ # return "", job_id
+ # except json.JSONDecodeError as e:
+ # logger.error(f"Failed to parse smart hiring response as JSON with fresh token: {e}")
+ # return "", job_id
+ # else:
+ # logger.error(f"Smart hiring API call failed with status {response.status_code}")
+ # return "", job_id
+ # else:
+ # logger.error("Could not obtain fresh token for smart hiring")
+ # return "", job_id
+ # else:
+ # logger.error(f"Smart hiring API call failed with status {response.status_code}: {response.text}")
+ # return "", job_id
+
+ # except requests.exceptions.Timeout:
+ # logger.error(f"Smart hiring API request timed out after {EXTERNAL_API_TIMEOUT} seconds")
+ # return "", ""
+ # except Exception as e:
+ # logger.error(f"Exception occurred in smart hiring generation: {str(e)}")
+ # return "", ""
+
+ # class structure(BaseModel):
+ # name: str = Field(description="Name of the candidate")
+ # location: str = Field(description="The location of the candidate. Extract city and state if possible.")
+ # skills: List[str] = Field(description="List of individual skills of the candidate")
+ # ideal_jobs: str = Field(description="List of ideal jobs for the candidate based on past experience.")
+ # email: str = Field(description="The email of the candidate")
+ # yoe: str = Field(description="Years of experience of the candidate.")
+ # experience: str = Field(description="A brief summary of the candidate's past experience.")
+ # industry: str = Field(description="The industry the candidate has experience in.(Tech,Legal,Finance/Accounting,Healthcare,Industrial,Logistics,Telecom,Admin,Other)")
+
+ # class JobAnalysis(BaseModel):
+ # job_title: str
+ # company_name: str
+ # analysis: dict
+
+ # def extract_text_from_pdf(pdf_file_path: str) -> str:
+ # """
+ # Extract text from PDF file using multiple methods for better accuracy
+ # """
+ # text = ""
+
+ # # Method 1: Try pdfplumber (better for complex layouts)
+ # try:
+ # with pdfplumber.open(pdf_file_path) as pdf:
+ # for page in pdf.pages:
+ # page_text = page.extract_text()
+ # if page_text:
+ # text += page_text + "\n"
+ # if text.strip():
+ # logger.info(f"Successfully extracted text using pdfplumber: {len(text)} characters")
+ # return text.strip()
+ # except Exception as e:
+ # logger.warning(f"pdfplumber failed: {e}")
+
+ # # Method 2: Try PyPDF2 (fallback)
+ # try:
+ # with open(pdf_file_path, 'rb') as file:
+ # pdf_reader = PyPDF2.PdfReader(file)
+ # for page in pdf_reader.pages:
+ # page_text = page.extract_text()
+ # if page_text:
+ # text += page_text + "\n"
+ # if text.strip():
+ # logger.info(f"Successfully extracted text using PyPDF2: {len(text)} characters")
+ # return text.strip()
+ # except Exception as e:
+ # logger.error(f"PyPDF2 failed: {e}")
+
+ # # If both methods fail, return empty string
+ # logger.error("Failed to extract text from PDF")
+ # return ""
+
+ # def extract_resume_info(resume_text: str) -> structure:
+ # """
+ # Extract structured information from resume using LLM
+ # """
+ # prompt = ChatPromptTemplate.from_template("""
+ # You are an expert resume parser. Extract the following information from the resume text provided and return it in a structured JSON format.
+
+ # Resume Text:
+ # {resume_text}
+
+ # Please extract and structure the information according to the following schema:
+ # - name: Full name of the candidate
+ # - location: City and state if available, otherwise general location
+ # - skills: List of technical skills, tools, technologies, programming languages, etc.
+ # - ideal_jobs: Based on their experience, what types of jobs would be ideal for this candidate
+ # - email: Email address of the candidate (if found in resume)
+ # - yoe: Years of experience (extract from work history)
+ # - experience: Brief summary of their work experience and background
+ # - industry: Categorize into one of these industries: Tech, Legal, Finance/Accounting, Healthcare, Industrial, Logistics, Telecom, Admin, Other
+
+ # Return ONLY a valid JSON object with these fields. Do not include any other text or explanations.
+ # """)
+
+ # try:
+ # str_llm = llm.with_structured_output(structure)
+ # chain = prompt | str_llm
+ # response = chain.invoke({"resume_text": resume_text})
+
+ # validated_data = {
+ # 'name': response.name,
+ # 'location': response.location,
+ # 'email': response.email,
+ # 'skills': response.skills,
+ # 'ideal_jobs': response.ideal_jobs,
+ # 'yoe': response.yoe,
+ # 'experience': response.experience,
+ # 'industry': response.industry
+ # }
+
+ # logger.info(f"Successfully extracted resume info for: {validated_data['name']}")
+ # return validated_data
+
+ # except Exception as e:
+ # logger.error(f"Failed to extract resume info: {e}")
+ # return {
+ # 'name': "Unknown",
+ # 'location': "Unknown",
+ # 'email': "",
+ # 'skills': [],
+ # 'ideal_jobs': "Software Engineer",
+ # 'yoe': "0",
+ # 'experience': "No experience listed",
+ # 'industry': "Tech"
+ # }
+
+ # def filter_jobs_by_industry(jobs_df: pd.DataFrame, target_industry: str) -> pd.DataFrame:
+ # """
+ # Filter jobs by industry
+ # """
+ # # Map the extracted industry to database industry values
+ # industry_mapping = {
+ # 'Tech': ['technology', 'VC Tech'],
+ # 'Legal': ['Legal'],
+ # 'Finance/Accounting': ['finance/Accounting'],
+ # 'Healthcare': ['healthcare'],
+ # 'Industrial': ['industrial'],
+ # 'Logistics': ['logistics'],
+ # 'Telecom': ['telecom'],
+ # 'Admin': ['admin'],
+ # 'Other': ['Other']
+ # }
+
+ # target_industries = industry_mapping.get(target_industry, ['Tech'])
+
+ # # Filter jobs by industry (using database column name 'industry')
+ # filtered_jobs = jobs_df[jobs_df['industry'].isin(target_industries)]
+
+ # logger.info(f"Filtered {len(filtered_jobs)} jobs for industry: {target_industry}")
+ # return filtered_jobs
+
+ # def filter_jobs_by_location(jobs_df: pd.DataFrame, candidate_location: str) -> pd.DataFrame:
+ # """
+ # Filter jobs by location matching the candidate's location
+ # """
+ # if not candidate_location or candidate_location.lower() in ['unknown', 'n/a', '']:
+ # logger.info(f"No location info provided, returning all {len(jobs_df)} jobs")
+ # return jobs_df # Return all jobs if no location info
+
+ # # Clean and normalize candidate location
+ # candidate_location = candidate_location.lower().strip()
+ # logger.info(f"Filtering jobs for candidate location: {candidate_location}")
+
+ # # Extract state abbreviations and full names
+ # state_mapping = {
+ # 'alabama': 'al', 'alaska': 'ak', 'arizona': 'az', 'arkansas': 'ar', 'california': 'ca',
+ # 'colorado': 'co', 'connecticut': 'ct', 'delaware': 'de', 'district of columbia': 'dc', 'florida': 'fl', 'georgia': 'ga',
+ # 'hawaii': 'hi', 'idaho': 'id', 'illinois': 'il', 'indiana': 'in', 'iowa': 'ia',
+ # 'kansas': 'ks', 'kentucky': 'ky', 'louisiana': 'la', 'maine': 'me', 'maryland': 'md',
+ # 'massachusetts': 'ma', 'michigan': 'mi', 'minnesota': 'mn', 'mississippi': 'ms', 'missouri': 'mo',
+ # 'montana': 'mt', 'nebraska': 'ne', 'nevada': 'nv', 'new hampshire': 'nh', 'new jersey': 'nj',
+ # 'new mexico': 'nm', 'new york': 'ny', 'north carolina': 'nc', 'north dakota': 'nd', 'ohio': 'oh',
+ # 'oklahoma': 'ok', 'oregon': 'or', 'pennsylvania': 'pa', 'rhode island': 'ri', 'south carolina': 'sc',
+ # 'south dakota': 'sd', 'tennessee': 'tn', 'texas': 'tx', 'utah': 'ut', 'vermont': 'vt',
+ # 'virginia': 'va', 'washington': 'wa', 'west virginia': 'wv', 'wisconsin': 'wi', 'wyoming': 'wy'
+ # }
+
+ # # Create location patterns to match
+ # location_patterns = []
+
+ # # Add the original location
+ # location_patterns.append(candidate_location)
+
+ # # Add state variations
+ # for state_name, state_abbr in state_mapping.items():
+ # if state_name in candidate_location or state_abbr in candidate_location:
+ # location_patterns.extend([state_name, state_abbr])
+
+ # # Add common city variations (extract city name)
+ # city_match = re.search(r'^([^,]+)', candidate_location)
+ # if city_match:
+ # city_name = city_match.group(1).strip()
+ # location_patterns.append(city_name)
+
+ # # Add remote/anywhere patterns if location is remote
+ # if 'remote' in candidate_location or 'anywhere' in candidate_location:
+ # location_patterns.extend(['remote', 'anywhere', 'work from home', 'wfh'])
+
+ # logger.info(f"Location patterns to match: {location_patterns}")
+
+ # # Filter jobs by location
+ # matching_jobs = []
+
+ # for _, job_row in jobs_df.iterrows():
+ # job_location = str(job_row.get('job_location', '')).lower()
+
+ # # Check if any location pattern matches
+ # location_matches = any(pattern in job_location for pattern in location_patterns)
+
+ # # Also check for remote jobs if candidate location includes remote
+ # if 'remote' in candidate_location and any(remote_term in job_location for remote_term in ['remote', 'anywhere', 'work from home', 'wfh']):
+ # location_matches = True
+
+ # # Check for exact city/state matches
+ # if candidate_location in job_location or job_location in candidate_location:
+ # location_matches = True
+
+ # if location_matches:
+ # matching_jobs.append(job_row)
+
+ # result_df = pd.DataFrame(matching_jobs) if matching_jobs else jobs_df
+ # logger.info(f"Found {len(matching_jobs)} jobs matching location out of {len(jobs_df)} total jobs")
+
+ # return result_df
+
+ # def extract_experience_requirement(requirements_text: str) -> dict:
+ # """
+ # Extract experience requirements from job requirements text
+ # Returns a dictionary with min_years, max_years, and level
+ # """
+ # if not requirements_text or pd.isna(requirements_text):
+ # return {'min_years': 0, 'max_years': 999, 'level': 'any'}
+
+ # requirements_text = str(requirements_text).lower()
+
+ # # Common experience patterns
+ # experience_patterns = [
+ # # Specific year ranges
+ # r'(\d+)[\-\+]\s*(\d+)\s*years?\s*experience',
+ # r'(\d+)\s*to\s*(\d+)\s*years?\s*experience',
+ # r'(\d+)\s*-\s*(\d+)\s*years?\s*experience',
+
+ # # Minimum years
+ # r'(\d+)\+?\s*years?\s*experience',
+ # r'minimum\s*(\d+)\s*years?\s*experience',
+ # r'at\s*least\s*(\d+)\s*years?\s*experience',
+
+ # # Level-based patterns
+ # r'(entry\s*level|junior|associate)',
+ # r'(mid\s*level|intermediate|mid\s*senior)',
+ # r'(senior|lead|principal|staff)',
+ # r'(executive|director|vp|chief|c\s*level)',
+
+ # # Specific year mentions
+ # r'(\d+)\s*years?\s*in\s*the\s*field',
+ # r'(\d+)\s*years?\s*of\s*professional\s*experience',
+ # r'(\d+)\s*years?\s*of\s*relevant\s*experience'
+ # ]
+
+ # min_years = 0
+ # max_years = 999
+ # level = 'any'
+
+ # # Check for specific year ranges
+ # for pattern in experience_patterns[:3]: # First 3 patterns are for ranges
+ # matches = re.findall(pattern, requirements_text)
+ # if matches:
+ # try:
+ # min_years = int(matches[0][0])
+ # max_years = int(matches[0][1])
+ # break
+ # except (ValueError, IndexError):
+ # continue
+
+ # # Check for minimum years if no range found
+ # if min_years == 0:
+ # for pattern in experience_patterns[3:6]: # Minimum year patterns
+ # matches = re.findall(pattern, requirements_text)
+ # if matches:
+ # try:
+ # min_years = int(matches[0])
+ # break
+ # except (ValueError, IndexError):
+ # continue
+
+ # # Check for level-based requirements
+ # for pattern in experience_patterns[6:10]: # Level patterns
+ # matches = re.findall(pattern, requirements_text)
+ # if matches:
+ # level_match = matches[0].lower()
+ # if 'entry' in level_match or 'junior' in level_match or 'associate' in level_match:
+ # level = 'entry'
+ # if min_years == 0:
+ # min_years = 0
+ # max_years = 2
+ # elif 'mid' in level_match or 'intermediate' in level_match:
+ # level = 'mid'
+ # if min_years == 0:
+ # min_years = 2
+ # max_years = 5
+ # elif 'senior' in level_match or 'lead' in level_match or 'principal' in level_match or 'staff' in level_match:
+ # level = 'senior'
+ # if min_years == 0:
+ # min_years = 5
+ # max_years = 10
+ # elif 'executive' in level_match or 'director' in level_match or 'vp' in level_match or 'chief' in level_match:
+ # level = 'executive'
+ # if min_years == 0:
+ # min_years = 10
+ # max_years = 999
+ # break
+
+ # # Check for specific year mentions if still no match
+ # if min_years == 0:
+ # for pattern in experience_patterns[10:]: # Specific year mention patterns
+ # matches = re.findall(pattern, requirements_text)
+ # if matches:
+ # try:
+ # min_years = int(matches[0])
+ # max_years = min_years + 2 # Add buffer
+ # break
+ # except (ValueError, IndexError):
+ # continue
+
+ # return {
+ # 'min_years': min_years,
+ # 'max_years': max_years,
+ # 'level': level
+ # }
+
+ # def filter_jobs_by_experience(jobs_df: pd.DataFrame, candidate_yoe: str) -> pd.DataFrame:
+ # """
+ # Filter jobs by experience level matching the candidate's years of experience
+ # """
+ # if not candidate_yoe or candidate_yoe.lower() in ['unknown', 'n/a', '']:
+ # logger.info(f"No experience info provided, returning all {len(jobs_df)} jobs")
+ # return jobs_df
+
+ # # Extract numeric years from candidate experience
+ # try:
+ # # Handle various formats like "5 years", "5+ years", "5-7 years", etc.
+ # yoe_match = re.search(r'(\d+(?:\.\d+)?)', str(candidate_yoe))
+ # if yoe_match:
+ # candidate_years = float(yoe_match.group(1))
+ # else:
+ # logger.warning(f"Could not extract years from: {candidate_yoe}")
+ # return jobs_df
+ # except (ValueError, TypeError):
+ # logger.error(f"Invalid experience format: {candidate_yoe}")
+ # return jobs_df
+
+ # logger.info(f"Filtering jobs for candidate with {candidate_years} years of experience")
+
+ # # Filter jobs by experience requirements
+ # matching_jobs = []
+
+ # for _, job_row in jobs_df.iterrows():
+ # requirements_text = str(job_row.get('requirements', ''))
+ # experience_req = extract_experience_requirement(requirements_text)
+
+ # # Check if candidate's experience matches the job requirements
+ # if (candidate_years >= experience_req['min_years'] and
+ # candidate_years <= experience_req['max_years']):
+ # matching_jobs.append(job_row)
+
+ # result_df = pd.DataFrame(matching_jobs) if matching_jobs else jobs_df
+ # logger.info(f"Found {len(matching_jobs)} jobs matching experience out of {len(jobs_df)} total jobs")
+
+ # return result_df
+
+ # def filter_jobs_by_priority(jobs_df: pd.DataFrame) -> pd.DataFrame:
+ # """
+ # Filter jobs to only include high priority jobs
+ # """
+ # if jobs_df.empty:
+ # logger.info("No jobs to filter by priority")
+ # return jobs_df
+
+ # # Filter jobs by priority - only include high priority jobs
+ # priority_filtered_jobs = jobs_df[jobs_df['priority'].str.lower() == 'high']
+
+ # logger.info(f"Found {len(priority_filtered_jobs)} high priority jobs out of {len(jobs_df)} total jobs")
+
+ # return priority_filtered_jobs
+
+ # def create_job_description(job_row: pd.Series) -> str:
+ # """
+ # Create a comprehensive job description from job data
+ # """
+ # description_parts = []
+
+ # if pd.notna(job_row.get('company_blurb')):
+ # description_parts.append(f"Company: {job_row['company_blurb']}")
+
+ # if pd.notna(job_row.get('company_culture')):
+ # description_parts.append(f"Company Culture: {job_row['company_culture']}")
+
+ # if pd.notna(job_row.get('description')):
+ # description_parts.append(f"Description: {job_row['description']}")
+
+ # if pd.notna(job_row.get('requirements')):
+ # description_parts.append(f"Requirements: {job_row['requirements']}")
+
+ # if pd.notna(job_row.get('role_responsibilities')):
+ # description_parts.append(f"Role Responsibilities: {job_row['role_responsibilities']}")
+
+ # if pd.notna(job_row.get('job_location')):
+ # description_parts.append(f"Location: {job_row['job_location']}")
+
+ # return "\n\n".join(description_parts)
+
+ # def create_jd_smart_hiring(job_row: pd.Series) -> str:
+ # """
+ # Create a smart hiring job description from job data
+ # """
+ # description_parts = []
+ # if pd.notna(job_row.get('description')):
+ # description_parts.append(f"Description: {job_row['description']}")
+ # if pd.notna(job_row.get('requirements')):
+ # description_parts.append(f"Requirements: {job_row['requirements']}")
+
+ # return "\n\n".join(description_parts)
+
+
+
+ # def clean_analysis_result(analysis_result: dict) -> dict:
+ # """
+ # Clean up the analysis result to only include final_score and summary
+ # """
+ # if not isinstance(analysis_result, dict):
+ # return analysis_result
+
+ # # Remove user_context if present
+ # if 'user_context' in analysis_result:
+ # del analysis_result['user_context']
+
+ # # Clean up final_response if present
+ # if 'final_response' in analysis_result:
+ # try:
+ # # Handle both string and dict formats
+ # if isinstance(analysis_result['final_response'], str):
+ # final_response = json.loads(analysis_result['final_response'])
+ # else:
+ # final_response = analysis_result['final_response']
+
+ # # Extract and format the evaluation data
+ # if 'evaluation' in final_response and len(final_response['evaluation']) > 0:
+ # evaluation = final_response['evaluation'][0]
+
+ # # Create a minimal structure with only final_score and summary
+ # cleaned_response = {
+ # 'final_score': evaluation.get('final_score', 0),
+ # 'summary': {}
+ # }
+
+ # # Extract summary information
+ # if 'summary' in evaluation and len(evaluation['summary']) > 0:
+ # summary = evaluation['summary'][0]
+ # cleaned_response['summary'] = {
+ # 'strengths': summary.get('strengths', []),
+ # 'weaknesses': summary.get('weaknesses', []),
+ # 'opportunities': summary.get('opportunities', []),
+ # 'recommendations': summary.get('recommendations', [])
+ # }
+
+ # analysis_result['final_response'] = cleaned_response
+
+ # except (json.JSONDecodeError, KeyError, IndexError) as e:
+ # logger.error(f"Error cleaning analysis result: {e}")
+ # # Keep original if cleaning fails
+ # pass
+
+ # return analysis_result
+
+ # def sort_jobs_by_score(job_analyses: list) -> list:
+ # """
+ # Sort jobs by final_score in descending order (highest scores first)
+ # """
+ # def extract_score(job_analysis):
+ # try:
+ # analysis = job_analysis.get('analysis', {})
+ # if 'final_response' in analysis and isinstance(analysis['final_response'], dict):
+ # return analysis['final_response'].get('final_score', 0)
+ # return 0
+ # except:
+ # return 0
+
+ # return sorted(job_analyses, key=extract_score, reverse=True)
+
+ # async def analyze_job_fit_with_retry(job_description: str, resume_file_path: str, job_row: pd.Series = None, max_retries: int = 3) -> dict:
+ # """
+ # Analyze job-candidate fit with retry logic for resilience
+ # """
+ # for attempt in range(max_retries):
+ # try:
+ # result = analyze_job_fit(job_description, resume_file_path, job_row)
+ # if "error" not in result:
+ # return result
+
+ # # If authentication error and not last attempt, retry
+ # if "Authentication failed" in result.get("error", "") and attempt < max_retries - 1:
+ # logger.warning(f"Authentication failed, retrying... (attempt {attempt + 1}/{max_retries})")
+ # global access_token
+ # access_token = None # Reset token to force refresh
+ # await asyncio.sleep(2 ** attempt) # Exponential backoff
+ # continue
+
+ # # If timeout error and not last attempt, retry with longer timeout
+ # if "timed out" in result.get("error", "").lower() and attempt < max_retries - 1:
+ # logger.warning(f"Request timed out, retrying with longer timeout... (attempt {attempt + 1}/{max_retries})")
+ # await asyncio.sleep(2 ** attempt) # Exponential backoff
+ # continue
+
+ # return result
+ # except Exception as e:
+ # logger.error(f"Attempt {attempt + 1}/{max_retries} failed: {str(e)}")
+ # if attempt == max_retries - 1:
+ # return {"error": f"Failed after {max_retries} attempts: {str(e)}"}
+ # await asyncio.sleep(2 ** attempt)
+
+ # def analyze_job_fit(job_description: str, resume_file_path: str, job_row: pd.Series = None) -> dict:
+ # """
+ # Analyze job-candidate fit using the external API
+ # """
+
+ # url = "https://fitscore-agent-535960463668.us-central1.run.app/analyze"
+
+ # # Check if resume file exists
+ # if not os.path.exists(resume_file_path):
+ # logger.error(f"Resume file not found: {resume_file_path}")
+ # return {"error": f"Resume file not found: {resume_file_path}"}
+
+
+ # # Prepare headers with authentication
+ # headers = {
+ # 'accept': 'application/json',
+ # 'Authorization': f'Bearer {get_access_token()}'
+ # }
+
+ # # Prepare form data
+ # files = {
+ # 'resume': (os.path.basename(resume_file_path), open(resume_file_path, 'rb'), 'application/pdf')
+ # }
+
+ # data = {
+ # 'jd_text': job_description
+ # }
+
+ # # Generate collateral if job_row is provided
+ # if job_row is not None:
+ # try:
+ # job_description_text = create_jd_smart_hiring(job_row)
+ # if job_description_text:
+ # collateral, job_id = generate_smart_hiring_collateral(job_description_text)
+ # if collateral:
+ # data['collateral'] = collateral
+ # data['job_id'] = job_id
+ # logger.info(f"Added collateral and job_id ({job_id}) to job fit analysis request")
+ # elif job_id:
+ # # Even if collateral is empty, we can still use the job_id
+ # data['job_id'] = job_id
+ # logger.info(f"Added job_id ({job_id}) to job fit analysis request (no collateral)")
+ # except Exception as e:
+ # logger.warning(f"Failed to generate collateral: {e}")
+ # # Continue without collateral if generation fails
+
+ # try:
+ # # Make the API request with configured timeout
+ # response = requests.post(url, headers=headers, files=files, data=data, timeout=EXTERNAL_API_TIMEOUT)
+
+ # # If we get an authentication error, try to get a fresh token and retry once
+ # if response.status_code == 401:
+ # logger.warning("Authentication failed, getting fresh token...")
+ # global access_token
+ # access_token = None # Reset the token
+ # new_token = get_access_token()
+ # if new_token:
+ # headers['Authorization'] = f'Bearer {new_token}'
+ # # Close the previous file and reopen
+ # files['resume'][1].close()
+ # files['resume'] = (os.path.basename(resume_file_path), open(resume_file_path, 'rb'), 'application/pdf')
+ # response = requests.post(url, headers=headers, files=files, data=data, timeout=EXTERNAL_API_TIMEOUT)
+ # else:
+ # # If we can't get a fresh token, return error
+ # return {"error": "Authentication failed and could not obtain fresh token"}
+
+ # if response.status_code == 200:
+ # logger.info("Job fit analysis completed successfully")
+ # return response.json()
+ # elif response.status_code == 401:
+ # # If we still get 401 after fresh token, return error
+ # return {"error": "Authentication failed even with fresh token"}
+ # else:
+ # logger.error(f"API call failed with status {response.status_code}")
+ # return {"error": f"API call failed with status {response.status_code}", "details": response.text}
+
+ # except requests.exceptions.Timeout:
+ # logger.error(f"API request timed out after {EXTERNAL_API_TIMEOUT} seconds")
+ # return {"error": f"API request timed out after {EXTERNAL_API_TIMEOUT} seconds"}
+ # except Exception as e:
+ # logger.error(f"Exception occurred: {str(e)}")
+ # return {"error": f"Exception occurred: {str(e)}"}
+ # finally:
+ # # Ensure the file is closed
+ # if 'resume' in files:
+ # try:
+ # files['resume'][1].close()
+ # except:
+ # pass
+
+ # @app.post("/process_resume_and_recommend_jobs")
+ # async def process_resume_and_recommend_jobs(
+ # resume: UploadFile = File(...),
+ # resume_text: str = Form(""),
+ # api_key: str = Depends(verify_api_key)
+ # ):
+ # """
+ # Process resume, extract information, filter jobs by industry, and analyze fit
+ # """
+ # request_start_time = time.time()
+
+ # try:
+ # logger.info(f"Processing resume: {resume.filename}")
+
+ # # Save uploaded file temporarily
+ # with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
+ # shutil.copyfileobj(resume.file, tmp_file)
+ # tmp_file_path = tmp_file.name
+
+ # try:
+ # # Extract text from PDF if no resume_text provided
+ # if not resume_text:
+ # resume_text = extract_text_from_pdf(tmp_file_path)
+ # if not resume_text:
+ # logger.error("Could not extract text from PDF file")
+ # return JSONResponse(
+ # status_code=400,
+ # content={"error": "Could not extract text from PDF file"}
+ # )
+
+ # # Extract resume information using LLM
+ # resume_info = extract_resume_info(resume_text)
+
+ # # Load jobs data from PostgreSQL database
+ # try:
+ # jobs_df = pd.read_sql_table("jobs", con=engine)
+ # candidates_df = pd.read_sql_table("candidates", con=engine)
+ # submissions_df = pd.read_sql_table("candidate_submissions", con=engine)
+ # logger.info(f"Loaded {len(jobs_df)} jobs, {len(candidates_df)} candidates, {len(submissions_df)} submissions")
+ # except Exception as db_error:
+ # logger.error(f"Database error: {db_error}")
+ # return JSONResponse(
+ # status_code=500,
+ # content={"error": "Database connection error"}
+ # )
+
+ # # Filter jobs by industry
+ # filtered_jobs = filter_jobs_by_industry(jobs_df, resume_info['industry'])
+
+ # if filtered_jobs.empty:
+ # logger.warning(f"No jobs found for industry: {resume_info['industry']}")
+ # return JSONResponse(
+ # status_code=404,
+ # content={"message": f"No jobs found for industry: {resume_info['industry']}"}
+ # )
+
+ # # Filter jobs by location
+ # location_filtered_jobs = filter_jobs_by_location(filtered_jobs, resume_info['location'])
+
+ # # Filter jobs by experience level
+ # experience_filtered_jobs = filter_jobs_by_experience(location_filtered_jobs, resume_info['yoe'])
+
+ # # Filter jobs by priority
+ # priority_filtered_jobs = filter_jobs_by_priority(experience_filtered_jobs)
+
+ # # Use priority filtered jobs if available, otherwise fall back to experience filtered jobs, then location filtered jobs
+ # if not priority_filtered_jobs.empty:
+ # jobs_to_analyze = priority_filtered_jobs
+ # elif not experience_filtered_jobs.empty:
+ # jobs_to_analyze = experience_filtered_jobs
+ # else:
+ # jobs_to_analyze = location_filtered_jobs
+
+ # # Create filtered_submission_df with job_ids from jobs_to_analyze
+ # job_ids_to_analyze = jobs_to_analyze['id'].tolist()
+ # filtered_submission_df = submissions_df[submissions_df['jobId'].isin(job_ids_to_analyze)]
+
+ # # Check if candidate email exists in candidates_df
+ # candidate_id = None
+ # if resume_info.get('email'):
+ # candidate_match = candidates_df[candidates_df['email'] == resume_info['email']]
+ # if not candidate_match.empty:
+ # candidate_id = candidate_match.iloc[0]['id']
+ # logger.info(f"Found existing candidate with ID: {candidate_id}")
+
+ # # Analyze job fit for each filtered job
+ # job_analyses = []
+
+ # # Use configured number of jobs to analyze
+ # for _, job_row in jobs_to_analyze.head(MAX_JOBS_TO_ANALYZE).iterrows():
+ # job_id = job_row.get('id')
+
+ # # Check if we have an existing submission for this candidate and job
+ # existing_submission = None
+ # if candidate_id and job_id:
+ # submission_match = filtered_submission_df[
+ # (filtered_submission_df['candidate_id'] == candidate_id) &
+ # (filtered_submission_df['jobId'] == job_id)
+ # ]
+ # if not submission_match.empty:
+ # existing_submission = submission_match.iloc[0]
+ # logger.info(f"Found existing submission for job_id: {job_id}, candidate_id: {candidate_id}")
+
+ # if existing_submission is not None:
+ # # Use existing fit score from submission
+ # fit_score = existing_submission.get('fit_score', 0)
+ # existing_analysis = {
+ # 'final_response': {
+ # 'final_score': fit_score,
+ # 'summary': {
+ # 'strengths': [],
+ # 'weaknesses': [],
+ # 'opportunities': [],
+ # 'recommendations': []
+ # }
+ # },
+ # 'source': 'existing_submission'
+ # }
+ # analysis_result = existing_analysis
+ # else:
+ # # Call API for new analysis with retry logic
+ # job_description = create_job_description(job_row)
+ # analysis_result = await analyze_job_fit_with_retry(job_description, tmp_file_path, job_row)
+ # analysis_result['source'] = 'api_call'
+
+ # # Clean up the analysis result
+ # cleaned_analysis = clean_analysis_result(analysis_result)
+
+ # job_analysis = JobAnalysis(
+ # job_title=job_row.get('job_title', 'Unknown'),
+ # company_name=job_row.get('company_name', 'Unknown'),
+ # analysis=cleaned_analysis
+ # )
+ # job_analyses.append(job_analysis.dict())
+
+ # # Sort jobs by final_score in descending order (highest scores first)
+ # job_analyses = sort_jobs_by_score(job_analyses)
+
+ # # Count existing submissions vs API calls
+ # existing_submissions_count = sum(1 for analysis in job_analyses if analysis.get('analysis', {}).get('source') == 'existing_submission')
+ # api_calls_count = sum(1 for analysis in job_analyses if analysis.get('analysis', {}).get('source') == 'api_call')
+
+ # # Clean up temporary file
+ # os.unlink(tmp_file_path)
+
+ # # Calculate processing time
+ # processing_time = time.time() - request_start_time
+ # logger.info(f"Request completed in {processing_time:.2f} seconds")
+
+ # return {
+ # "resume_info": resume_info,
+ # "industry": resume_info['industry'],
+ # "location": resume_info['location'],
+ # "experience_years": resume_info['yoe'],
+ # "jobs_analyzed": len(job_analyses),
+ # "location_filtered": not location_filtered_jobs.empty,
+ # "experience_filtered": not experience_filtered_jobs.empty,
+ # "priority_filtered": not priority_filtered_jobs.empty,
+ # "existing_submissions_used": existing_submissions_count,
+ # "api_calls_made": api_calls_count,
+ # "candidate_found": candidate_id is not None,
+ # "processing_time_seconds": round(processing_time, 2),
+ # "job_analyses": job_analyses
+ # }
+
+ # except Exception as e:
+ # # Clean up temporary file in case of error
+ # if os.path.exists(tmp_file_path):
+ # os.unlink(tmp_file_path)
+ # raise e
+
+ # except Exception as e:
+ # logger.error(f"Processing failed: {str(e)}", exc_info=True)
+ # return JSONResponse(
+ # status_code=500,
+ # content={"error": f"Processing failed: {str(e)}"}
+ # )
+
+ # @app.get("/health")
+ # async def health_check(api_key: str = Depends(verify_api_key)):
+ # """
+ # Health check endpoint with database connectivity check
+ # """
+ # health_status = {
+ # "status": "healthy",
+ # "message": "Job Recommendation API is running",
+ # "timestamp": time.time()
+ # }
+
+ # # Check database connectivity
+ # try:
+ # with engine.connect() as conn:
+ # result = conn.execute(text("SELECT 1"))
+ # health_status["database"] = "connected"
+ # except Exception as e:
+ # logger.error(f"Database health check failed: {e}")
+ # health_status["database"] = "disconnected"
+ # health_status["status"] = "degraded"
+
+ # return health_status
+
+ # @app.get("/")
+ # async def root():
+ # """
+ # Root endpoint
+ # """
+ # return {
+ # "message": "Job Recommendation API",
+ # "version": "1.0.0",
+ # "docs": "/docs",
+ # "health": "/health"
+ # }
+
+ # if __name__ == "__main__":
+ # import uvicorn
+ # port = int(os.getenv("PORT", 8080))
+ # logger.info(f"Starting server on port {port}")
+ # uvicorn.run(app, host="0.0.0.0", port=port)
+
+
+
 import pandas as pd
 import requests
 from pydantic import BaseModel, Field
@@ -147,10 +1232,10 @@ def get_access_token():
 return access_token
 
 try:
- login_url = "https://fitscore-agent-535960463668.us-central1.run.app/auth/login"
+ login_url = str(os.getenv("login_url"))
 login_data = {
- "email": "johnrecruiter@example.com",
- "password": "Password@123"
+ "email": str(os.getenv("email")),
+ "password": str(os.getenv("password"))
 }
 login_headers = {
 'accept': 'application/json',
@@ -158,7 +1243,7 @@ def get_access_token():
 }
 
 # Add timeout to prevent hanging
- login_response = requests.post(login_url, headers=login_headers, json=login_data, timeout=LOGIN_TIMEOUT)
+ login_response = requests.post(login_url, headers=login_headers, json=login_data, timeout=None)
 
 if login_response.status_code == 200:
 login_result = login_response.json()
@@ -188,7 +1273,7 @@ def generate_smart_hiring_collateral(job_description_text: str) -> tuple[str, st
 Returns a tuple of (collateral, job_id)
 """
 try:
- url = "https://fitscore-agent-535960463668.us-central1.run.app/smart-hiring/generate"
+ url = str(os.getenv("smart_hiring_url"))
 
 # Generate a unique job ID using UUID
 job_id = str(uuid.uuid4())
@@ -206,7 +1291,7 @@ def generate_smart_hiring_collateral(job_description_text: str) -> tuple[str, st
 }
 
 # Make the API request
- response = requests.post(url, headers=headers, data=payload, timeout=EXTERNAL_API_TIMEOUT)
+ response = requests.post(url, headers=headers, data=payload, timeout=None)
 
 if response.status_code == 200:
 logger.info("Smart hiring collateral generated successfully")
@@ -234,7 +1319,7 @@ def generate_smart_hiring_collateral(job_description_text: str) -> tuple[str, st
 new_token = get_access_token()
 if new_token:
 headers['Authorization'] = f'Bearer {new_token}'
- response = requests.post(url, headers=headers, data=payload, timeout=EXTERNAL_API_TIMEOUT)
+ response = requests.post(url, headers=headers, data=payload, timeout=None)
 if response.status_code == 200:
 logger.info("Smart hiring collateral generated successfully with fresh token")
 # Parse the response to extract smart_hiring_criteria
@@ -777,7 +1862,7 @@ def analyze_job_fit(job_description: str, resume_file_path: str, job_row: pd.Ser
 Analyze job-candidate fit using the external API
 """
 
- url = "https://fitscore-agent-535960463668.us-central1.run.app/analyze"
+ url = str(os.getenv("analyze_url"))
 
 # Check if resume file exists
 if not os.path.exists(resume_file_path):
@@ -820,8 +1905,8 @@ def analyze_job_fit(job_description: str, resume_file_path: str, job_row: pd.Ser
 
 try:
 # Make the API request with configured timeout
- response = requests.post(url, headers=headers, files=files, data=data, timeout=EXTERNAL_API_TIMEOUT)
-
+ response = requests.post(url, headers=headers, files=files, data=data, timeout=None)
+
 # If we get an authentication error, try to get a fresh token and retry once
 if response.status_code == 401:
 logger.warning("Authentication failed, getting fresh token...")
@@ -833,7 +1918,7 @@ def analyze_job_fit(job_description: str, resume_file_path: str, job_row: pd.Ser
 # Close the previous file and reopen
 files['resume'][1].close()
 files['resume'] = (os.path.basename(resume_file_path), open(resume_file_path, 'rb'), 'application/pdf')
- response = requests.post(url, headers=headers, files=files, data=data, timeout=EXTERNAL_API_TIMEOUT)
+ response = requests.post(url, headers=headers, files=files, data=data, timeout=None)
 else:
 # If we can't get a fresh token, return error
 return {"error": "Authentication failed and could not obtain fresh token"}
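Note on configuration: the hunks above replace the hardcoded FitScore endpoints and login credentials with environment lookups (login_url, email, password, smart_hiring_url, analyze_url) and change the request timeouts to timeout=None, which in requests disables the client-side timeout, so these calls may block indefinitely. As a minimal, hypothetical sketch (not part of the commit), the environment the updated module now assumes could be validated at startup like this, relying on the load_dotenv() call the file already makes:

# Hypothetical startup check: verify the environment variables that
# reccomendation.py now reads via os.getenv(). The names come from the
# diff above; the fail-fast check itself is illustrative, not part of
# the commit.
import os
from dotenv import load_dotenv

load_dotenv()  # populate os.environ from a local .env file, if present

# str(os.getenv("x")) silently yields the string "None" when "x" is unset,
# so an explicit check fails more clearly than a malformed HTTP request later.
REQUIRED = ("login_url", "email", "password", "smart_hiring_url", "analyze_url")
missing = [name for name in REQUIRED if os.getenv(name) is None]
if missing:
    raise RuntimeError(f"Missing environment variables: {', '.join(missing)}")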