ak0601 committed
Commit 142f88b · verified · 1 Parent(s): 13dfe17

Update reccomendation.py

Files changed (1)
  1. reccomendation.py +0 -954
reccomendation.py CHANGED
@@ -1,957 +1,3 @@
1
- # import pandas as pd
2
- # import requests
3
- # from pydantic import BaseModel, Field
4
- # from typing import List, Tuple, Optional
5
- # from langchain_openai import ChatOpenAI
6
- # from langchain_core.prompts import ChatPromptTemplate
7
- # import os
8
- # from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Depends, Header, Request
9
- # from fastapi.responses import JSONResponse
10
- # from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
11
- # from fastapi.middleware.cors import CORSMiddleware
12
- # import json
13
- # import tempfile
14
- # import shutil
15
- # import PyPDF2
16
- # from dotenv import load_dotenv
17
- # import pdfplumber
18
- # import re
19
- # from db import *
20
- # import time
21
- # import asyncio
22
- # from contextlib import asynccontextmanager
23
- # import logging
24
- # from sqlalchemy.pool import NullPool
25
-
26
- # # Load environment variables
27
- # load_dotenv()
28
-
29
- # # Configure logging for Cloud Run
30
- # logging.basicConfig(
31
- # level=logging.INFO,
32
- # format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
33
- # )
34
- # logger = logging.getLogger(__name__)
35
-
36
- # # Global variable to store access token
37
- # access_token = None
38
-
39
- # # Startup/shutdown events
40
- # @asynccontextmanager
41
- # async def lifespan(app: FastAPI):
42
- # # Startup
43
- # logger.info("Starting up Job Recommendation API...")
44
- # # You can initialize connection pools here if needed
45
- # yield
46
- # # Shutdown
47
- # logger.info("Shutting down Job Recommendation API...")
48
- # # Close any open connections here
49
-
50
- # # Initialize FastAPI app with lifespan
51
- # app = FastAPI(
52
- # title="Job Recommendation API",
53
- # description="API for processing resumes and recommending jobs",
54
- # lifespan=lifespan
55
- # )
56
-
57
- # # Add CORS middleware for cloud deployment
58
- # app.add_middleware(
59
- # CORSMiddleware,
60
- # allow_origins=["*"], # Configure based on your needs
61
- # allow_credentials=True,
62
- # allow_methods=["*"],
63
- # allow_headers=["*"],
64
- # )
65
-
66
- # # Add request ID middleware for better tracing
67
- # @app.middleware("http")
68
- # async def add_request_id(request: Request, call_next):
69
- # request_id = f"{time.time()}-{request.client.host}"
70
- # request.state.request_id = request_id
71
-
72
- # # Log the request
73
- # logger.info(f"Request ID: {request_id} - {request.method} {request.url.path}")
74
-
75
- # try:
76
- # response = await call_next(request)
77
- # response.headers["X-Request-ID"] = request_id
78
- # return response
79
- # except Exception as e:
80
- # logger.error(f"Request ID: {request_id} - Error: {str(e)}")
81
- # raise
82
-
83
- # # Security configuration
84
- # API_KEY = os.getenv("API_KEY")
85
- # security = HTTPBearer()
86
-
87
- # def verify_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
88
- # """
89
- # Verify the API key from the Authorization header
90
- # """
91
- # if not API_KEY:
92
- # logger.error("API key not configured")
93
- # raise HTTPException(
94
- # status_code=500,
95
- # detail="API key not configured",
96
- # )
97
-
98
- # if credentials.credentials != API_KEY:
99
- # logger.warning("Invalid API key attempt")
100
- # raise HTTPException(
101
- # status_code=401,
102
- # detail="Invalid API key",
103
- # headers={"WWW-Authenticate": "Bearer"},
104
- # )
105
- # return credentials.credentials
106
-
107
- # # Initialize OpenAI client with error handling
108
- # try:
109
- # llm = ChatOpenAI(
110
- # model="gpt-4o-mini",
111
- # temperature=0,
112
- # api_key=os.getenv("OPENAI_API_KEY")
113
- # )
114
- # logger.info("OpenAI client initialized successfully")
115
- # except Exception as e:
116
- # logger.error(f"Failed to initialize OpenAI client: {e}")
117
- # raise
118
-
119
- # # Initialize database engine with connection pooling suitable for Cloud Run
120
- # def get_engine():
121
- # """
122
- # Get database engine with NullPool for Cloud Run
123
- # """
124
- # try:
125
- # conn_string = f"postgresql://{DB_PARAMS['user']}:{DB_PARAMS['password']}@{DB_PARAMS['host']}:{DB_PARAMS['port']}/{DB_PARAMS['dbname']}"
126
- # # Use NullPool for Cloud Run to avoid connection issues
127
- # engine = create_engine(conn_string, poolclass=NullPool, pool_pre_ping=True)
128
- # logger.info("Database engine created successfully")
129
- # return engine
130
- # except Exception as e:
131
- # logger.error(f"Failed to create database engine: {e}")
132
- # raise
133
-
134
- # # Initialize database engine
135
- # engine = get_engine()
136
-
137
- # def get_access_token():
138
- # """
139
- # Get access token for the external API with better error handling
140
- # """
141
- # global access_token
142
-
143
- # # If we already have a token, return it
144
- # if access_token:
145
- # return access_token
146
-
147
- # try:
148
- # login_url = "https://fitscore-agent-535960463668.us-central1.run.app/auth/login"
149
- # login_data = {
150
- # "email": "[email protected]",
151
- # "password": "Password@123"
152
- # }
153
- # login_headers = {
154
- # 'accept': 'application/json',
155
- # 'Content-Type': 'application/json'
156
- # }
157
-
158
- # # Add timeout to prevent hanging
159
- # login_response = requests.post(login_url, headers=login_headers, json=login_data, timeout=30)
160
-
161
- # if login_response.status_code == 200:
162
- # login_result = login_response.json()
163
- # access_token = login_result.get('data', {}).get('tokens', {}).get('accessToken')
164
- # if access_token:
165
- # logger.info("Successfully obtained access token")
166
- # return access_token
167
- # else:
168
- # logger.error("Login successful but no access token found in response")
169
- # return None
170
- # else:
171
- # logger.error(f"Login failed with status {login_response.status_code}: {login_response.text}")
172
- # return None
173
- # except requests.exceptions.Timeout:
174
- # logger.error("Login request timed out")
175
- # return None
176
- # except requests.exceptions.RequestException as e:
177
- # logger.error(f"Network error during login: {e}")
178
- # return None
179
- # except Exception as e:
180
- # logger.error(f"Unexpected error getting access token: {e}")
181
- # return None
182
-
183
- # class structure(BaseModel):
184
- # name: str = Field(description="Name of the candidate")
185
- # location: str = Field(description="The location of the candidate. Extract city and state if possible.")
186
- # skills: List[str] = Field(description="List of individual skills of the candidate")
187
- # ideal_jobs: str = Field(description="List of ideal jobs for the candidate based on past experience.")
188
- # email: str = Field(description="The email of the candidate")
189
- # yoe: str = Field(description="Years of experience of the candidate.")
190
- # experience: str = Field(description="A brief summary of the candidate's past experience.")
191
- # industry: str = Field(description="The industry the candidate has experience in.(Tech,Legal,Finance/Accounting,Healthcare,Industrial,Logistics,Telecom,Admin,Other)")
192
-
193
- # class JobAnalysis(BaseModel):
194
- # job_title: str
195
- # company_name: str
196
- # analysis: dict
197
-
198
- # def extract_text_from_pdf(pdf_file_path: str) -> str:
199
- # """
200
- # Extract text from PDF file using multiple methods for better accuracy
201
- # """
202
- # text = ""
203
-
204
- # # Method 1: Try pdfplumber (better for complex layouts)
205
- # try:
206
- # with pdfplumber.open(pdf_file_path) as pdf:
207
- # for page in pdf.pages:
208
- # page_text = page.extract_text()
209
- # if page_text:
210
- # text += page_text + "\n"
211
- # if text.strip():
212
- # logger.info(f"Successfully extracted text using pdfplumber: {len(text)} characters")
213
- # return text.strip()
214
- # except Exception as e:
215
- # logger.warning(f"pdfplumber failed: {e}")
216
-
217
- # # Method 2: Try PyPDF2 (fallback)
218
- # try:
219
- # with open(pdf_file_path, 'rb') as file:
220
- # pdf_reader = PyPDF2.PdfReader(file)
221
- # for page in pdf_reader.pages:
222
- # page_text = page.extract_text()
223
- # if page_text:
224
- # text += page_text + "\n"
225
- # if text.strip():
226
- # logger.info(f"Successfully extracted text using PyPDF2: {len(text)} characters")
227
- # return text.strip()
228
- # except Exception as e:
229
- # logger.error(f"PyPDF2 failed: {e}")
230
-
231
- # # If both methods fail, return empty string
232
- # logger.error("Failed to extract text from PDF")
233
- # return ""
234
-
235
- # def extract_resume_info(resume_text: str) -> structure:
236
- # """
237
- # Extract structured information from resume using LLM
238
- # """
239
- # prompt = ChatPromptTemplate.from_template("""
240
- # You are an expert resume parser. Extract the following information from the resume text provided and return it in a structured JSON format.
241
-
242
- # Resume Text:
243
- # {resume_text}
244
-
245
- # Please extract and structure the information according to the following schema:
246
- # - name: Full name of the candidate
247
- # - location: City and state if available, otherwise general location
248
- # - skills: List of technical skills, tools, technologies, programming languages, etc.
249
- # - ideal_jobs: Based on their experience, what types of jobs would be ideal for this candidate
250
- # - email: Email address of the candidate (if found in resume)
251
- # - yoe: Years of experience (extract from work history)
252
- # - experience: Brief summary of their work experience and background
253
- # - industry: Categorize into one of these industries: Tech, Legal, Finance/Accounting, Healthcare, Industrial, Logistics, Telecom, Admin, Other
254
-
255
- # Return ONLY a valid JSON object with these fields. Do not include any other text or explanations.
256
- # """)
257
-
258
- # try:
259
- # str_llm = llm.with_structured_output(structure)
260
- # chain = prompt | str_llm
261
- # response = chain.invoke({"resume_text": resume_text})
262
-
263
- # validated_data = {
264
- # 'name': response.name,
265
- # 'location': response.location,
266
- # 'email': response.email,
267
- # 'skills': response.skills,
268
- # 'ideal_jobs': response.ideal_jobs,
269
- # 'yoe': response.yoe,
270
- # 'experience': response.experience,
271
- # 'industry': response.industry
272
- # }
273
-
274
- # logger.info(f"Successfully extracted resume info for: {validated_data['name']}")
275
- # return validated_data
276
-
277
- # except Exception as e:
278
- # logger.error(f"Failed to extract resume info: {e}")
279
- # return {
280
- # 'name': "Unknown",
281
- # 'location': "Unknown",
282
- # 'email': "",
283
- # 'skills': [],
284
- # 'ideal_jobs': "Software Engineer",
285
- # 'yoe': "0",
286
- # 'experience': "No experience listed",
287
- # 'industry': "Tech"
288
- # }
289
-
290
- # def filter_jobs_by_industry(jobs_df: pd.DataFrame, target_industry: str) -> pd.DataFrame:
291
- # """
292
- # Filter jobs by industry
293
- # """
294
- # # Map the extracted industry to database industry values
295
- # industry_mapping = {
296
- # 'Tech': ['technology', 'VC Tech'],
297
- # 'Legal': ['Legal'],
298
- # 'Finance/Accounting': ['finance/Accounting'],
299
- # 'Healthcare': ['healthcare'],
300
- # 'Industrial': ['industrial'],
301
- # 'Logistics': ['logistics'],
302
- # 'Telecom': ['telecom'],
303
- # 'Admin': ['admin'],
304
- # 'Other': ['Other']
305
- # }
306
-
307
- # target_industries = industry_mapping.get(target_industry, ['Tech'])
308
-
309
- # # Filter jobs by industry (using database column name 'industry')
310
- # filtered_jobs = jobs_df[jobs_df['industry'].isin(target_industries)]
311
-
312
- # logger.info(f"Filtered {len(filtered_jobs)} jobs for industry: {target_industry}")
313
- # return filtered_jobs
314
-
315
- # def filter_jobs_by_location(jobs_df: pd.DataFrame, candidate_location: str) -> pd.DataFrame:
316
- # """
317
- # Filter jobs by location matching the candidate's location
318
- # """
319
- # if not candidate_location or candidate_location.lower() in ['unknown', 'n/a', '']:
320
- # logger.info(f"No location info provided, returning all {len(jobs_df)} jobs")
321
- # return jobs_df # Return all jobs if no location info
322
-
323
- # # Clean and normalize candidate location
324
- # candidate_location = candidate_location.lower().strip()
325
- # logger.info(f"Filtering jobs for candidate location: {candidate_location}")
326
-
327
- # # Extract state abbreviations and full names
328
- # state_mapping = {
329
- # 'alabama': 'al', 'alaska': 'ak', 'arizona': 'az', 'arkansas': 'ar', 'california': 'ca',
330
- # 'colorado': 'co', 'connecticut': 'ct', 'delaware': 'de', 'district of columbia': 'dc', 'florida': 'fl', 'georgia': 'ga',
331
- # 'hawaii': 'hi', 'idaho': 'id', 'illinois': 'il', 'indiana': 'in', 'iowa': 'ia',
332
- # 'kansas': 'ks', 'kentucky': 'ky', 'louisiana': 'la', 'maine': 'me', 'maryland': 'md',
333
- # 'massachusetts': 'ma', 'michigan': 'mi', 'minnesota': 'mn', 'mississippi': 'ms', 'missouri': 'mo',
334
- # 'montana': 'mt', 'nebraska': 'ne', 'nevada': 'nv', 'new hampshire': 'nh', 'new jersey': 'nj',
335
- # 'new mexico': 'nm', 'new york': 'ny', 'north carolina': 'nc', 'north dakota': 'nd', 'ohio': 'oh',
336
- # 'oklahoma': 'ok', 'oregon': 'or', 'pennsylvania': 'pa', 'rhode island': 'ri', 'south carolina': 'sc',
337
- # 'south dakota': 'sd', 'tennessee': 'tn', 'texas': 'tx', 'utah': 'ut', 'vermont': 'vt',
338
- # 'virginia': 'va', 'washington': 'wa', 'west virginia': 'wv', 'wisconsin': 'wi', 'wyoming': 'wy'
339
- # }
340
-
341
- # # Create location patterns to match
342
- # location_patterns = []
343
-
344
- # # Add the original location
345
- # location_patterns.append(candidate_location)
346
-
347
- # # Add state variations
348
- # for state_name, state_abbr in state_mapping.items():
349
- # if state_name in candidate_location or state_abbr in candidate_location:
350
- # location_patterns.extend([state_name, state_abbr])
351
-
352
- # # Add common city variations (extract city name)
353
- # city_match = re.search(r'^([^,]+)', candidate_location)
354
- # if city_match:
355
- # city_name = city_match.group(1).strip()
356
- # location_patterns.append(city_name)
357
-
358
- # # Add remote/anywhere patterns if location is remote
359
- # if 'remote' in candidate_location or 'anywhere' in candidate_location:
360
- # location_patterns.extend(['remote', 'anywhere', 'work from home', 'wfh'])
361
-
362
- # logger.info(f"Location patterns to match: {location_patterns}")
363
-
364
- # # Filter jobs by location
365
- # matching_jobs = []
366
-
367
- # for _, job_row in jobs_df.iterrows():
368
- # job_location = str(job_row.get('job_location', '')).lower()
369
-
370
- # # Check if any location pattern matches
371
- # location_matches = any(pattern in job_location for pattern in location_patterns)
372
-
373
- # # Also check for remote jobs if candidate location includes remote
374
- # if 'remote' in candidate_location and any(remote_term in job_location for remote_term in ['remote', 'anywhere', 'work from home', 'wfh']):
375
- # location_matches = True
376
-
377
- # # Check for exact city/state matches
378
- # if candidate_location in job_location or job_location in candidate_location:
379
- # location_matches = True
380
-
381
- # if location_matches:
382
- # matching_jobs.append(job_row)
383
-
384
- # result_df = pd.DataFrame(matching_jobs) if matching_jobs else jobs_df
385
- # logger.info(f"Found {len(matching_jobs)} jobs matching location out of {len(jobs_df)} total jobs")
386
-
387
- # return result_df
388
-
389
- # def extract_experience_requirement(requirements_text: str) -> dict:
390
- # """
391
- # Extract experience requirements from job requirements text
392
- # Returns a dictionary with min_years, max_years, and level
393
- # """
394
- # if not requirements_text or pd.isna(requirements_text):
395
- # return {'min_years': 0, 'max_years': 999, 'level': 'any'}
396
-
397
- # requirements_text = str(requirements_text).lower()
398
-
399
- # # Common experience patterns
400
- # experience_patterns = [
401
- # # Specific year ranges
402
- # r'(\d+)[\-\+]\s*(\d+)\s*years?\s*experience',
403
- # r'(\d+)\s*to\s*(\d+)\s*years?\s*experience',
404
- # r'(\d+)\s*-\s*(\d+)\s*years?\s*experience',
405
-
406
- # # Minimum years
407
- # r'(\d+)\+?\s*years?\s*experience',
408
- # r'minimum\s*(\d+)\s*years?\s*experience',
409
- # r'at\s*least\s*(\d+)\s*years?\s*experience',
410
-
411
- # # Level-based patterns
412
- # r'(entry\s*level|junior|associate)',
413
- # r'(mid\s*level|intermediate|mid\s*senior)',
414
- # r'(senior|lead|principal|staff)',
415
- # r'(executive|director|vp|chief|c\s*level)',
416
-
417
- # # Specific year mentions
418
- # r'(\d+)\s*years?\s*in\s*the\s*field',
419
- # r'(\d+)\s*years?\s*of\s*professional\s*experience',
420
- # r'(\d+)\s*years?\s*of\s*relevant\s*experience'
421
- # ]
422
-
423
- # min_years = 0
424
- # max_years = 999
425
- # level = 'any'
426
-
427
- # # Check for specific year ranges
428
- # for pattern in experience_patterns[:3]: # First 3 patterns are for ranges
429
- # matches = re.findall(pattern, requirements_text)
430
- # if matches:
431
- # try:
432
- # min_years = int(matches[0][0])
433
- # max_years = int(matches[0][1])
434
- # break
435
- # except (ValueError, IndexError):
436
- # continue
437
-
438
- # # Check for minimum years if no range found
439
- # if min_years == 0:
440
- # for pattern in experience_patterns[3:6]: # Minimum year patterns
441
- # matches = re.findall(pattern, requirements_text)
442
- # if matches:
443
- # try:
444
- # min_years = int(matches[0])
445
- # break
446
- # except (ValueError, IndexError):
447
- # continue
448
-
449
- # # Check for level-based requirements
450
- # for pattern in experience_patterns[6:10]: # Level patterns
451
- # matches = re.findall(pattern, requirements_text)
452
- # if matches:
453
- # level_match = matches[0].lower()
454
- # if 'entry' in level_match or 'junior' in level_match or 'associate' in level_match:
455
- # level = 'entry'
456
- # if min_years == 0:
457
- # min_years = 0
458
- # max_years = 2
459
- # elif 'mid' in level_match or 'intermediate' in level_match:
460
- # level = 'mid'
461
- # if min_years == 0:
462
- # min_years = 2
463
- # max_years = 5
464
- # elif 'senior' in level_match or 'lead' in level_match or 'principal' in level_match or 'staff' in level_match:
465
- # level = 'senior'
466
- # if min_years == 0:
467
- # min_years = 5
468
- # max_years = 10
469
- # elif 'executive' in level_match or 'director' in level_match or 'vp' in level_match or 'chief' in level_match:
470
- # level = 'executive'
471
- # if min_years == 0:
472
- # min_years = 10
473
- # max_years = 999
474
- # break
475
-
476
- # # Check for specific year mentions if still no match
477
- # if min_years == 0:
478
- # for pattern in experience_patterns[10:]: # Specific year mention patterns
479
- # matches = re.findall(pattern, requirements_text)
480
- # if matches:
481
- # try:
482
- # min_years = int(matches[0])
483
- # max_years = min_years + 2 # Add buffer
484
- # break
485
- # except (ValueError, IndexError):
486
- # continue
487
-
488
- # return {
489
- # 'min_years': min_years,
490
- # 'max_years': max_years,
491
- # 'level': level
492
- # }
493
-
494
- # def filter_jobs_by_experience(jobs_df: pd.DataFrame, candidate_yoe: str) -> pd.DataFrame:
495
- # """
496
- # Filter jobs by experience level matching the candidate's years of experience
497
- # """
498
- # if not candidate_yoe or candidate_yoe.lower() in ['unknown', 'n/a', '']:
499
- # logger.info(f"No experience info provided, returning all {len(jobs_df)} jobs")
500
- # return jobs_df
501
-
502
- # # Extract numeric years from candidate experience
503
- # try:
504
- # # Handle various formats like "5 years", "5+ years", "5-7 years", etc.
505
- # yoe_match = re.search(r'(\d+(?:\.\d+)?)', str(candidate_yoe))
506
- # if yoe_match:
507
- # candidate_years = float(yoe_match.group(1))
508
- # else:
509
- # logger.warning(f"Could not extract years from: {candidate_yoe}")
510
- # return jobs_df
511
- # except (ValueError, TypeError):
512
- # logger.error(f"Invalid experience format: {candidate_yoe}")
513
- # return jobs_df
514
-
515
- # logger.info(f"Filtering jobs for candidate with {candidate_years} years of experience")
516
-
517
- # # Filter jobs by experience requirements
518
- # matching_jobs = []
519
-
520
- # for _, job_row in jobs_df.iterrows():
521
- # requirements_text = str(job_row.get('requirements', ''))
522
- # experience_req = extract_experience_requirement(requirements_text)
523
-
524
- # # Check if candidate's experience matches the job requirements
525
- # if (candidate_years >= experience_req['min_years'] and
526
- # candidate_years <= experience_req['max_years']):
527
- # matching_jobs.append(job_row)
528
-
529
- # result_df = pd.DataFrame(matching_jobs) if matching_jobs else jobs_df
530
- # logger.info(f"Found {len(matching_jobs)} jobs matching experience out of {len(jobs_df)} total jobs")
531
-
532
- # return result_df
533
-
534
- # def filter_jobs_by_priority(jobs_df: pd.DataFrame) -> pd.DataFrame:
535
- # """
536
- # Filter jobs to only include high priority jobs
537
- # """
538
- # if jobs_df.empty:
539
- # logger.info("No jobs to filter by priority")
540
- # return jobs_df
541
-
542
- # # Filter jobs by priority - only include high priority jobs
543
- # priority_filtered_jobs = jobs_df[jobs_df['priority'].str.lower() == 'high']
544
-
545
- # logger.info(f"Found {len(priority_filtered_jobs)} high priority jobs out of {len(jobs_df)} total jobs")
546
-
547
- # return priority_filtered_jobs
548
-
549
- # def create_job_description(job_row: pd.Series) -> str:
550
- # """
551
- # Create a comprehensive job description from job data
552
- # """
553
- # description_parts = []
554
-
555
- # if pd.notna(job_row.get('company_blurb')):
556
- # description_parts.append(f"Company: {job_row['company_blurb']}")
557
-
558
- # if pd.notna(job_row.get('company_culture')):
559
- # description_parts.append(f"Company Culture: {job_row['company_culture']}")
560
-
561
- # if pd.notna(job_row.get('requirements')):
562
- # description_parts.append(f"Requirements: {job_row['requirements']}")
563
-
564
- # if pd.notna(job_row.get('role_responsibilities')):
565
- # description_parts.append(f"Role Responsibilities: {job_row['role_responsibilities']}")
566
-
567
- # if pd.notna(job_row.get('job_location')):
568
- # description_parts.append(f"Location: {job_row['job_location']}")
569
-
570
- # return "\n\n".join(description_parts)
571
-
572
- # def clean_analysis_result(analysis_result: dict) -> dict:
573
- # """
574
- # Clean up the analysis result to only include final_score and summary
575
- # """
576
- # if not isinstance(analysis_result, dict):
577
- # return analysis_result
578
-
579
- # # Remove user_context if present
580
- # if 'user_context' in analysis_result:
581
- # del analysis_result['user_context']
582
-
583
- # # Clean up final_response if present
584
- # if 'final_response' in analysis_result:
585
- # try:
586
- # # Handle both string and dict formats
587
- # if isinstance(analysis_result['final_response'], str):
588
- # final_response = json.loads(analysis_result['final_response'])
589
- # else:
590
- # final_response = analysis_result['final_response']
591
-
592
- # # Extract and format the evaluation data
593
- # if 'evaluation' in final_response and len(final_response['evaluation']) > 0:
594
- # evaluation = final_response['evaluation'][0]
595
-
596
- # # Create a minimal structure with only final_score and summary
597
- # cleaned_response = {
598
- # 'final_score': evaluation.get('final_score', 0),
599
- # 'summary': {}
600
- # }
601
-
602
- # # Extract summary information
603
- # if 'summary' in evaluation and len(evaluation['summary']) > 0:
604
- # summary = evaluation['summary'][0]
605
- # cleaned_response['summary'] = {
606
- # 'strengths': summary.get('strengths', []),
607
- # 'weaknesses': summary.get('weaknesses', []),
608
- # 'opportunities': summary.get('opportunities', []),
609
- # 'recommendations': summary.get('recommendations', [])
610
- # }
611
-
612
- # analysis_result['final_response'] = cleaned_response
613
-
614
- # except (json.JSONDecodeError, KeyError, IndexError) as e:
615
- # logger.error(f"Error cleaning analysis result: {e}")
616
- # # Keep original if cleaning fails
617
- # pass
618
-
619
- # return analysis_result
620
-
621
- # def sort_jobs_by_score(job_analyses: list) -> list:
622
- # """
623
- # Sort jobs by final_score in descending order (highest scores first)
624
- # """
625
- # def extract_score(job_analysis):
626
- # try:
627
- # analysis = job_analysis.get('analysis', {})
628
- # if 'final_response' in analysis and isinstance(analysis['final_response'], dict):
629
- # return analysis['final_response'].get('final_score', 0)
630
- # return 0
631
- # except:
632
- # return 0
633
-
634
- # return sorted(job_analyses, key=extract_score, reverse=True)
635
-
636
- # async def analyze_job_fit_with_retry(job_description: str, resume_file_path: str, max_retries: int = 3) -> dict:
637
- # """
638
- # Analyze job-candidate fit with retry logic for resilience
639
- # """
640
- # for attempt in range(max_retries):
641
- # try:
642
- # result = analyze_job_fit(job_description, resume_file_path)
643
- # if "error" not in result:
644
- # return result
645
-
646
- # # If authentication error and not last attempt, retry
647
- # if "Authentication failed" in result.get("error", "") and attempt < max_retries - 1:
648
- # logger.warning(f"Authentication failed, retrying... (attempt {attempt + 1}/{max_retries})")
649
- # global access_token
650
- # access_token = None # Reset token to force refresh
651
- # await asyncio.sleep(2 ** attempt) # Exponential backoff
652
- # continue
653
-
654
- # return result
655
- # except Exception as e:
656
- # logger.error(f"Attempt {attempt + 1}/{max_retries} failed: {str(e)}")
657
- # if attempt == max_retries - 1:
658
- # return {"error": f"Failed after {max_retries} attempts: {str(e)}"}
659
- # await asyncio.sleep(2 ** attempt)
660
-
661
- # def analyze_job_fit(job_description: str, resume_file_path: str) -> dict:
662
- # """
663
- # Analyze job-candidate fit using the external API
664
- # """
665
-
666
- # url = "https://fitscore-agent-535960463668.us-central1.run.app/analyze"
667
-
668
- # # Check if resume file exists
669
- # if not os.path.exists(resume_file_path):
670
- # logger.error(f"Resume file not found: {resume_file_path}")
671
- # return {"error": f"Resume file not found: {resume_file_path}"}
672
-
673
-
674
- # # Prepare headers with authentication
675
- # headers = {
676
- # 'accept': 'application/json',
677
- # 'Authorization': f'Bearer {get_access_token()}'
678
- # }
679
-
680
- # # Prepare form data
681
- # files = {
682
- # 'resume': (os.path.basename(resume_file_path), open(resume_file_path, 'rb'), 'application/pdf')
683
- # }
684
-
685
- # data = {
686
- # 'jd_text': job_description
687
- # }
688
-
689
- # try:
690
- # # Make the API request with longer timeout for cloud environments
691
- # response = requests.post(url, headers=headers, files=files, data=data, timeout=None)
692
-
693
- # # If we get an authentication error, try to get a fresh token and retry once
694
- # if response.status_code == 401:
695
- # logger.warning("Authentication failed, getting fresh token...")
696
- # global access_token
697
- # access_token = None # Reset the token
698
- # new_token = get_access_token()
699
- # if new_token:
700
- # headers['Authorization'] = f'Bearer {new_token}'
701
- # # Close the previous file and reopen
702
- # files['resume'][1].close()
703
- # files['resume'] = (os.path.basename(resume_file_path), open(resume_file_path, 'rb'), 'application/pdf')
704
- # response = requests.post(url, headers=headers, files=files, data=data, timeout=None)
705
- # else:
706
- # # If we can't get a fresh token, return error
707
- # return {"error": "Authentication failed and could not obtain fresh token"}
708
-
709
- # if response.status_code == 200:
710
- # logger.info("Job fit analysis completed successfully")
711
- # return response.json()
712
- # elif response.status_code == 401:
713
- # # If we still get 401 after fresh token, return error
714
- # return {"error": "Authentication failed even with fresh token"}
715
- # else:
716
- # logger.error(f"API call failed with status {response.status_code}")
717
- # return {"error": f"API call failed with status {response.status_code}", "details": response.text}
718
-
719
- # except requests.exceptions.Timeout:
720
- # logger.error("API request timed out")
721
- # return {"error": "API request timed out"}
722
- # except Exception as e:
723
- # logger.error(f"Exception occurred: {str(e)}")
724
- # return {"error": f"Exception occurred: {str(e)}"}
725
- # finally:
726
- # # Ensure the file is closed
727
- # if 'resume' in files:
728
- # try:
729
- # files['resume'][1].close()
730
- # except:
731
- # pass
732
-
733
- # @app.post("/process_resume_and_recommend_jobs")
734
- # async def process_resume_and_recommend_jobs(
735
- # resume: UploadFile = File(...),
736
- # resume_text: str = Form(""),
737
- # api_key: str = Depends(verify_api_key)
738
- # ):
739
- # """
740
- # Process resume, extract information, filter jobs by industry, and analyze fit
741
- # """
742
- # request_start_time = time.time()
743
-
744
- # try:
745
- # logger.info(f"Processing resume: {resume.filename}")
746
-
747
- # # Save uploaded file temporarily
748
- # with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
749
- # shutil.copyfileobj(resume.file, tmp_file)
750
- # tmp_file_path = tmp_file.name
751
-
752
- # try:
753
- # # Extract text from PDF if no resume_text provided
754
- # if not resume_text:
755
- # resume_text = extract_text_from_pdf(tmp_file_path)
756
- # if not resume_text:
757
- # logger.error("Could not extract text from PDF file")
758
- # return JSONResponse(
759
- # status_code=400,
760
- # content={"error": "Could not extract text from PDF file"}
761
- # )
762
-
763
- # # Extract resume information using LLM
764
- # resume_info = extract_resume_info(resume_text)
765
-
766
- # # Load jobs data from PostgreSQL database
767
- # try:
768
- # jobs_df = pd.read_sql_table("jobs", con=engine)
769
- # candidates_df = pd.read_sql_table("candidates", con=engine)
770
- # submissions_df = pd.read_sql_table("candidate_submissions", con=engine)
771
- # logger.info(f"Loaded {len(jobs_df)} jobs, {len(candidates_df)} candidates, {len(submissions_df)} submissions")
772
- # except Exception as db_error:
773
- # logger.error(f"Database error: {db_error}")
774
- # return JSONResponse(
775
- # status_code=500,
776
- # content={"error": "Database connection error"}
777
- # )
778
-
779
- # # Filter jobs by industry
780
- # filtered_jobs = filter_jobs_by_industry(jobs_df, resume_info['industry'])
781
-
782
- # if filtered_jobs.empty:
783
- # logger.warning(f"No jobs found for industry: {resume_info['industry']}")
784
- # return JSONResponse(
785
- # status_code=404,
786
- # content={"message": f"No jobs found for industry: {resume_info['industry']}"}
787
- # )
788
-
789
- # # Filter jobs by location
790
- # location_filtered_jobs = filter_jobs_by_location(filtered_jobs, resume_info['location'])
791
-
792
- # # Filter jobs by experience level
793
- # experience_filtered_jobs = filter_jobs_by_experience(location_filtered_jobs, resume_info['yoe'])
794
-
795
- # # Filter jobs by priority
796
- # priority_filtered_jobs = filter_jobs_by_priority(experience_filtered_jobs)
797
-
798
- # # Use priority filtered jobs if available, otherwise fall back to experience filtered jobs, then location filtered jobs
799
- # if not priority_filtered_jobs.empty:
800
- # jobs_to_analyze = priority_filtered_jobs
801
- # elif not experience_filtered_jobs.empty:
802
- # jobs_to_analyze = experience_filtered_jobs
803
- # else:
804
- # jobs_to_analyze = location_filtered_jobs
805
-
806
- # # Create filtered_submission_df with job_ids from jobs_to_analyze
807
- # job_ids_to_analyze = jobs_to_analyze['id'].tolist()
808
- # filtered_submission_df = submissions_df[submissions_df['jobId'].isin(job_ids_to_analyze)]
809
-
810
- # # Check if candidate email exists in candidates_df
811
- # candidate_id = None
812
- # if resume_info.get('email'):
813
- # candidate_match = candidates_df[candidates_df['email'] == resume_info['email']]
814
- # if not candidate_match.empty:
815
- # candidate_id = candidate_match.iloc[0]['id']
816
- # logger.info(f"Found existing candidate with ID: {candidate_id}")
817
-
818
- # # Analyze job fit for each filtered job
819
- # job_analyses = []
820
-
821
- # for _, job_row in jobs_to_analyze.head(20).iterrows(): # Analyze top 20 jobs
822
- # job_id = job_row.get('id')
823
-
824
- # # Check if we have an existing submission for this candidate and job
825
- # existing_submission = None
826
- # if candidate_id and job_id:
827
- # submission_match = filtered_submission_df[
828
- # (filtered_submission_df['candidate_id'] == candidate_id) &
829
- # (filtered_submission_df['jobId'] == job_id)
830
- # ]
831
- # if not submission_match.empty:
832
- # existing_submission = submission_match.iloc[0]
833
- # logger.info(f"Found existing submission for job_id: {job_id}, candidate_id: {candidate_id}")
834
-
835
- # if existing_submission is not None:
836
- # # Use existing fit score from submission
837
- # fit_score = existing_submission.get('fit_score', 0)
838
- # existing_analysis = {
839
- # 'final_response': {
840
- # 'final_score': fit_score,
841
- # 'summary': {
842
- # 'strengths': [],
843
- # 'weaknesses': [],
844
- # 'opportunities': [],
845
- # 'recommendations': []
846
- # }
847
- # },
848
- # 'source': 'existing_submission'
849
- # }
850
- # analysis_result = existing_analysis
851
- # else:
852
- # # Call API for new analysis with retry logic
853
- # job_description = create_job_description(job_row)
854
- # analysis_result = await analyze_job_fit_with_retry(job_description, tmp_file_path)
855
- # analysis_result['source'] = 'api_call'
856
-
857
- # # Clean up the analysis result
858
- # cleaned_analysis = clean_analysis_result(analysis_result)
859
-
860
- # job_analysis = JobAnalysis(
861
- # job_title=job_row.get('job_title', 'Unknown'),
862
- # company_name=job_row.get('company_name', 'Unknown'),
863
- # analysis=cleaned_analysis
864
- # )
865
- # job_analyses.append(job_analysis.dict())
866
-
867
- # # Sort jobs by final_score in descending order (highest scores first)
868
- # job_analyses = sort_jobs_by_score(job_analyses)
869
-
870
- # # Count existing submissions vs API calls
871
- # existing_submissions_count = sum(1 for analysis in job_analyses if analysis.get('analysis', {}).get('source') == 'existing_submission')
872
- # api_calls_count = sum(1 for analysis in job_analyses if analysis.get('analysis', {}).get('source') == 'api_call')
873
-
874
- # # Clean up temporary file
875
- # os.unlink(tmp_file_path)
876
-
877
- # # Calculate processing time
878
- # processing_time = time.time() - request_start_time
879
- # logger.info(f"Request completed in {processing_time:.2f} seconds")
880
-
881
- # return {
882
- # "resume_info": resume_info,
883
- # "industry": resume_info['industry'],
884
- # "location": resume_info['location'],
885
- # "experience_years": resume_info['yoe'],
886
- # "jobs_analyzed": len(job_analyses),
887
- # "location_filtered": not location_filtered_jobs.empty,
888
- # "experience_filtered": not experience_filtered_jobs.empty,
889
- # "priority_filtered": not priority_filtered_jobs.empty,
890
- # "existing_submissions_used": existing_submissions_count,
891
- # "api_calls_made": api_calls_count,
892
- # "candidate_found": candidate_id is not None,
893
- # "processing_time_seconds": round(processing_time, 2),
894
- # "job_analyses": job_analyses
895
- # }
896
-
897
- # except Exception as e:
898
- # # Clean up temporary file in case of error
899
- # if os.path.exists(tmp_file_path):
900
- # os.unlink(tmp_file_path)
901
- # raise e
902
-
903
- # except Exception as e:
904
- # logger.error(f"Processing failed: {str(e)}", exc_info=True)
905
- # return JSONResponse(
906
- # status_code=500,
907
- # content={"error": f"Processing failed: {str(e)}"}
908
- # )
909
-
910
- # @app.get("/health")
911
- # async def health_check(api_key: str = Depends(verify_api_key)):
912
- # """
913
- # Health check endpoint with database connectivity check
914
- # """
915
- # health_status = {
916
- # "status": "healthy",
917
- # "message": "Job Recommendation API is running",
918
- # "timestamp": time.time()
919
- # }
920
-
921
- # # Check database connectivity
922
- # try:
923
- # with engine.connect() as conn:
924
- # result = conn.execute(text("SELECT 1"))
925
- # health_status["database"] = "connected"
926
- # except Exception as e:
927
- # logger.error(f"Database health check failed: {e}")
928
- # health_status["database"] = "disconnected"
929
- # health_status["status"] = "degraded"
930
-
931
- # return health_status
932
-
933
- # @app.get("/")
934
- # async def root():
935
- # """
936
- # Root endpoint
937
- # """
938
- # return {
939
- # "message": "Job Recommendation API",
940
- # "version": "1.0.0",
941
- # "docs": "/docs",
942
- # "health": "/health"
943
- # }
944
-
945
- # if __name__ == "__main__":
946
- # import uvicorn
947
- # port = int(os.getenv("PORT", 8080))
948
- # logger.info(f"Starting server on port {port}")
949
- # uvicorn.run(app, host="0.0.0.0", port=port)
950
-
951
-
952
-
953
-
954
-
955
  import pandas as pd
956
  import requests
957
  from pydantic import BaseModel, Field