faizanmumtaz committed on
Commit
fa525b5
1 Parent(s): 4f853b0

first commit

Files changed (4)
  1. app.py +79 -0
  2. dockerfile +23 -0
  3. main.py +93 -0
  4. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,79 @@
+ from fastapi import FastAPI, File, UploadFile, HTTPException, Form, BackgroundTasks
+ from fastapi.responses import FileResponse
+ import shutil, os, uuid, asyncio
+ from zipfile import ZipFile
+ from typing import List
+
+ app = FastAPI()
+
+ import logging
+
+ # Configure logging
+ logging.basicConfig(
+     filename='app.log',       # Log file path
+     level=logging.ERROR,      # Minimum severity level to log (e.g., ERROR, WARNING, INFO)
+     format='%(asctime)s %(levelname)s: %(message)s',
+     datefmt='%Y-%m-%d %H:%M:%S')
+
+ # Shared secret used as a simple API key (hard-coded placeholder)
+ SECRET_KEY = "123"
+ def cleanup_directory(path: str):
+     shutil.rmtree(path)
+
+ @app.post("/uploadfiles/")
+ def upload_files(
+     background_tasks: BackgroundTasks,
+     files: List[UploadFile] = File(...),
+     api_key: str = Form(...),
+     percentage: int = Form(...),
+     job_description: str = Form(...)):
+
+     if api_key != SECRET_KEY:
+         raise HTTPException(status_code=401, detail="Unauthorized")
+
+     if not (5 <= len(files) <= 900):
+         raise HTTPException(status_code=400, detail="Number of files must be between 5 and 900.")
+
+     if percentage < 0 or percentage > 100:
+         raise HTTPException(status_code=400, detail="Percentage must be between 0 and 100.")
+
+     # Validate that all files are PDFs
+     for file in files:
+         if not file.filename.endswith(".pdf"):
+             raise HTTPException(status_code=400, detail="All files must be PDF.")
+
+     pdfs_paths = []
+     main_path = f"data/resumes/{str(uuid.uuid4())}"
+
+     # Create the directory if it doesn't exist
+     os.makedirs(main_path, exist_ok=True)
+
+     for file in files:
+         file_location = f"{main_path}/{file.filename}"
+
+         with open(file_location, "wb") as buffer:
+             shutil.copyfileobj(file.file, buffer)
+
+         pdfs_paths.append(file_location)
+
+     try:
+         pdfs_path_with_description = [{"pdf_path": pdf_path, "job_des": job_description} for pdf_path in pdfs_paths]
+
+         from main import compression  # compression is defined in main.py
+         # short_listed_files_paths = asyncio.run(compression(pdfs_path_with_description, percentage))
+         short_listed_files_paths = compression(pdfs_path_with_description, percentage)
+
+         zip_filename = f"{main_path}/{str(uuid.uuid4())}.zip"
+
+         with ZipFile(zip_filename, 'w') as zipf:
+             for file_path in short_listed_files_paths:
+                 zipf.write(file_path, os.path.basename(file_path))
+
+         # Return the file response and clean up after sending
+         background_tasks.add_task(cleanup_directory, main_path)
+         return FileResponse(zip_filename, media_type='application/zip', filename=os.path.basename(zip_filename))
+
+     except Exception as e:
+         logging.error(f"Error occurred: {str(e)}")
+         cleanup_directory(main_path)
+         raise HTTPException(status_code=500, detail=str(e))
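
A minimal client sketch for exercising the /uploadfiles/ endpoint above. It assumes the API is served locally on the port the Dockerfile below exposes (7860), that the third-party requests package is installed, and that a local resumes/ folder holds the PDFs; none of these are part of this commit. Note that FastAPI also needs python-multipart installed to parse the Form/File fields, which is not pinned in requirements.txt.

# Client-side sketch (assumptions: local server on port 7860, `requests` installed,
# a hypothetical resumes/ folder of PDFs).
import glob
import os
import requests

pdf_paths = glob.glob("resumes/*.pdf")
files = [("files", (os.path.basename(p), open(p, "rb"), "application/pdf")) for p in pdf_paths]

response = requests.post(
    "http://localhost:7860/uploadfiles/",
    files=files,
    data={
        "api_key": "123",        # must match SECRET_KEY in app.py
        "percentage": 30,        # keep the top 30% of CVs
        "job_description": "Python developer with FastAPI experience",
    },
    timeout=600,
)
response.raise_for_status()

# The endpoint returns a zip archive of the shortlisted CVs.
with open("shortlisted.zip", "wb") as fh:
    fh.write(response.content)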
dockerfile ADDED
@@ -0,0 +1,23 @@
+ # Use the official Python image from the Docker Hub
+ FROM python:3.12-slim
+
+ # Set the working directory in the container
+ WORKDIR /app
+
+ # Copy the requirements file into the container at /app
+ COPY requirements.txt .
+
+ # Install the dependencies
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the rest of the application code into the container at /app
+ COPY . .
+
+ # Expose the port that the app runs on
+ EXPOSE 7860
+
+ # Command to run the Uvicorn server (the FastAPI app object lives in app.py)
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+
+ # docker run -d --rm --name "pdfsummary" -e dgoogle_api_key=AIzaSyARfxSKQwobd0MNuOAt6yUjmNUFGX4k_eI -e google_api_key=AIzaSyARfxSKQwobd0MNuOAt6yUjmNUFGX4k_eI -p 8000:8000 cvscreening:latest
+ # docker run -d --rm -p
main.py ADDED
@@ -0,0 +1,93 @@
+ from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
+ from langchain_core.output_parsers.openai_tools import PydanticToolsParser
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ from langchain_core.runnables import RunnablePassthrough
+ from langchain_core.pydantic_v1 import BaseModel, Field
+ from langchain.document_loaders import PyPDFLoader
+ from langchain_core.prompts import PromptTemplate
+ from langchain_core.documents import Document
+ from langchain.prompts import PromptTemplate
+ from langchain_groq import ChatGroq
+ import os, warnings
+
+ warnings.filterwarnings("ignore")
+
+ class cv_score(BaseModel):
+     """You are a powerful HR assistant. Your task is to review the given CV and determine if it matches the job requirements specified in the job description, and give a score between 0 and 1 based on their relevancy. Please do your best; it is very important for my career. If both or any of the fields are empty, then also return 0. Also, return the matching score between 0 and 1.
+     > Relevant Score Between (0 and 1):
+     """
+     score: str = Field(..., description="Give a score to the CV between 0 and 1")
+
+ google = ChatGoogleGenerativeAI(temperature=0, model="gemini-1.5-flash", google_api_key="l1Jm8jpIpq6IvfqES5LEDUQp9CkSINmjiwYoLNtD")
+
+ google2 = ChatGoogleGenerativeAI(temperature=0, model="gemini-1.5-flash", google_api_key="AIzaSyARfxSKQwobd0MNuOAt6yUjmNUFGX4k_eI").with_fallbacks([google])
+ # ,tool_choice="predict_bool"
+ llm_with_tools = google2.bind_tools([cv_score])
+
+ def get_data(file_path):
+     pages = PyPDFLoader(file_path=file_path).load()
+     if len(pages) > 1:
+         pdfstring = ""
+         metadata = {}
+         for page in pages:
+             pdfstring += page.page_content
+             metadata.update(page.metadata)
+
+         return Document(
+             page_content=pdfstring,
+             metadata=metadata)
+
+     else:
+         return pages[0]
+
+ def process_score(cv_score):
+     try:
+         return float(cv_score[0].score)
+     except Exception as e:
+         return 0.50
+
+ template = PromptTemplate.from_template("""
+ <job description>
+ {job_des}
+ </job description>
+ ------------
+ <cv>
+ {cv}
+ </cv>
+ """)
+
+ cv_score_cal = template | llm_with_tools | PydanticToolsParser(tools=[cv_score]) | process_score
+
+ def process_data(dict_input: dict) -> dict:
+     pdf_data: Document = dict_input["pdf_data"]
+     return {
+         "source": pdf_data.metadata["source"],
+         "cv": pdf_data.page_content,
+         "job_des": dict_input["job_des"]
+     }
+
+ pdfs_dir = "data"
+ job_description = "HI looking for python developer"
+
+ content_chain = RunnableParallel(
+     {
+         "pdf_data": lambda x: get_data(x["pdf_path"]),
+         "job_des": lambda x: x["job_des"]
+     }
+ ) | RunnableLambda(process_data) | RunnablePassthrough.assign(cv_score=(lambda x: x) | cv_score_cal)
+
+ def shortlist_cvs(scored_cvs: list[dict], percentage: int) -> list[str]:
+     scored_cvs.sort(key=lambda x: x.get("cv_score", 0), reverse=True)
+
+     # Calculate the number of CVs to shortlist based on the percentage
+     shortlist_count = int(len(scored_cvs) * percentage / 100)
+
+     # Select the top N percent CVs
+     shortlisted_cvs = scored_cvs[:shortlist_count]
+
+     return [cv.get("source") for cv in shortlisted_cvs]
+
+ def compression(pdfs_path: list[dict], percentage: int):
+     scored_cvs_list: list[dict] = content_chain.batch(pdfs_path)
+     shortlisted_cvs: list[str] = shortlist_cvs(scored_cvs_list, percentage)
+     return shortlisted_cvs
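
For reference, a self-contained sketch of the shortlisting step that compression() performs after content_chain scores each CV. The hand-made dicts below stand in for content_chain.batch() output (only the fields shortlist_cvs actually reads are shown), so the sketch runs without the LangChain/Gemini setup in main.py.

# Fake "scored" CVs standing in for content_chain.batch() output, followed by the
# same top-N-percent cut that shortlist_cvs() applies.
scored_cvs = [
    {"source": "data/resumes/a.pdf", "cv_score": 0.91},
    {"source": "data/resumes/b.pdf", "cv_score": 0.40},
    {"source": "data/resumes/c.pdf", "cv_score": 0.75},
    {"source": "data/resumes/d.pdf", "cv_score": 0.10},
]
percentage = 50

scored_cvs.sort(key=lambda x: x.get("cv_score", 0), reverse=True)
shortlist_count = int(len(scored_cvs) * percentage / 100)   # 2 of 4 CVs
shortlisted = [cv["source"] for cv in scored_cvs[:shortlist_count]]
print(shortlisted)  # ['data/resumes/a.pdf', 'data/resumes/c.pdf']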
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ fastapi==0.111.0
+ langchain==0.2.7
+ langchain_community==0.2.7
+ langchain_core==0.2.12
+ langchain_google_genai==1.0.7
+ langchain_groq==0.1.6
+ uvicorn==0.30.1
+ pypdf