Spaces:
Sleeping
Sleeping
Commit
·
fa525b5
1
Parent(s):
4f853b0
first commit
Browse files- app.py +79 -0
- dockerfile +23 -0
- main.py +93 -0
- requirements.txt +8 -0
app.py
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, File, UploadFile, HTTPException, Form,BackgroundTasks
|
2 |
+
from fastapi.responses import FileResponse
|
3 |
+
import shutil,os,uuid,asyncio
|
4 |
+
from zipfile import ZipFile
|
5 |
+
from typing import List
|
6 |
+
|
7 |
+
# ASGI application object; the upload route defined later in this file is registered on it.
app = FastAPI()

import logging

# Configure logging: records at ERROR level and above are written to
# 'app.log' (a path relative to the process working directory), each line
# prefixed with a timestamp and the level name.
logging.basicConfig(
    filename='app.log', # Log file path
    level=logging.ERROR, # Minimum severity level to log (e.g., ERROR, WARNING, INFO)
    format='%(asctime)s %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S')
|
17 |
+
|
18 |
+
# Get the secret key from the SECRET_KEY environment variable.
# The literal fallback only keeps existing deployments working; it is a
# placeholder, not a real secret, so set SECRET_KEY in any exposed deployment.
SECRET_KEY = os.environ.get("SECRET_KEY", "123")
|
20 |
+
def cleanup_directory(path: str):
    """Remove the directory tree at *path* on a best-effort basis.

    This runs both as a post-response background task and from an error
    handler, so it must not raise: a directory that is already gone is
    ignored, and any other OS-level failure is logged instead of
    propagating (which would otherwise mask the error being handled).
    """
    try:
        shutil.rmtree(path)
    except FileNotFoundError:
        # Already removed (or never created) - nothing left to do.
        return
    except OSError:
        logging.exception("Failed to remove directory %s", path)
|
22 |
+
|
23 |
+
@app.post("/uploadfiles/")
def upload_files(
    background_tasks: BackgroundTasks,
    files: List[UploadFile] = File(...),
    api_key: str = Form(...),
    percentage: int = Form(...),
    job_description: str = Form(...),
):
    """Score uploaded PDF CVs against a job description and return the best ones zipped.

    Args:
        background_tasks: Used to delete the per-request work directory
            after the response has been sent.
        files: Between 5 and 900 uploaded files, all named ``*.pdf``.
        api_key: Must equal ``SECRET_KEY``.
        percentage: Share (0-100) of the uploaded CVs to keep.
        job_description: Text each CV is scored against.

    Returns:
        A ``FileResponse`` streaming a zip archive of the shortlisted PDFs.

    Raises:
        HTTPException: 401 for a wrong key, 400 for invalid input, 500 if
            saving, scoring or zipping fails (the work directory is removed
            first).
    """
    if api_key != SECRET_KEY:
        raise HTTPException(status_code=401, detail="Unauthorized")

    if not (5 <= len(files) <= 900):
        raise HTTPException(status_code=400, detail="Number of files must be between 5 and 900.")

    if percentage < 0 or percentage > 100:
        raise HTTPException(status_code=400, detail="Percentage must be between 0 and 100.")

    # Validate all files are PDFs. The filename is client-controlled, so keep
    # only its last path component: a name such as "../../x.pdf" must not be
    # able to escape the per-request directory when it is joined below.
    safe_names = []
    for file in files:
        name = os.path.basename((file.filename or "").replace("\\", "/"))
        if not name.lower().endswith(".pdf"):
            raise HTTPException(status_code=400, detail="All files must be PDF.")
        safe_names.append(name)

    # Every request works in its own uniquely named directory.
    main_path = f"data/resumes/{uuid.uuid4()}"
    os.makedirs(main_path, exist_ok=True)

    try:
        pdfs_paths = []
        for file, name in zip(files, safe_names):
            file_location = f"{main_path}/{name}"
            with open(file_location, "wb") as buffer:
                shutil.copyfileobj(file.file, buffer)
            pdfs_paths.append(file_location)

        pdfs_path_with_description = [
            {"pdf_path": pdf_path, "job_des": job_description} for pdf_path in pdfs_paths
        ]

        # Imported lazily: main.py builds the LLM clients at import time.
        # `compression` is defined in main.py, not in this module.
        from main import compression
        short_listed_files_paths = compression(pdfs_path_with_description, percentage)

        zip_filename = f"{main_path}/{uuid.uuid4()}.zip"
        with ZipFile(zip_filename, 'w') as zipf:
            for file_path in short_listed_files_paths:
                zipf.write(file_path, os.path.basename(file_path))

    except Exception as e:
        logging.error(f"Error occurred: {str(e)}")
        cleanup_directory(main_path)
        raise HTTPException(status_code=500, detail=str(e)) from e

    # Return the file response and clean up after sending.
    background_tasks.add_task(cleanup_directory, main_path)
    return FileResponse(
        zip_filename,
        media_type='application/zip',
        # Suggested download name: the archive name only, not the server-side path.
        filename=os.path.basename(zip_filename),
    )
|
dockerfile
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Use the official Python image from the Docker Hub
FROM python:3.12-slim

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file into the container at /app
COPY requirements.txt .

# Install the dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code into the container at /app
COPY . .

# Expose the port that the app runs on
EXPOSE 7860

# Command to run the Uvicorn server (the FastAPI instance `app` is defined in app.py)
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

# Example run; supply real keys through the environment and never commit them.
# The published port must match the port Uvicorn listens on (7860).
# docker run -d --rm --name "pdfsummary" -e dgoogle_api_key=<FALLBACK_GOOGLE_API_KEY> -e google_api_key=<GOOGLE_API_KEY> -p 7860:7860 cvscreening:latest
# docker run -d --rm -p
|
main.py
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_core.runnables import RunnableParallel,RunnablePassthrough,RunnableLambda
|
2 |
+
from langchain_core.output_parsers.openai_tools import PydanticToolsParser
|
3 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
4 |
+
from langchain_core.runnables import RunnablePassthrough
|
5 |
+
from langchain_core.pydantic_v1 import BaseModel, Field
|
6 |
+
from langchain.document_loaders import PyPDFLoader
|
7 |
+
from langchain_core.prompts import PromptTemplate
|
8 |
+
from langchain_core.documents import Document
|
9 |
+
from langchain.prompts import PromptTemplate
|
10 |
+
from langchain_groq import ChatGroq
|
11 |
+
import os,warnings
|
12 |
+
|
13 |
+
# Suppress every Python warning for the whole process. Note that this also
# hides deprecation notices emitted by the libraries imported above.
warnings.filterwarnings("ignore")
|
14 |
+
|
15 |
+
# Tool schema handed to the model via `bind_tools([cv_score])` and parsed back
# by `PydanticToolsParser(tools=[cv_score])` further down in this file.
# NOTE(review): the docstring below is presumably sent to the model as the
# tool description, so treat it as prompt text rather than documentation -
# confirm before rewording it.
class cv_score(BaseModel):
    """You are a powerful HR assistant. Your task is to review the given CV and determine if it matches the job requirements specified in the job description, and give a score between 0 and 1 based on their relevancy. Please do your best; it is very important for my career. If both or any of the fields are empty, then also return 0. Also, return the matching score between 0 and 1.
    > Relevant Score Between (0 and 1):
    """
    # Declared as a string; `process_score` converts it with float().
    score: str = Field(..., description="Give a score to the CV between 0 and 1")
|
20 |
+
|
21 |
+
# API keys are read from the environment instead of being committed to source
# control. The variable names match the `-e google_api_key=... -e
# dgoogle_api_key=...` flags in this project's example `docker run` command.
# NOTE(review): which variable feeds the primary vs. the fallback client is an
# assumption - confirm against the deployment configuration. Any key that was
# previously committed here should be treated as leaked and rotated.

# Fallback client, used only when the primary client below fails.
google = ChatGoogleGenerativeAI(
    temperature=0,
    model="gemini-1.5-flash",
    google_api_key=os.environ.get("dgoogle_api_key"),
)

# Primary client, wrapped so that failures are retried on `google`.
google2 = ChatGoogleGenerativeAI(
    temperature=0,
    model="gemini-1.5-flash",
    google_api_key=os.environ.get("google_api_key"),
).with_fallbacks([google])
# ,tool_choice="predict_bool"
# Expose the `cv_score` schema to the model as a callable tool.
llm_with_tools = google2.bind_tools([cv_score])
|
26 |
+
|
27 |
+
def get_data(file_path):
    """Load the PDF at *file_path* and return it as one ``Document``.

    A single-page PDF is returned as loaded. For a multi-page PDF the page
    texts are concatenated in order and the per-page metadata dicts are
    merged, later pages overwriting earlier ones on duplicate keys.

    If the loader yields no pages, an empty ``Document`` whose metadata
    carries ``source`` is returned, so one unreadable CV does not abort the
    whole batch with an ``IndexError``. ``process_data`` reads
    ``metadata["source"]``, hence that key is always populated.
    """
    pages = PyPDFLoader(file_path=file_path).load()

    if not pages:
        return Document(page_content="", metadata={"source": file_path})

    if len(pages) == 1:
        return pages[0]

    metadata = {}
    for page in pages:
        metadata.update(page.metadata)

    return Document(
        page_content="".join(page.page_content for page in pages),
        metadata=metadata,
    )
|
42 |
+
|
43 |
+
def process_score(cv_score):
    """Turn the parsed tool output into a float score.

    Reads ``.score`` from the first element of *cv_score* and converts it
    with ``float``. If anything goes wrong (empty or missing output, a
    non-numeric score, ...) the neutral value 0.5 is returned instead.
    """
    fallback = 0.50
    try:
        first_result = cv_score[0]
        numeric = float(first_result.score)
    except Exception:
        return fallback
    return numeric
|
48 |
+
|
49 |
+
# Prompt with two placeholders, `job_des` and `cv`, each wrapped in its own
# tag pair. The text is sent to the model as-is, so edit it with care.
template = PromptTemplate.from_template("""
<job description>
{job_des}
</job description>
------------
<cv>
{cv}
</cv>
""")

# Scoring pipeline: fill the prompt -> call the tool-enabled model -> parse the
# tool call into `cv_score` objects -> reduce to a float via `process_score`.
cv_score_cal = template | llm_with_tools | PydanticToolsParser(tools=[cv_score]) | process_score
|
60 |
+
|
61 |
+
def process_data(dict_input: dict) -> dict:
    """Flatten a loaded CV into the plain fields used downstream.

    *dict_input* must hold ``"pdf_data"`` (an object exposing ``metadata``
    and ``page_content``) and ``"job_des"``. The result maps ``"source"`` to
    the document's ``metadata["source"]``, ``"cv"`` to its text and
    ``"job_des"`` to the unchanged job description.
    """
    document = dict_input["pdf_data"]
    flattened = {"source": document.metadata["source"]}
    flattened["cv"] = document.page_content
    flattened["job_des"] = dict_input["job_des"]
    return flattened
|
68 |
+
|
69 |
+
# NOTE(review): neither of these two constants is referenced anywhere in the
# visible code; they look like leftovers from local experimentation.
pdfs_dir = "data"
job_description = "HI looking for python developer"

# Per-CV pipeline. Input is a dict with "pdf_path" and "job_des".
#   1. In parallel: load the PDF via `get_data`, and pass "job_des" through.
#   2. `process_data` flattens that into {"source", "cv", "job_des"}.
#   3. `assign` keeps those keys and adds "cv_score", computed by running the
#      same dict through `cv_score_cal`.
content_chain = RunnableParallel(
    {
        "pdf_data": lambda x: get_data(x["pdf_path"]),
        "job_des": lambda x: x["job_des"]
    }
) | RunnableLambda(process_data) | RunnablePassthrough.assign(cv_score = (lambda x:x) | cv_score_cal)
|
78 |
+
|
79 |
+
def shortlist_cvs(scored_cvs:list[dict],percentage:int) -> list[str]:
    """Return the ``source`` of the top *percentage* percent of CVs by score.

    Args:
        scored_cvs: Dicts holding ``"source"`` and (optionally) ``"cv_score"``;
            a missing score counts as 0. The list is not modified.
        percentage: Share of CVs to keep. Values outside 0-100 are clamped so
            that a negative value cannot become a negative slice bound.

    Returns:
        Sources of the selected CVs, best score first. The count is rounded
        down, so a small percentage of a short list may select nothing.
    """
    # sorted() rather than list.sort(): leave the caller's list order untouched.
    ranked = sorted(scored_cvs, key=lambda cv: cv.get("cv_score", 0), reverse=True)

    # Calculate the number of CVs to shortlist based on the percentage
    bounded = max(0, min(100, percentage))
    shortlist_count = int(len(ranked) * bounded / 100)

    # Select the top N percent CVs
    return [cv.get("source") for cv in ranked[:shortlist_count]]
|
89 |
+
|
90 |
+
def compression(pdfs_path:list[dict],percentage:int):
    """Score every CV in *pdfs_path* and return the shortlisted source paths.

    Each entry of *pdfs_path* is run through ``content_chain`` in one batch
    call; the scored results are then reduced by ``shortlist_cvs`` to the
    top *percentage* percent.
    """
    return shortlist_cvs(content_chain.batch(pdfs_path), percentage)
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fastapi==0.111.0
|
2 |
+
langchain==0.2.7
|
3 |
+
langchain_community==0.2.7
|
4 |
+
langchain_core==0.2.12
|
5 |
+
langchain_google_genai==1.0.7
|
6 |
+
langchain_groq==0.1.6
|
7 |
+
uvicorn==0.30.1
|
8 |
+
pypdf
|