File size: 3,467 Bytes
fa525b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from langchain_core.runnables import RunnableParallel,RunnablePassthrough,RunnableLambda
from langchain_core.output_parsers.openai_tools import PydanticToolsParser
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.runnables import RunnablePassthrough
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.document_loaders import PyPDFLoader
from langchain_core.prompts import PromptTemplate
from langchain_core.documents import Document
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
import os,warnings

# Install a blanket "ignore" filter: no category or module is given, so it
# applies to every warning raised after this line runs.
# NOTE(review): this also hides deprecation notices from the libraries
# imported above -- consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

# Structured-output schema: the model is bound to this class as a tool and the
# tool-call parser turns the model's answer back into instances of it.
# NOTE(review): the docstring below reads as prompt text and is presumably
# forwarded to the model as the tool description -- confirm before rewording
# it, because that would change what the model is told.
class cv_score(BaseModel):
    """You are a powerful HR assistant. Your task is to review the given CV and determine if it matches the job requirements specified in the job description, and give a score between 0 and 1 based on their relevancy. Please do your best; it is very important for my career. If both or any of the fields are empty, then also return 0. Also, return the matching score between 0 and 1.
    > Relevant Score Between (0 and 1):
    """
    # Declared as str; process_score() converts it with float() downstream.
    score: str = Field(..., description="Give a score to the CV between 0 and 1")

# Gemini credentials are read from the environment so that no secret lives in
# source control:
#   GOOGLE_API_KEY           -- key for the primary model
#   GOOGLE_API_KEY_FALLBACK  -- optional key for the fallback model; defaults
#                               to the primary key when unset
# NOTE: API keys used to be hard-coded at this spot; treat those keys as
# leaked and rotate them.
_primary_api_key = os.environ.get("GOOGLE_API_KEY")
_fallback_api_key = os.environ.get("GOOGLE_API_KEY_FALLBACK", _primary_api_key)

# Fallback model: registered below as the alternative for the primary model.
google = ChatGoogleGenerativeAI(
    temperature=0,
    model="gemini-1.5-flash",
    google_api_key=_fallback_api_key,
)

# Primary model, wrapped so that `google` is tried when the primary fails.
google2 = ChatGoogleGenerativeAI(
    temperature=0,
    model="gemini-1.5-flash",
    google_api_key=_primary_api_key,
).with_fallbacks([google])

# Expose the cv_score schema to the model as a tool it can call.
llm_with_tools = google2.bind_tools([cv_score])

def get_data(file_path):
    """Load a PDF and return its whole text as a single Document.

    Args:
        file_path: Location of the PDF, passed straight to PyPDFLoader.

    Returns:
        The single page unchanged when the loader yields exactly one page.
        For several pages, a new Document whose ``page_content`` is the
        pages' text concatenated without a separator and whose ``metadata``
        is the pages' metadata merged in page order (later pages override
        earlier ones on shared keys). When the loader yields no pages, an
        empty Document carrying only a "source" entry.
    """
    pages = PyPDFLoader(file_path=file_path).load()

    if not pages:
        # Nothing could be extracted. Return an empty CV instead of failing
        # with IndexError; "source" is kept because process_data reads it.
        return Document(page_content="", metadata={"source": file_path})

    if len(pages) == 1:
        return pages[0]

    merged_metadata = {}
    for page in pages:
        merged_metadata.update(page.metadata)

    return Document(
        page_content="".join(page.page_content for page in pages),
        metadata=merged_metadata,
    )

def process_score(cv_score):
    """Convert the parsed tool output into a numeric relevancy score.

    Args:
        cv_score: Sequence of parsed tool results; only the first element is
            used, and its ``score`` attribute is converted with ``float``.

    Returns:
        The score as a float, or the neutral fallback 0.50 when there is no
        parsed result, the score is not numeric, or it is NaN/infinite.
    """
    import math  # function-scope import keeps this block self-contained

    fallback = 0.50
    try:
        value = float(cv_score[0].score)
    except (AttributeError, IndexError, KeyError, TypeError, ValueError):
        # Empty or missing parser output, or a score that is not a number.
        return fallback

    # NaN has no defined ordering and would make the later sort by score
    # unreliable, so non-finite values are treated like an unparsable score.
    return value if math.isfinite(value) else fallback
    
# Prompt layout: the job description and the CV text are each wrapped in their
# own tagged section, separated by a dashed line. The two placeholders,
# {job_des} and {cv}, are filled from the dict passed into the chain.
template = PromptTemplate.from_template("""
<job description>
{job_des}
</job description>
------------
<cv>
{cv}
</cv>
""")

# Scoring pipeline: fill the prompt -> call the tool-bound model -> parse the
# tool call into cv_score instances -> reduce to a float via process_score.
cv_score_cal = template | llm_with_tools | PydanticToolsParser(tools=[cv_score]) | process_score

def process_data(dict_input: dict) -> dict:
    """Flatten the loaded PDF and the job description into prompt inputs.

    Args:
        dict_input: Mapping with "pdf_data" (an object exposing ``metadata``
            and ``page_content``) and "job_des".

    Returns:
        A dict with "source" (taken from the document metadata), "cv" (the
        document text) and "job_des" (passed through unchanged).
    """
    document = dict_input["pdf_data"]

    flattened = {}
    flattened["source"] = document.metadata["source"]
    flattened["cv"] = document.page_content
    flattened["job_des"] = dict_input["job_des"]
    return flattened

# Module-level defaults: a directory name and a sample job description.
# NOTE(review): neither name is referenced by the code visible in this file;
# presumably they are consumed by a caller -- confirm before removing.
pdfs_dir = "data"
job_description = "HI looking for python developer"

# Per-CV pipeline. Input: a dict with "pdf_path" and "job_des".
#   1. RunnableParallel builds {"pdf_data": <loaded Document>, "job_des": <as given>}.
#   2. process_data flattens that into {"source", "cv", "job_des"}.
#   3. assign() adds a "cv_score" entry computed by cv_score_cal from that
#      same dict (the identity lambda simply forwards it).
content_chain = RunnableParallel(
    {
        "pdf_data": lambda x: get_data(x["pdf_path"]),
        "job_des": lambda x: x["job_des"]
    } 
) | RunnableLambda(process_data) | RunnablePassthrough.assign(cv_score = (lambda x:x) | cv_score_cal)

def shortlist_cvs(scored_cvs: list[dict], percentage: int) -> list[str]:
    """Return the sources of the top-scoring CVs.

    Args:
        scored_cvs: One dict per CV. "cv_score" is the ranking key (a missing
            key counts as 0) and "source" identifies the CV. The list is
            not modified.
        percentage: Share of CVs to keep, in percent. Values below 0 keep
            nothing; values above 100 keep everything.

    Returns:
        The "source" values of the kept CVs, best score first. CVs with equal
        scores keep their input order. The number kept is rounded down, so a
        small percentage of a short list can yield an empty result.
    """
    # sorted() ranks a copy, leaving the caller's list in its original order.
    ranked = sorted(scored_cvs, key=lambda cv: cv.get("cv_score", 0), reverse=True)

    # Round down, and never go negative: a negative count would turn the
    # slice below into "everything except the last N" instead of "nothing".
    shortlist_count = max(0, int(len(ranked) * percentage / 100))

    return [cv.get("source") for cv in ranked[:shortlist_count]]

def compression(pdfs_path:list[dict],percentage:int):
    """Score a batch of CVs and return the sources of the best ones.

    Args:
        pdfs_path: One dict per CV, each fed to ``content_chain`` (which
            reads its "pdf_path" and "job_des" entries).
        percentage: Share of CVs to keep, forwarded to ``shortlist_cvs``.

    Returns:
        The list of sources produced by ``shortlist_cvs`` for the scored batch.
    """
    return shortlist_cvs(content_chain.batch(pdfs_path), percentage)