"""Score CVs (PDF files) against a job description and shortlist the best ones.

Pipeline overview:

1. ``get_data`` loads a PDF and merges its pages into one ``Document``.
2. ``process_data`` flattens that into the variables the prompt needs.
3. ``cv_score_cal`` asks Gemini for a relevance score through the ``cv_score``
   tool schema and converts the answer to a float.
4. ``shortlist_cvs`` / ``compression`` keep the top N percent by score.

Configuration:
    GOOGLE_API_KEY           API key for the primary Gemini model (required).
    GOOGLE_API_KEY_FALLBACK  Optional key for the fallback model; defaults to
                             ``GOOGLE_API_KEY`` when unset.
"""
import logging
import os
import warnings

from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers.openai_tools import PydanticToolsParser
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.document_loaders import PyPDFLoader
from langchain_core.prompts import PromptTemplate
from langchain_core.documents import Document
# Kept after the langchain_core import so the final ``PromptTemplate`` binding
# is the same one the original import order produced.
from langchain.prompts import PromptTemplate  # noqa: F811
from langchain_groq import ChatGroq

warnings.filterwarnings("ignore")

logger = logging.getLogger(__name__)

# Score used when the model's answer cannot be parsed into a number.
DEFAULT_SCORE = 0.50


# NOTE: this schema is handed to the model via ``bind_tools`` below, so the
# docstring and the field description are prompt text, not just documentation.
# Do not reflow or reword them without re-evaluating the model's output.
class cv_score(BaseModel):
    """You are a powerful HR assistant. Your task is to review the given CV and determine if it matches the job requirements specified in the job description, and give a score between 0 and 1 based on their relevancy. Please do your best; it is very important for my career. If both or any of the fields are empty, then also return 0. Also, return the matching score between 0 and 1. 
> Relevant Score Between (0 and 1): """

    score: str = Field(..., description="Give a score to the CV between 0 and 1")


def _build_gemini(*key_env_vars: str) -> ChatGoogleGenerativeAI:
    """Create a deterministic Gemini client using the first API key found.

    Args:
        *key_env_vars: Environment variable names to try, in priority order.

    Raises:
        RuntimeError: If none of the named environment variables is set.
    """
    api_key = next(
        (os.environ[name] for name in key_env_vars if os.environ.get(name)),
        None,
    )
    if api_key is None:
        raise RuntimeError(
            "No Google API key configured; set one of: " + ", ".join(key_env_vars)
        )
    return ChatGoogleGenerativeAI(
        temperature=0,
        model="gemini-1.5-flash",
        google_api_key=api_key,
    )


# Credentials come from the environment, never from source control. Any key
# that was previously committed in this file must be treated as leaked and
# rotated.
google = _build_gemini("GOOGLE_API_KEY_FALLBACK", "GOOGLE_API_KEY")
google2 = _build_gemini("GOOGLE_API_KEY").with_fallbacks([google])
llm_with_tools = google2.bind_tools([cv_score])


def get_data(file_path) -> Document:
    """Load a PDF and return its full text as a single ``Document``.

    Args:
        file_path: Path of the PDF to load.

    Returns:
        The only page when the PDF has exactly one; otherwise a new
        ``Document`` whose content is every page's text concatenated in order
        and whose metadata is the pages' metadata merged (later pages win on
        key clashes). A PDF that yields no pages produces an empty
        ``Document`` that still carries ``source`` so downstream steps work.
    """
    pages = PyPDFLoader(file_path=file_path).load()
    if not pages:
        # Previously an IndexError that aborted the whole batch.
        logger.warning("No pages could be read from %s", file_path)
        return Document(page_content="", metadata={"source": file_path})
    if len(pages) == 1:
        return pages[0]

    merged_metadata: dict = {}
    for page in pages:
        merged_metadata.update(page.metadata)
    return Document(
        page_content="".join(page.page_content for page in pages),
        metadata=merged_metadata,
    )


def process_score(cv_score) -> float:
    """Convert the parsed tool output into a numeric score.

    Args:
        cv_score: Parser output; expected to be a non-empty sequence whose
            first item has a ``score`` attribute. (The parameter name shadows
            the ``cv_score`` class; it is kept for backward compatibility.)

    Returns:
        The score as a float, or ``DEFAULT_SCORE`` when the output is empty,
        malformed, or not numeric.
    """
    try:
        return float(cv_score[0].score)
    except (IndexError, KeyError, AttributeError, TypeError, ValueError) as exc:
        logger.warning(
            "Could not parse CV score (%s); using default %.2f", exc, DEFAULT_SCORE
        )
        return DEFAULT_SCORE


# The job description and the CV are the only user content sent to the model;
# the instructions themselves live in the ``cv_score`` tool schema.
template = PromptTemplate.from_template(""" {job_des} ------------ {cv} """)

cv_score_cal = (
    template
    | llm_with_tools
    | PydanticToolsParser(tools=[cv_score])
    | process_score
)


def process_data(dict_input: dict) -> dict:
    """Flatten a loaded CV and its job description into prompt variables.

    Args:
        dict_input: Mapping with ``pdf_data`` (a ``Document``) and ``job_des``.

    Returns:
        A dict with ``source`` (from the document metadata), ``cv`` (the
        document text) and ``job_des`` (passed through unchanged).
    """
    pdf_data: Document = dict_input["pdf_data"]
    return {
        "source": pdf_data.metadata["source"],
        "cv": pdf_data.page_content,
        "job_des": dict_input["job_des"],
    }


pdfs_dir = "data"
job_description = "HI looking for python developer"

# Input: {"pdf_path": ..., "job_des": ...}
# Output: {"source": ..., "cv": ..., "job_des": ..., "cv_score": float}
content_chain = (
    RunnableParallel(
        {
            "pdf_data": lambda x: get_data(x["pdf_path"]),
            "job_des": lambda x: x["job_des"],
        }
    )
    | RunnableLambda(process_data)
    | RunnablePassthrough.assign(cv_score=cv_score_cal)
)


def shortlist_cvs(scored_cvs: list[dict], percentage: int) -> list[str]:
    """Return the sources of the top ``percentage`` percent of CVs by score.

    Args:
        scored_cvs: Dicts carrying ``cv_score`` and ``source``; entries without
            a score rank as 0. The list is not modified.
        percentage: Share of CVs to keep, from 0 to 100. Values outside that
            range are clamped (negative keeps none, above 100 keeps all).

    Returns:
        The ``source`` of each shortlisted CV, best score first. The count is
        rounded down, so small batches with a low percentage may yield none.
    """
    # sorted() rather than list.sort() so the caller's list keeps its order.
    ranked = sorted(scored_cvs, key=lambda cv: cv.get("cv_score", 0), reverse=True)

    # Clamp: a negative count would otherwise act as a "drop the last k" slice
    # and return almost every CV.
    shortlist_count = max(0, min(len(ranked), int(len(ranked) * percentage / 100)))

    return [cv.get("source") for cv in ranked[:shortlist_count]]


def compression(pdfs_path: list[dict], percentage: int):
    """Score a batch of CVs and return the shortlisted source paths.

    Args:
        pdfs_path: One dict per CV with ``pdf_path`` and ``job_des`` keys, as
            consumed by ``content_chain``.
        percentage: Share of CVs to keep, from 0 to 100.

    Returns:
        Sources of the shortlisted CVs, best score first.
    """
    scored_cvs_list: list[dict] = content_chain.batch(pdfs_path)
    shortlisted_cvs: list[str] = shortlist_cvs(scored_cvs_list, percentage)
    return shortlisted_cvs