Spaces:
Sleeping
Sleeping
| from langchain_core.runnables import RunnableParallel,RunnablePassthrough,RunnableLambda | |
| from langchain_core.output_parsers.openai_tools import PydanticToolsParser | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
| from langchain_core.runnables import RunnablePassthrough | |
| from langchain_core.pydantic_v1 import BaseModel, Field | |
| from langchain.document_loaders import PyPDFLoader | |
| from langchain_core.prompts import PromptTemplate | |
| from langchain_core.documents import Document | |
| from langchain.prompts import PromptTemplate | |
| from langchain_groq import ChatGroq | |
| import os,warnings | |
# Silence every Python warning for the remainder of the process (no category
# or module filter is given, so this applies globally, not just to this file).
# NOTE(review): presumably meant to hide deprecation noise from the LangChain
# imports above -- confirm, since it also hides warnings that may matter.
warnings.filterwarnings("ignore")
# Tool schema handed to the chat model via ``bind_tools``.
# NOTE(review): the class docstring below is written as a prompt and is
# presumably forwarded to the model as the tool description -- treat it as
# runtime text, not as developer documentation, and edit it with care.
class cv_score(BaseModel):
    """You are a powerful HR assistant. Your task is to review the given CV and determine if it matches the job requirements specified in the job description, and give a score between 0 and 1 based on their relevancy. Please do your best; it is very important for my career. If both or any of the fields are empty, then also return 0. Also, return the matching score between 0 and 1.
    > Relevant Score Between (0 and 1):
    """

    # Required field; kept as a string here and converted to float downstream
    # by ``process_score``.
    score: str = Field(
        default=...,
        description="Give a score to the CV between 0 and 1",
    )
# API keys are read from the environment instead of being committed to source.
#   GOOGLE_API_KEY           -- required, used by the primary model.
#   GOOGLE_API_KEY_FALLBACK  -- optional, used by the fallback model; defaults
#                               to the primary key when unset.
# NOTE(review): the keys that were previously hard-coded here have been
# published with the source and should be revoked/rotated.
_primary_api_key = os.environ["GOOGLE_API_KEY"]
_fallback_api_key = os.environ.get("GOOGLE_API_KEY_FALLBACK", _primary_api_key)

# Fallback model: only invoked when a call to the primary model fails.
google = ChatGoogleGenerativeAI(
    temperature=0,
    model="gemini-1.5-flash",
    google_api_key=_fallback_api_key,
)

# Primary model, wrapped so that failures are retried on ``google``.
google2 = ChatGoogleGenerativeAI(
    temperature=0,
    model="gemini-1.5-flash",
    google_api_key=_primary_api_key,
).with_fallbacks([google])

# Expose the ``cv_score`` schema to the model as a callable tool.
llm_with_tools = google2.bind_tools([cv_score])
def get_data(file_path):
    """Load a PDF and return its whole text as a single ``Document``.

    Args:
        file_path: Path of the PDF file, passed straight to ``PyPDFLoader``.

    Returns:
        A ``Document`` holding the text of every loaded page. For a one-page
        PDF the loader's own document is returned unchanged. For a multi-page
        PDF the page texts are joined with a newline and the per-page metadata
        dicts are merged (later pages overwrite earlier keys). When the loader
        yields no pages at all, an empty document whose metadata carries
        ``{"source": file_path}`` is returned so downstream code that reads
        ``metadata["source"]`` keeps working.
    """
    pages = PyPDFLoader(file_path=file_path).load()

    if not pages:
        # Previously this fell through to ``pages[0]`` and raised IndexError.
        return Document(page_content="", metadata={"source": file_path})

    if len(pages) == 1:
        return pages[0]

    merged_metadata = {}
    for page in pages:
        merged_metadata.update(page.metadata)

    # Join with a newline so the last word of one page is not fused with the
    # first word of the next.
    return Document(
        page_content="\n".join(page.page_content for page in pages),
        metadata=merged_metadata,
    )
def process_score(cv_score):
    """Turn the parsed tool output into a usable relevance score.

    Args:
        cv_score: Sequence of parsed tool-call objects; only the first item's
            ``score`` attribute is read.

    Returns:
        The score as a float clamped to ``[0.0, 1.0]``. When no score can be
        extracted (empty/``None`` input, missing attribute, non-numeric text,
        or NaN) the neutral fallback ``0.5`` is returned.
    """
    import math

    fallback = 0.50
    try:
        score = float(cv_score[0].score)
    except (IndexError, KeyError, TypeError, AttributeError,
            ValueError, OverflowError):
        # Best-effort: a malformed model reply must not abort the whole batch.
        return fallback

    # NaN compares false against everything and would make the later ranking
    # sort order undefined, so treat it like an unparseable score.
    if math.isnan(score):
        return fallback

    # The tool contract is a score between 0 and 1; clamp anything outside.
    return min(max(score, 0.0), 1.0)
# Prompt body: the job description and the CV text are injected into the
# ``{job_des}`` and ``{cv}`` placeholders respectively.
_SCORING_PROMPT_TEXT = """
<job description>
{job_des}
</job description>
------------
<cv>
{cv}
</cv>
"""

template = PromptTemplate.from_template(_SCORING_PROMPT_TEXT)

# Parses the model's tool calls back into ``cv_score`` instances.
_score_parser = PydanticToolsParser(tools=[cv_score])

# Full scoring pipeline: prompt -> tool-enabled model -> parsed tool calls ->
# float score (via ``process_score``).
cv_score_cal = template | llm_with_tools | _score_parser | process_score
def process_data(dict_input: dict) -> dict:
    """Flatten a loaded CV document plus job description into a plain dict.

    Args:
        dict_input: Mapping with a ``"pdf_data"`` entry (an object exposing
            ``metadata`` and ``page_content``) and a ``"job_des"`` entry.

    Returns:
        A dict with the keys ``"source"`` (taken from the document metadata),
        ``"cv"`` (the document text) and ``"job_des"`` (passed through).

    Raises:
        KeyError: If ``"pdf_data"``, ``"job_des"`` or the metadata's
            ``"source"`` entry is missing.
    """
    document = dict_input["pdf_data"]
    flattened = {"source": document.metadata["source"]}
    flattened["cv"] = document.page_content
    flattened["job_des"] = dict_input["job_des"]
    return flattened
# Module-level defaults; neither is referenced by the code visible in this file.
pdfs_dir = "data"
job_description = "HI looking for python developer"

# Stage 1: from an input dict with "pdf_path" and "job_des", load the PDF and
# carry the job description along.
_load_inputs = RunnableParallel(
    {
        "pdf_data": lambda x: get_data(x["pdf_path"]),
        "job_des": lambda x: x["job_des"],
    }
)

# Stage 2: flatten the loaded document into {"source", "cv", "job_des"}.
_flatten_inputs = RunnableLambda(process_data)

# Stage 3: keep the flattened dict and add a "cv_score" key computed by the
# scoring pipeline from that same dict.
_attach_score = RunnablePassthrough.assign(
    cv_score=(lambda x: x) | cv_score_cal
)

content_chain = _load_inputs | _flatten_inputs | _attach_score
def shortlist_cvs(scored_cvs:list[dict],percentage:int) -> list[str]:
    """Return the sources of the best-scoring ``percentage`` percent of CVs.

    Args:
        scored_cvs: Dicts carrying a ``"cv_score"`` and a ``"source"`` entry.
            A missing score counts as 0. The list is sorted in place, highest
            score first (ties keep their incoming order).
        percentage: Share of CVs to keep, in percent. Values below 0 are
            treated as 0 and values above 100 as 100.

    Returns:
        The ``"source"`` values of the shortlisted CVs, best first. The number
        kept is ``len(scored_cvs) * percentage / 100`` rounded down, so small
        lists with a low percentage may yield an empty shortlist.
    """
    scored_cvs.sort(key=lambda cv: cv.get("cv_score", 0), reverse=True)

    # Clamp first: a negative percentage would otherwise produce a negative
    # slice bound, and ``[:-k]`` returns almost everything instead of nothing.
    bounded_percentage = min(max(percentage, 0), 100)
    shortlist_count = int(len(scored_cvs) * bounded_percentage / 100)

    return [cv.get("source") for cv in scored_cvs[:shortlist_count]]
def compression(pdfs_path:list[dict],percentage:int):
    """Score a batch of CVs and return the sources of the top share.

    Args:
        pdfs_path: One dict per CV, each fed to ``content_chain``; the chain
            reads the ``"pdf_path"`` and ``"job_des"`` keys of every dict.
        percentage: Share of CVs to shortlist, in percent.

    Returns:
        The list produced by ``shortlist_cvs`` for the scored batch.
    """
    # Run the whole chain over every input, then rank and cut.
    return shortlist_cvs(content_chain.batch(pdfs_path), percentage)