Spaces:

faizan23423
/

cvscreening

Sleeping

App Files Files Community

cvscreening / main.py

faizanmumtaz

first commit

fa525b5 12 months ago

raw

history blame

3.47 kB

	from langchain_core.runnables import RunnableParallel,RunnablePassthrough,RunnableLambda
	from langchain_core.output_parsers.openai_tools import PydanticToolsParser
	from langchain_google_genai import ChatGoogleGenerativeAI
	from langchain_core.runnables import RunnablePassthrough
	from langchain_core.pydantic_v1 import BaseModel, Field
	from langchain.document_loaders import PyPDFLoader
	from langchain_core.prompts import PromptTemplate
	from langchain_core.documents import Document
	from langchain.prompts import PromptTemplate
	from langchain_groq import ChatGroq
	import os,warnings

	# Suppress every Python warning for the rest of the process.
	# NOTE: this runs after the imports above, so it only affects warnings
	# raised from this point on.
	warnings.filterwarnings(action="ignore")

	# Schema that is handed to the model via ``bind_tools`` further down.
	# NOTE(review): the class docstring presumably doubles as the tool description
	# the model receives, so its wording is runtime prompt text rather than
	# ordinary documentation - confirm against LangChain's tool conversion before
	# editing it.
	class cv_score(BaseModel):
	    """You are a powerful HR assistant. Your task is to review the given CV and determine if it matches the job requirements specified in the job description, and give a score between 0 and 1 based on their relevancy. Please do your best; it is very important for my career. If both or any of the fields are empty, then also return 0. Also, return the matching score between 0 and 1.
	> Relevant Score Between (0 and 1):
	"""
	    # Relevance score, carried as text; ``process_score`` converts it to float.
	    score: str = Field(..., description="Give a score to the CV between 0 and 1")

	# API credentials are read from the environment so that no secret lives in
	# the repository. Any key that was ever committed to version control should
	# be treated as leaked and rotated.
	#   GOOGLE_API_KEY           - required, used by the primary model
	#   GOOGLE_API_KEY_FALLBACK  - optional, used by the fallback model
	_primary_api_key = os.environ.get("GOOGLE_API_KEY")
	if not _primary_api_key:
	    raise RuntimeError(
	        "GOOGLE_API_KEY is not set; export it before importing this module."
	    )
	# Without a dedicated fallback key the fallback model reuses the primary key.
	_fallback_api_key = os.environ.get("GOOGLE_API_KEY_FALLBACK") or _primary_api_key

	# Fallback model: same configuration as the primary, separate credential.
	google = ChatGoogleGenerativeAI(
	    temperature=0,
	    model="gemini-1.5-flash",
	    google_api_key=_fallback_api_key,
	)

	# Primary model; ``google`` is registered as its fallback.
	google2 = ChatGoogleGenerativeAI(
	    temperature=0,
	    model="gemini-1.5-flash",
	    google_api_key=_primary_api_key,
	).with_fallbacks([google])

	# Bind the ``cv_score`` schema as a tool so the reply can be parsed into it.
	llm_with_tools = google2.bind_tools([cv_score])

	def get_data(file_path):
	    """Load a PDF and return its whole text as a single ``Document``.

	    The loader's result is treated as one document per page; multi-page
	    files are collapsed into one document so the CV is scored as a whole.

	    Args:
	        file_path: Path of the PDF file to read.

	    Returns:
	        A ``Document`` whose ``page_content`` is the concatenation of all
	        page texts and whose ``metadata`` merges every page's metadata
	        (later pages overwrite earlier ones on key clashes). A single-page
	        file is returned exactly as loaded. A file that yields no pages
	        produces an empty document carrying only its ``source`` path.
	    """
	    pages = PyPDFLoader(file_path=file_path).load()

	    if not pages:
	        # Indexing ``pages[0]`` here would raise IndexError and abort the
	        # whole batch; an empty CV can still flow through the chain.
	        return Document(page_content="", metadata={"source": file_path})

	    if len(pages) == 1:
	        return pages[0]

	    merged_metadata = {}
	    for page in pages:
	        merged_metadata.update(page.metadata)

	    return Document(
	        page_content="".join(page.page_content for page in pages),
	        metadata=merged_metadata,
	    )

	def process_score(cv_score):
	    """Turn the parsed tool output into a numeric relevance score.

	    Args:
	        cv_score: Sequence of parsed tool results; only the ``score``
	            attribute of the first element is read.

	    Returns:
	        The score as a ``float`` clamped to ``[0.0, 1.0]``, or ``0.5`` when
	        no usable score is available (empty or missing result, missing
	        attribute, non-numeric text, or NaN).
	    """
	    fallback = 0.50
	    try:
	        value = float(cv_score[0].score)
	    except (LookupError, AttributeError, TypeError, ValueError):
	        # No tool call came back, or its score is not a number.
	        return fallback

	    if value != value:
	        # NaN compares unequal to itself; it would break sorting downstream.
	        return fallback

	    # The score is defined on [0, 1]; pull stray values back into range.
	    return min(1.0, max(0.0, value))

	# Prompt body: the job description and the CV text, each wrapped in its own
	# tag block. ``{job_des}`` and ``{cv}`` are filled in per request.
	template = PromptTemplate.from_template("""
	<job description>
	{job_des}
	</job description>
	------------
	<cv>
	{cv}
	</cv>
	""")

	# prompt -> tool-calling model -> parsed ``cv_score`` objects -> float score.
	# The stages are composed with the plain ``|`` operator.
	cv_score_cal = (
	    template
	    | llm_with_tools
	    | PydanticToolsParser(tools=[cv_score])
	    | process_score
	)

	def process_data(dict_input: dict) -> dict:
	    """Flatten a loaded PDF and its job description into prompt inputs.

	    Args:
	        dict_input: Mapping with ``"pdf_data"`` (an object exposing
	            ``metadata`` and ``page_content``) and ``"job_des"``.

	    Returns:
	        A dict with the keys ``"source"`` (taken from the document's
	        metadata), ``"cv"`` (the document text) and ``"job_des"``.
	    """
	    document: Document = dict_input["pdf_data"]
	    flattened = dict(
	        source=document.metadata["source"],
	        cv=document.page_content,
	        job_des=dict_input["job_des"],
	    )
	    return flattened

	# Not referenced elsewhere in the visible code; presumably defaults for
	# callers or manual runs - verify before removing.
	pdfs_dir = "data"
	job_description = "HI looking for python developer"

	# {"pdf_path", "job_des"} -> load the PDF -> flatten into prompt inputs ->
	# add a "cv_score" key computed by ``cv_score_cal``.
	# The stages are composed with the plain ``|`` operator, and
	# ``cv_score_cal`` is passed to ``assign`` directly (the identity step that
	# used to precede it added nothing).
	content_chain = (
	    RunnableParallel(
	        {
	            "pdf_data": lambda x: get_data(x["pdf_path"]),
	            "job_des": lambda x: x["job_des"],
	        }
	    )
	    | RunnableLambda(process_data)
	    | RunnablePassthrough.assign(cv_score=cv_score_cal)
	)

	def shortlist_cvs(scored_cvs:list[dict],percentage:int) -> list[str]:
	    """Return the sources of the best-scoring CVs.

	    Args:
	        scored_cvs: Dicts holding a ``"cv_score"`` and a ``"source"``; an
	            entry without a score is ranked as 0. The list is not modified.
	        percentage: Share of the CVs to keep, from 0 to 100. Values outside
	            that range are clamped to it.

	    Returns:
	        The ``"source"`` values of the top ``percentage`` percent of CVs,
	        best first. The number kept is rounded down.
	    """
	    # Clamp first: a negative count would turn the slice below into
	    # "everything except the last N" instead of "nothing".
	    share = min(100, max(0, percentage))
	    keep = int(len(scored_cvs) * share / 100)

	    # ``sorted`` leaves the caller's list in its original order.
	    ranked = sorted(scored_cvs, key=lambda cv: cv.get("cv_score", 0), reverse=True)

	    return [cv.get("source") for cv in ranked[:keep]]

	def compression(pdfs_path: list[dict], percentage: int):
	    """Score a batch of CVs and return the sources of the best ones.

	    Args:
	        pdfs_path: One input dict per CV, passed unchanged to
	            ``content_chain.batch``.
	        percentage: Forwarded to ``shortlist_cvs``.

	    Returns:
	        The list produced by ``shortlist_cvs`` for the scored batch.
	    """
	    return shortlist_cvs(content_chain.batch(pdfs_path), percentage)