Spaces:

drift-ai
/

recruiter-assistant

Runtime error

App Files Files Community

recruiter-assistant / test.py

FilipinosRich

First draft of testing at scale

06cf97c almost 2 years ago

raw

history blame

1.92 kB

	import boto3
	import os
	import json

	from langchain.chat_models import ChatOpenAI
	from langchain.prompts import ChatPromptTemplate
	from langchain.chains import LLMChain, SequentialChain

	# Shared chat model used by the chain built in get_skills. temperature=0.0 is
	# passed to ChatOpenAI, and the API key is read from the OPENAI environment
	# variable; os.environ[...] raises KeyError at import time when it is unset.
	llm = ChatOpenAI(temperature=0.0, openai_api_key=os.environ["OPENAI"])

	def get_resume_string(
	    bucket: str = 'ausy-datalake-drift-nonprod',
	    key: str = 'resume-matcher/raw/resume-dataset.csv',
	) -> list:
	    """Download the resume dataset from S3 and return it as cleaned lines.

	    The object is read in full, decoded as UTF-8 and split on line
	    boundaries, so every physical line of the file becomes one list entry.
	    Each entry is then reformatted for readability:

	    * every ``'. '`` becomes ``'.\\n'`` so sentences start on a new line;
	    * every mis-decoded bullet character becomes ``'\\n - '``.

	    Despite the function name, the result is a list of strings, not a
	    single string.

	    Args:
	        bucket: Name of the S3 bucket holding the dataset.
	        key: Object key of the dataset inside ``bucket``.

	    Returns:
	        A list with one cleaned string per line of the downloaded object.

	    Raises:
	        UnicodeDecodeError: If the object is not valid UTF-8.
	    """
	    # Spellings of a bullet that was decoded with the wrong codec. Which one
	    # appears depends on the codec that mangled it (Latin-1 keeps a C1 control
	    # character in the middle, cp1252 turns it into a euro sign, and some
	    # tools drop it), so all of them are matched, longest first.
	    # NOTE(review): presumably the dataset only contains one of these -
	    # confirm against the real file.
	    bullet_variants = ('\u00e2\u0080\u00a2', '\u00e2\u20ac\u00a2', '\u00e2\u00a2')

	    s3 = boto3.client('s3', region_name='eu-west-1')
	    response = s3.get_object(Bucket=bucket, Key=key)

	    # The body is a stream; close it even when read/decode fails.
	    body = response['Body']
	    try:
	        raw_text = body.read().decode('utf-8')
	    finally:
	        body.close()

	    resumes_list = []
	    for line in raw_text.splitlines():
	        line = line.replace('. ', '.\n')
	        for bullet in bullet_variants:
	            line = line.replace(bullet, '\n - ')
	        resumes_list.append(line)

	    return resumes_list

	def get_skills(resumes: str) -> dict:
	    """Ask the LLM to group resume text by domain and list each domain's skills.

	    The text is inserted into a prompt that asks for a JSON object only. The
	    raw chain output is printed, then the ``resume_skills`` entry is parsed
	    as JSON and returned. (The original computed the parsed value but never
	    returned it, so callers always received ``None``.)

	    Args:
	        resumes: Resume text to analyse; it is placed between the
	            ``<RESUMES>`` markers of the prompt.

	    Returns:
	        The parsed JSON reply. The prompt asks for an object whose keys are
	        domains and whose values are lists of skills, but the shape is not
	        validated here.

	    Raises:
	        json.JSONDecodeError: If the model's reply is not valid JSON.
	    """
	    template_resumes_get_skills = """
	Given the following string, delimited by <RESUMES> and </RESUMES> which contains resumes which are not properly formatted, categorize the resumes based on domain.
	For each domain list the skills of the resumes that are part of that domain.

	Create a JSON object where they keys are the domains and the values are a list containing the skills.

	Return that JSON object only.

	<RESUMES>
	{resumes}
	</RESUMES>
	"""

	    prompt_resumes_get_skills = ChatPromptTemplate.from_template(template=template_resumes_get_skills)
	    skills_chain = LLMChain(llm=llm, prompt=prompt_resumes_get_skills, output_key="resume_skills")

	    get_skills_resumes_chain = SequentialChain(
	        chains=[skills_chain],
	        input_variables=["resumes"],
	        output_variables=["resume_skills"],
	        verbose=False,
	    )

	    result = get_skills_resumes_chain({"resumes": resumes})
	    # Kept from the original: the raw chain output is the script's only trace.
	    print(result)

	    return json.loads(result['resume_skills'])

	if __name__ == "__main__":
	    # Script entry point: fetch the resume lines from S3 and run the skill
	    # extraction once per line, in file order.
	    for resume_text in get_resume_string():
	        get_skills(resume_text)