# Page metadata captured with this file (not part of the program):
#   Spaces status: Runtime error
#   File size: 1,915 Bytes
#   06cf97c
import boto3
import os
import json
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain, SequentialChain
# Shared chat model used by the chains built in this module.
# The API key is read from the "OPENAI" environment variable at import time,
# so importing this module raises KeyError when that variable is not set.
llm = ChatOpenAI(
    openai_api_key=os.environ["OPENAI"],
    temperature=0.0,
)
def get_resume_string(
    bucket: str = 'ausy-datalake-drift-nonprod',
    key: str = 'resume-matcher/raw/resume-dataset.csv',
    s3_client=None,
) -> list:
    """Download the resume dataset from S3 and return it as formatted lines.

    The object is read fully, decoded as UTF-8 and split with
    ``str.splitlines``. Each resulting line is then reformatted:

    * every ``'. '`` becomes ``'.\\n'`` so sentences start on a new line;
    * every bullet marker becomes ``'\\n - '`` so bullets read as list items.

    Despite the function's name, the return value is a list of strings
    (one per line of the object), not a single string.

    Args:
        bucket: Name of the S3 bucket holding the dataset.
        key: Object key of the dataset inside ``bucket``.
        s3_client: Optional object exposing ``get_object(Bucket=..., Key=...)``
            whose result has a readable ``'Body'``. When omitted, a boto3 S3
            client for region ``eu-west-1`` is created.

    Returns:
        A list with one formatted string per line of the object; empty when
        the object is empty.
    """
    # NOTE(review): the marker looks like a UTF-8 bullet (U+2022) that was
    # mis-decoded as Latin-1, which yields "â", an invisible U+0080, and "¢".
    # The invisible character is easily lost when the literal is copied, so
    # both spellings are written with explicit escapes and handled here.
    bullet_markers = ('\u00e2\u0080\u00a2', '\u00e2\u00a2')

    if s3_client is None:
        s3_client = boto3.client('s3', region_name='eu-west-1')

    response = s3_client.get_object(Bucket=bucket, Key=key)
    raw_lines = response['Body'].read().decode('utf-8').splitlines()

    formatted_lines = []
    for line in raw_lines:
        line = line.replace('. ', '.\n')
        for marker in bullet_markers:
            line = line.replace(marker, '\n - ')
        formatted_lines.append(line)
    return formatted_lines
def get_skills(resumes: str) -> dict:
    """Ask the chat model to group resumes by domain and list their skills.

    The text is inserted into a fixed prompt that asks the model to return
    only a JSON object whose keys are domains and whose values are lists of
    skills. The raw chain result is printed, then the model's answer is
    parsed as JSON and returned.

    Args:
        resumes: Resume text to analyse; it is placed between the
            ``<RESUMES>`` and ``</RESUMES>`` delimiters of the prompt.

    Returns:
        The parsed JSON value from the model's answer. The prompt requests
        an object mapping domain to a list of skills, but the model's output
        is not validated beyond being parseable JSON.

    Raises:
        json.JSONDecodeError: If the model's answer cannot be parsed as JSON.
    """
    template_resumes_get_skills = """
Given the following string, delimited by <RESUMES> and </RESUMES> which contains resumes which are not properly formatted, categorize the resumes based on domain.
For each domain list the skills of the resumes that are part of that domain.
Create a JSON object where they keys are the domains and the values are a list containing the skills.
Return that JSON object only.
<RESUMES>
{resumes}
</RESUMES>
"""
    skills_prompt = ChatPromptTemplate.from_template(template=template_resumes_get_skills)
    skills_chain = LLMChain(llm=llm, prompt=skills_prompt, output_key="resume_skills")
    pipeline = SequentialChain(
        chains=[skills_chain],
        input_variables=["resumes"],
        output_variables=["resume_skills"],
        verbose=False,
    )

    result = pipeline({"resumes": resumes})
    print(result)

    answer = result['resume_skills']
    # Chat models sometimes wrap the object in prose or a Markdown code fence.
    # Parse only the outermost {...} span when one exists; otherwise parse the
    # answer unchanged so that invalid output still raises JSONDecodeError.
    start = answer.find('{')
    end = answer.rfind('}')
    if 0 <= start < end:
        answer = answer[start:end + 1]

    # The original parsed the answer but dropped it; return it to the caller.
    return json.loads(answer)
if __name__ == "__main__":
    # Script entry point: fetch the formatted dataset lines and run the
    # skill-extraction prompt once per line. get_skills prints each chain
    # result itself, so nothing is collected here.
    resumes = get_resume_string()
    for resume in resumes:
        get_skills(resume)