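# NOTE(review): the text below looks like hosting-page status output that was
# captured together with the file; it is not part of the program.
# Spaces: Runtime error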
import boto3 | |
import os | |
import json | |
from langchain.chat_models import ChatOpenAI | |
from langchain.prompts import ChatPromptTemplate | |
from langchain.chains import LLMChain, SequentialChain | |
# Module-level chat model used by get_skills(). Temperature 0.0 is passed to
# the model; the API key is read from the OPENAI environment variable, so
# importing this module raises KeyError when that variable is not set.
llm = ChatOpenAI(
    openai_api_key=os.environ["OPENAI"],
    temperature=0.0,
)
def get_resume_string() -> list:
    """Download the resume dataset from S3 and return it as reformatted lines.

    The object is read in full, decoded as UTF-8 and split on line
    boundaries. Within each line, every period followed by a space is
    replaced by a period and a newline, and every bullet marker is replaced
    by a newline followed by " - ".

    Returns:
        A list of strings, one per line of the downloaded file. Despite the
        function's name, the lines are not joined into a single string.
    """
    # The original code matched only one mis-decoded ("mojibake") spelling of
    # the bullet character U+2022. Which spelling actually occurs depends on
    # how the data was mis-decoded upstream, so accept the common variants as
    # well as the intact character. Longer markers come first so that a
    # shorter one never splits a longer one.
    bullet_markers = (
        "\u00e2\u0080\u00a2",  # UTF-8 bytes E2 80 A2 read as Latin-1
        "\u00e2\u20ac\u00a2",  # UTF-8 bytes E2 80 A2 read as Windows-1252
        "\u00e2\u00a2",        # same, with the unprintable middle byte lost
        "\u2022",              # the intact bullet character
    )

    s3 = boto3.client('s3', region_name='eu-west-1')
    response = s3.get_object(
        Bucket='ausy-datalake-drift-nonprod',
        Key='resume-matcher/raw/resume-dataset.csv',
    )
    text = response['Body'].read().decode('utf-8')

    # NOTE(review): the file is a CSV but is split on raw line boundaries, so
    # a header row or a quoted multi-line field would each produce their own
    # entries - confirm this is intended.
    formatted_lines = []
    for line in text.splitlines():
        line = line.replace('. ', '.\n')
        for marker in bullet_markers:
            line = line.replace(marker, '\n - ')
        formatted_lines.append(line)
    return formatted_lines
def get_skills(resumes: str) -> dict:
    """Ask the chat model to group resume text by domain and list the skills.

    The text is inserted into a prompt between <RESUMES> tags, sent through
    the module-level ``llm``, and the reply is parsed as JSON. The raw chain
    result is also printed.

    Args:
        resumes: Resume text to place in the prompt.

    Returns:
        The model's reply parsed with ``json.loads``. The prompt asks for a
        JSON object mapping each domain to a list of skills, so a dict is
        expected; the shape is not validated here.

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON.
    """
    template_resumes_get_skills = """
Given the following string, delimited by <RESUMES> and </RESUMES> which contains resumes which are not properly formatted, categorize the resumes based on domain.
For each domain list the skills of the resumes that are part of that domain.
Create a JSON object where they keys are the domains and the values are a list containing the skills.
Return that JSON object only.
<RESUMES>
{resumes}
</RESUMES>
"""
    prompt_resumes_get_skills = ChatPromptTemplate.from_template(
        template=template_resumes_get_skills
    )
    resume_skills_chain = LLMChain(
        llm=llm,
        prompt=prompt_resumes_get_skills,
        output_key="resume_skills",
    )
    get_skills_resumes_chain = SequentialChain(
        chains=[resume_skills_chain],
        input_variables=["resumes"],
        output_variables=["resume_skills"],
        verbose=False
    )
    result = get_skills_resumes_chain({"resumes": resumes})
    print(result)
    # The original parsed the reply and then discarded it, so the function
    # always returned None; hand the parsed skills back to the caller.
    return json.loads(result['resume_skills'])
if __name__ == "__main__":
    # Script entry point: fetch the reformatted resume lines and run the
    # skill extraction once for each of them, in order.
    for resume_text in get_resume_string():
        get_skills(resume_text)