File size: 1,915 Bytes
06cf97c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import boto3
import os
import json

from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain, SequentialChain

# Module-wide chat model used by the chains below. Temperature is pinned to
# 0.0 and the API key is taken from the OPENAI environment variable, so a
# missing variable raises KeyError at import time.
llm = ChatOpenAI(
    temperature=0.0,
    openai_api_key=os.environ["OPENAI"],
)

def get_resume_string(
    bucket: str = 'ausy-datalake-drift-nonprod',
    key: str = 'resume-matcher/raw/resume-dataset.csv',
    region_name: str = 'eu-west-1',
    s3_client=None,
) -> list:
    """Download the resume dataset from S3 and return it as reformatted lines.

    The object is read completely, decoded as UTF-8 and split on line
    boundaries. Within every line, each period followed by a space is turned
    into a period followed by a newline, and each bullet character is turned
    into a newline followed by " - ", so sentences and bullet points end up
    on separate lines.

    Despite the function name, the result is a list with one string per line
    of the object, not a single string.

    Args:
        bucket: Name of the S3 bucket holding the dataset.
        key: Object key of the dataset inside ``bucket``.
        region_name: AWS region used when a client has to be created here.
        s3_client: Optional object exposing ``get_object(Bucket=..., Key=...)``.
            When omitted, a boto3 S3 client is created for ``region_name``.

    Returns:
        The reformatted lines of the object, in their original order. An
        empty object yields an empty list.
    """
    if s3_client is None:
        s3_client = boto3.client('s3', region_name=region_name)

    response = s3_client.get_object(Bucket=bucket, Key=key)
    raw_text = response['Body'].read().decode('utf-8')

    # Both substitutions are applied per line; neither produces text the
    # other would match, so chaining them equals two separate passes.
    return [
        line.replace('. ', '.\n').replace('•', '\n - ')
        for line in raw_text.splitlines()
    ]

def get_skills(resumes: str) -> dict:
    """Ask the chat model to group resume text by domain and list the skills.

    The text is inserted into a prompt that asks for a JSON object whose keys
    are domains and whose values are lists of skills. The raw chain result is
    printed, then the model's answer is parsed as JSON and returned.

    Args:
        resumes: Unformatted resume text to place between the <RESUMES> tags.

    Returns:
        The parsed JSON answer. The prompt requests an object mapping each
        domain to a list of skills, but the shape ultimately depends on what
        the model returns.

    Raises:
        json.JSONDecodeError: If the model's answer is not valid JSON.
    """

    template_resumes_get_skills = """
    Given the following string, delimited by <RESUMES> and </RESUMES> which contains resumes which are not properly formatted, categorize the resumes based on domain. 
    For each domain list the skills of the resumes that are part of that domain.
    
    Create a JSON object where they keys are the domains and the values are a list containing the skills.

    Return that JSON object only.

    <RESUMES>
    {resumes}
    </RESUMES>
    """

    prompt_resumes_get_skills = ChatPromptTemplate.from_template(template=template_resumes_get_skills)
    resume_skills_chain = LLMChain(llm=llm, prompt=prompt_resumes_get_skills, output_key="resume_skills")

    get_skills_resumes_chain = SequentialChain(
        chains=[resume_skills_chain],
        input_variables=["resumes"],
        output_variables=["resume_skills"],
        verbose=False
    )

    result = get_skills_resumes_chain({"resumes": resumes})
    print(result)

    # Previously the parsed object was assigned to a local and dropped, so
    # callers always received None; hand it back instead.
    return json.loads(result['resume_skills'])

if __name__ == "__main__":
    # Script entry point: fetch the resume lines from S3 and run the skill
    # extraction chain once per line.
    for resume_line in get_resume_string():
        get_skills(resume_line)