import boto3
import os
import json

from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain, SequentialChain

# Chat model used by get_skills. The API key is read from the OPENAI
# environment variable when this module is loaded, so a missing variable
# raises KeyError before any function runs.
llm = ChatOpenAI(
    openai_api_key=os.environ["OPENAI"],
    temperature=0.0,
)

def get_resume_string(
    bucket: str = 'ausy-datalake-drift-nonprod',
    key: str = 'resume-matcher/raw/resume-dataset.csv',
    region_name: str = 'eu-west-1',
) -> list:
    """Download the resume dataset from S3 and return it as cleaned lines.

    Despite the function name, the result is a list with one string per
    line of the downloaded file, not a single string.

    Args:
        bucket: Name of the S3 bucket holding the dataset.
        key: Object key of the dataset file inside ``bucket``.
        region_name: AWS region used to create the S3 client.

    Returns:
        A list of strings, one per line of the UTF-8 decoded object, with
        sentence breaks and bullet markers turned into newlines.
    """
    s3 = boto3.client('s3', region_name=region_name)
    response = s3.get_object(Bucket=bucket, Key=key)

    # NOTE(review): splitlines() is not CSV-aware; a quoted field that
    # contains a line break would be spread over several entries.
    raw_lines = response['Body'].read().decode('utf-8').splitlines()

    # Put every sentence on its own line, then turn the bullet marker into
    # an indented list item. 'â¢' looks like a mis-encoded bullet character
    # in the dataset -- TODO confirm against the raw file.
    return [
        line.replace('. ', '.\n').replace('â¢', '\n - ')
        for line in raw_lines
    ]

def get_skills(resumes: str) -> dict:
    """Ask the LLM to group resume text by domain and list each domain's skills.

    The raw chain result is printed before the reply is parsed.

    Args:
        resumes: Resume text (possibly badly formatted) that is inserted
            into the prompt between the <RESUMES> delimiters.

    Returns:
        The model's reply parsed with ``json.loads``. The prompt asks for a
        JSON object mapping each domain to a list of skills, so a dict is
        expected; the shape of the reply is not validated here.

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON.
    """
    template_resumes_get_skills = """
    Given the following string, delimited by <RESUMES> and </RESUMES> which contains resumes which are not properly formatted, categorize the resumes based on domain. 
    For each domain list the skills of the resumes that are part of that domain.
    
    Create a JSON object where they keys are the domains and the values are a list containing the skills.

    Return that JSON object only.

    <RESUMES>
    {resumes}
    </RESUMES>
    """

    prompt_resumes_get_skills = ChatPromptTemplate.from_template(template=template_resumes_get_skills)
    resume_skills_chain = LLMChain(llm=llm, prompt=prompt_resumes_get_skills, output_key="resume_skills")

    get_skills_resumes_chain = SequentialChain(
        chains=[resume_skills_chain],
        input_variables=["resumes"],
        output_variables=["resume_skills"],
        verbose=False
    )

    result = get_skills_resumes_chain({"resumes": resumes})
    print(result)

    # Previously the parsed reply was assigned to a local and dropped, so
    # callers always received None; hand it back instead.
    return json.loads(result['resume_skills'])

if __name__ == "__main__":
    # Run the skill extraction once per entry returned by get_resume_string.
    for resume_text in get_resume_string():
        get_skills(resume_text)