# NDIS Project - Azure OpenAI - PBSP Scoring - Page 4 - Reason(s) why teaching is not needed

In [None]:
import os
import openai
import re
from ipywidgets import interact
import ipywidgets as widgets
from IPython.display import display, clear_output, Javascript, HTML, Markdown
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import json
import spacy
from spacy import displacy
from dotenv import load_dotenv
import pandas as pd
import argilla as rg
from argilla.metrics.text_classification import f1
import warnings
# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')
# IPython magic: draw matplotlib figures inline in the cell output.
%matplotlib inline
# Raise pandas' display limits so long phrases and wide tables are shown
# without truncation.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_colwidth', 10000)
pd.set_option('display.width', 10000)

In [None]:
# Initializations: configure the OpenAI client from environment variables.
# Each lookup raises KeyError if the variable is missing.
# NOTE(review): load_dotenv is imported at the top of the file but is not
# called anywhere in the visible source, so these variables must already be
# set in the process environment -- confirm that is intended.
openai.api_key = os.environ['API_KEY']
openai.api_base = os.environ['API_BASE']
openai.api_type = os.environ['API_TYPE']
openai.api_version = os.environ['API_VERSION']
# Passed as `engine=` to openai.ChatCompletion.create in get_response_chatgpt.
deployment_name = os.environ['DEPLOYMENT_ID']

# Argilla: connect the client using the URL and API key from the environment.
rg.init(
    api_url=os.environ["ARGILLA_API_URL"],
    api_key=os.environ["ARGILLA_API_KEY"]
)

In [None]:
#sentence extraction
def extract_sentences(paragraph):
    """Split a paragraph into trimmed, non-empty fragments.

    The text is cut at every full stop, exclamation mark, question mark,
    semicolon, colon, comma, underscore, hyphen and newline, so the pieces
    are clause-level fragments rather than full grammatical sentences.
    """
    symbols = ['\\.', '!', '\\?', ';', ':', ',', '\\_', '\n', '\\-']
    splitter = re.compile('|'.join(symbols))
    trimmed = (piece.strip() for piece in splitter.split(paragraph))
    # Drop fragments that were empty or whitespace-only before trimming.
    return [piece for piece in trimmed if piece]

In [None]:
def process_response(response, query):
    """Parse the model's answer into a phrase / topic / score table.

    The answer is expected to contain a ``Reasons:`` section and a ``None:``
    section, each followed by lines shaped like
    ``<n>. <phrase> (Confidence Score: <float>)``.

    Args:
        response: Raw text returned by the model (``None`` is treated as an
            empty answer).
        query: The practitioner paragraph that was scored; only used for the
            fallback described below.

    Returns:
        A DataFrame with columns ``Phrase``, ``Topic`` (``"REASON"`` or
        ``"NO REASON"``) and ``Score``. When no scored phrase can be parsed,
        every fragment of ``query`` produced by ``extract_sentences`` is
        returned as ``"NO REASON"`` with a score of 0.9.
    """
    score_marker = "(Confidence Score:"
    # Only a list number at the very start of the phrase is removed, so digits
    # inside the phrase (e.g. "3.5 hours") survive and the phrase stays an
    # exact substring of the paragraph.
    numbering = re.compile(r'^\s*\d+\.\s*')

    rows = []
    topic = None
    for line in (response or "").strip().split("\n"):
        if "Reasons:" in line:
            topic = "REASON"
            continue
        if "None:" in line:
            topic = "NO REASON"
            continue
        parts = line.split(score_marker)
        # Skip lines without a score, and scored lines that appear before any
        # section header: they cannot be attributed to a topic.
        if topic is None or len(parts) < 2:
            continue
        try:
            score = float(parts[1].strip().replace(")", ""))
        except ValueError:
            # Malformed score text: ignore this line, keep the rest.
            continue
        phrase = numbering.sub('', parts[0].strip()).strip('"')
        # Phrase, topic and score are stored together so the three columns
        # can never end up with different lengths.
        rows.append((phrase, topic, score))

    if not rows:
        # Nothing usable in the answer: label every fragment of the paragraph
        # as "NO REASON".
        sentences = extract_sentences(query)
        return pd.DataFrame({
            'Phrase': sentences,
            'Topic': ['NO REASON'] * len(sentences),
            'Score': [0.9] * len(sentences),
        })
    return pd.DataFrame(rows, columns=['Phrase', 'Topic', 'Score'])

In [None]:
def get_prompt(query):
    """Build the scoring prompt for one practitioner paragraph.

    The paragraph is embedded verbatim under "Practitioner Paragraph:". The
    prompt asks for two numbered lists, titled "Reasons:" and "None:", whose
    entries are phrases copied from the paragraph, each followed by
    "(Confidence Score: <value>)". It ends with a worked example of that
    layout.

    Args:
        query: The practitioner's free-text answer.

    Returns:
        The complete prompt string.
    """
    prompt = f"""
    In the positive behaviour support plan, the NDIS health practitioner, who is managing a person with disability, is required to teach the person with disability a new skill, an alternative behaviour or a functionally equivalent replacement behaviour. If not, then the health practitioner is required to state, in the practitioner paragraph below, the reason(s) for why teaching the person with disability any of the above skills or behaviours was not needed. 

    Practitioner Paragraph:
    {query}

    Requirement:
    Identify the phrases from the practitioner paragraph above that represent any reason(s) stated for why teaching the person with disability any skill or behaviour was not needed.

    Specifications of a correct answer:
    - Please provide a response that closely matches the information in the practitioner paragraph and does not deviate significantly from it.
    - Provide your answer in a numbered list. 
    - All the phrases in your answer must be exact substrings in the practitioner paragraph. without changing any characters.
    - All the upper case and lower case characters in the phrases in your answer must match the upper case and lower case characters in the practitioner paragraph.
    - Start numbering the phrases from number 1.
    - Start your answer for the phrases with the title "Reasons:"
    - For each phrase in your answer, provide a confidence score that ranges between 0.50 and 1.00, where a score of 0.50 indicates you are very weakly confident that the phrase represents a valid reason for why teaching the person with disability any skill or behaviour was not needed, whereas a score of 1.00 indicates you are very strongly confident that the phrase represents a valid reason for why teaching the person with disability any skill or behaviour was not needed..
    - Include another numbered list titled "None:", which includes all the remaining phrases in the practitioner paragraph that do not represent any valid reason for why teaching the person with disability any skill or behaviour was not needed. Provide a confidence score for each of these phrases as well.
    - There must not be any phrase in your answer that does not exist the practitioner paragraph.

    Example correct answer:

    Reasons:
    1. Eddie's autism is profound and restricts his ability to acquire new knowledge. (Confidence Score: 0.97)
    2. Eddie's current behaviors do not cause harm to himself or others, and have minimal impact on his daily activities. (Confidence Score: 0.95)
    3. Eddie finds comfort in his current routines and any modifications may provoke feelings of distress or anxiety. (Confidence Score: 0.93)
    4. Teaching Eddie new skills or behaviors necessitates a substantial investment of time and resources. (Confidence Score: 0.90)
    5. The advantages of teaching Eddie new skills or behaviors may not be significant or worthwhile in enhancing his quality of life. (Confidence Score: 0.88)

    None:
    1. I am a health practitioner who is managing Eddie, a person with autism, (Confidence Score: 0.99)
    2. As the NDIS practitioner managing Eddie's disability, I must acknowledge the importance of teaching him new skills. (Confidence Score: 0.95)
    3. However, in this particular instance, I was unable to teach Eddie for several reasons. (Confidence Score: 0.94)
    """
    return prompt

In [None]:
def get_response_chatgpt(prompt):
    """Send the prompt to the configured chat deployment and return its reply.

    The request carries a fixed system message plus the prompt as the user
    message, with temperature 0. The text of the first choice is returned.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = openai.ChatCompletion.create(
        engine=deployment_name,
        messages=conversation,
        temperature=0,
    )
    return completion["choices"][0]["message"]["content"]

In [None]:
#query = """
#As the NDIS practitioner responsible for managing Eddie, I have not taught him any new skills or alternative behaviours for a number of reasons. Firstly, Eddie has a very limited capacity to learn due to the severity of his autism. Secondly, his behaviours are not causing any harm to himself or others, and are not significantly interfering with his ability to participate in daily activities. Thirdly, Eddie is comfortable with his current routines and any changes to these routines could cause him distress or anxiety. Fourthly, Eddie has a strong support network of family and carers who are able to manage his current behaviours effectively without the need for additional skills or behaviours to be taught. Finally, as Eddie is non-verbal and has limited communication skills, teaching him new skills or behaviours would require a significant amount of time and resources, which may not necessarily lead to any meaningful improvements in his quality of life. Based on these reasons, it is not necessary to teach Eddie any new skills or behaviours at this time.
#"""
#prompt = get_prompt(query)
#response = get_response_chatgpt(prompt)
#result_df = process_response(response, query)
#print(response)
#display(result_df)

In [None]:
def convert_df(result_df):
    """Reshape the scored phrases into ``text`` / ``prediction`` columns.

    ``text`` holds the phrase; ``prediction`` holds a one-element list
    containing the ``[topic, score]`` pair for that phrase.
    """
    def as_prediction(row):
        # One [label, score] pair per record, wrapped in an outer list.
        return [[row['Topic'], row['Score']]]

    converted = pd.DataFrame(columns=['text', 'prediction'])
    converted['text'] = result_df['Phrase']
    converted['prediction'] = result_df.apply(as_prediction, axis=1)
    return converted

In [None]:
# Colour associated with each topic label; the key order is also the order in
# which topics are iterated when results are displayed.
topic_color_dict = {
    'REASON': '#90EE90',
    'NO REASON': '#F08080',
}

def color(df, color):
    return df.style.format({'Score': '{:,.2%}'.format}).bar(subset=['Score'], color=color)

def annotate_query(highlights, query, topics):
    """Locate each highlighted phrase in the query and describe it as a span.

    Args:
        highlights: Phrases to look for in ``query``.
        query: The text to search.
        topics: Labels paired position-by-position with ``highlights``.

    Returns:
        A list of ``{"start", "end", "label"}`` dicts, one for each phrase
        that occurs in ``query`` (first occurrence, ignoring case). Empty
        phrases and phrases that do not occur are skipped.
    """
    ents = []
    for phrase, label in zip(highlights, topics):
        if not phrase:
            # An empty pattern would match at offset 0 with zero length.
            continue
        # Phrases are literal text, not patterns: escape them so characters
        # such as "(", "?" or "." neither change the match nor raise re.error.
        match = re.search(re.escape(phrase), query, re.IGNORECASE)
        if match is not None:
            ents.append({"start": match.start(), "end": match.end(), "label": label})
    return ents

def path_to_image_html(path):
    """Return the value unchanged.

    display_final_df passes this to DataFrame.to_html as the formatter for
    the MENTIONED column, so the cell text is emitted as-is.
    """
    return path

# Minimum confidence score for a non-"NO REASON" phrase to be kept
# (compared with >= in on_bhvr_button_next).
passing_score = 0.75
# A criterion is reported as mentioned when its Final_Score is strictly
# greater than this value (see display_final_df).
final_passing = 0.0
def display_final_df(agg_df):
    """Show a centred YES/NO table stating whether a reason was mentioned.

    Args:
        agg_df: A DataFrame indexed by topic with a ``Final_Score`` column,
            or any string to signal that there is nothing to score (the
            criterion is then reported as ``NO``).
    """
    crits = ['REASON']
    if isinstance(agg_df, str):
        df = pd.DataFrame({'Unneeded Teaching Reason': [crits[0]], 'MENTIONED': ['NO']})
    else:
        scored_topics = agg_df.index.tolist()
        present = [crit for crit in crits if crit in scored_topics]
        missing = [crit for crit in crits if crit not in present]
        # A criterion passes when its share of the score exceeds the threshold.
        verdicts = [
            'YES' if agg_df.loc[crit, 'Final_Score'] > final_passing else 'NO'
            for crit in present
        ]
        df = pd.DataFrame({'Unneeded Teaching Reason': present, 'MENTIONED': verdicts})
        if len(missing) > 0:
            # Criteria absent from the aggregate are listed last, as NO.
            absent_df = pd.DataFrame({'Unneeded Teaching Reason': missing, 'MENTIONED': ['NO'] * len(missing)})
            df = pd.concat([df, absent_df])
    df = df.set_index('Unneeded Teaching Reason')
    pd.set_option('display.max_colwidth', None)
    table_html = df.to_html(
        classes=["align-center"],
        index=True,
        escape=False,
        formatters=dict(MENTIONED=path_to_image_html),
    )
    display(HTML('<div style="text-align: center;">' + table_html + '</div>'))
    

### Please indicate why the teaching of a new skill, alternative behaviour or functionally equivalent replacement behaviour is not needed

In [None]:
#demo with Voila

def _make_action_button(caption):
    """Build one green action button; the caption is also used as tooltip."""
    return widgets.Button(
        description=caption,
        disabled=False,
        button_style='success', # 'success', 'info', 'warning', 'danger' or ''
        tooltip=caption,
        icon='check',
        layout={'height': '70px', 'width': '250px'}
    )

# Answer entry area.
bhvr_label = widgets.Label(value='Please type your answer:')
bhvr_text_input = widgets.Textarea(
    value='',
    placeholder='Type your answer',
    description='',
    disabled=False,
    layout={'height': '300px', 'width': '90%'}
)

# The three workflow steps: score, push to Argilla, evaluate.
bhvr_nlp_btn = _make_action_button('Score Answer')
bhvr_agr_btn = _make_action_button('Validate Data')
bhvr_eval_btn = _make_action_button('Evaluate Model')
btn_box = widgets.HBox(
    [bhvr_nlp_btn, bhvr_agr_btn, bhvr_eval_btn],
    layout={'width': '100%', 'height': '160%'}
)

# Output area shared by all three button handlers.
bhvr_outt = widgets.Output()
bhvr_outt.layout.height = '100%'
bhvr_outt.layout.width = '100%'
bhvr_box = widgets.VBox(
    [bhvr_text_input, btn_box, bhvr_outt],
    layout={'width': '100%', 'height': '160%'}
)

# State shared between the handlers.
dataset_rg_name = 'pbsp-page4-unneeded-teaching-reason-argilla-ds'
agrilla_df = None   # records produced by the latest scoring run
annotated = False   # set to True once the records were logged to Argilla
def on_bhvr_button_next(b):
    """Score the typed answer and render highlights, per-topic scores and verdict.

    The answer is sent to the model, the reply is parsed into phrases, and
    phrases that are not "NO REASON" and reach ``passing_score`` are
    highlighted in the text and listed with their scores. The records to be
    sent to Argilla are stored in the module-level ``agrilla_df``; it is left
    as ``None`` when the run produces no records.

    Args:
        b: The clicked button (unused).
    """
    global agrilla_df
    with bhvr_outt:
        clear_output()
        # Forget the previous answer's records first, so "Validate Data" can
        # never upload stale rows if this run fails or yields nothing.
        agrilla_df = None
        query = bhvr_text_input.value
        prompt = get_prompt(query)
        response = get_response_chatgpt(prompt)
        result_df = process_response(response, query)
        sub_result_df = result_df[(result_df['Score'] >= passing_score) & (result_df['Topic'] != 'NO REASON')]
        sub_2_result_df = result_df[result_df['Topic'] == 'NO REASON']

        if len(sub_result_df) == 0:
            # No accepted reason: echo the answer and report "NO".
            print(query)
            display_final_df('None')
            if len(sub_2_result_df) > 0:
                agrilla_df = sub_2_result_df.copy()
            return

        highlights = sub_result_df['Phrase'].tolist()
        highlight_topics = sub_result_df['Topic'].tolist()
        ents = annotate_query(highlights, query, highlight_topics)
        # Colour by each entity's own label: `ents` can be shorter than
        # `highlight_topics` when a phrase is not found, so pairing the two
        # lists by position could mismatch them.
        colors = {ent['label']: topic_color_dict[ent['label']] for ent in ents}

        ex = [{"text": query,
               "ents": ents,
               "title": None}]
        title = "Unneeded Teaching Reason Highlights"
        display(HTML(f'<center><h1>{title}</h1></center>'))
        # jupyter=False makes displacy return the markup instead of displaying
        # it and returning None; it is shown exactly once below, in the same
        # MathJax-ignoring wrapper displacy uses in Jupyter mode.
        html = displacy.render(ex, style="ent", manual=True, jupyter=False, options={'colors': colors})
        display(HTML(f'<span class="tex2jax_ignore">{html}</span>'))

        title = "Unneeded Teaching Reason Classifications"
        display(HTML(f'<center><h1>{title}</h1></center>'))
        for top in topic_color_dict:
            top_result_df = sub_result_df[sub_result_df['Topic'] == top]
            if len(top_result_df) > 0:
                top_result_df = top_result_df.sort_values(by='Score', ascending=False).reset_index(drop=True)
                top_result_df = top_result_df.set_index('Phrase')
                top_result_df = top_result_df[['Score']]
                display(HTML(
                    f'<left><h2 style="text-decoration: underline; text-decoration-color:{topic_color_dict[top]};">{top}</h2></left>'))
                display(color(top_result_df, topic_color_dict[top]))

        # Per-topic total and its share (in percent) of all accepted scores.
        agg_df = sub_result_df.groupby('Topic')['Score'].sum().to_frame()
        agg_df.index.name = 'Topic'
        agg_df.columns = ['Total Score']
        agg_df = agg_df.assign(
            Final_Score=lambda x: x['Total Score'] / x['Total Score'].sum() * 100.00
        )
        agg_df = agg_df.sort_values(by='Final_Score', ascending=False)
        agg_df['Topic'] = agg_df.index
        rem_topics = [x for x in topic_color_dict if x not in agg_df.Topic.tolist()]
        if len(rem_topics) > 0:
            # Topics without accepted phrases get a zero row, indexed by topic
            # name like the aggregated rows.
            rem_agg_df = pd.DataFrame(
                {'Topic': rem_topics, 'Final_Score': 0.0, 'Total Score': 0.0},
                index=pd.Index(rem_topics, name='Topic'),
            )
            agg_df = pd.concat([agg_df, rem_agg_df])
        title = "Final Scores"
        display(HTML(f'<left><h1>{title}</h1></left>'))
        display_final_df(agg_df)

        # Records for Argilla: accepted reasons followed by the "NO REASON" phrases.
        if len(sub_2_result_df) > 0:
            sub_result_df = pd.concat([sub_result_df, sub_2_result_df]).reset_index(drop=True)
        agrilla_df = sub_result_df.copy()

def on_agr_button_next(b):
    """Push the latest scored phrases to Argilla for manual validation.

    Shows a red reminder instead when no answer has been scored yet.
    """
    global agrilla_df, annotated
    with bhvr_outt:
        clear_output()
        if agrilla_df is None:
            display(Markdown("<h2 style='color:red; text-align:center;'>Please score the answer first!</h2>"))
            return
        # Reshape to text/prediction columns and wrap as a text-classification dataset.
        records = rg.DatasetForTextClassification.from_pandas(convert_df(agrilla_df))
        # Replace whatever was previously stored under this dataset name.
        rg.delete(dataset_rg_name, workspace="admin")
        rg.log(records, name=dataset_rg_name, workspace="admin")
        # Register every topic label so all classes can be chosen while annotating.
        label_settings = rg.TextClassificationSettings(label_schema=list(topic_color_dict.keys()))
        rg.configure_dataset(name=dataset_rg_name, workspace="admin", settings=label_settings)
        annotated = True
            
def on_eval_button_next(b):
    """Display the F1 visualisation for the Argilla dataset.

    Shows a red reminder instead when the data has not been validated yet.
    """
    global annotated
    with bhvr_outt:
        clear_output()
        if not annotated:
            display(Markdown("<h2 style='color:red; text-align:center;'>Please score the answer and validate the data first!</h2>"))
            return
        metric = f1(dataset_rg_name)
        display(metric.visualize())

# Attach each button to its handler.
bhvr_nlp_btn.on_click(on_bhvr_button_next)
bhvr_agr_btn.on_click(on_agr_button_next)
bhvr_eval_btn.on_click(on_eval_button_next)

# Render the label followed by the input / buttons / output container.
display(bhvr_label, bhvr_box)