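"""Streamlit demo for dialogue coherence scoring with BERT next-sentence prediction.

Fine-tuned BertForNextSentencePrediction checkpoints are discovered under
./model/<name>/ and used to score how well a candidate turn follows a given
dialogue context, either interactively or over JSON evaluation data.
"""
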
import os
import glob
import json
from typing import Dict, List, Tuple, Union

import torch
import pandas
import streamlit as st
import matplotlib.pyplot as plt

from inference_tokenizer import NextSentencePredictionTokenizer


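# Loading is wrapped in st.cache_resource so each checkpoint and tokenizer is
# read from disk once per session rather than on every Streamlit rerun.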
@st.cache_resource
def get_model(model_path):
    from transformers import BertForNextSentencePrediction
    _model = BertForNextSentencePrediction.from_pretrained(model_path)
    _model.eval()
    return _model


@st.cache_resource
def get_tokenizer(tokenizer_path):
    from transformers import BertTokenizer
    tokenizer = BertTokenizer.from_pretrained(tokenizer_path)
    if os.path.isfile(os.path.join(tokenizer_path, "meta-info.json")):
        with open(os.path.join(tokenizer_path, "meta-info.json"), "r") as f:
            meta_info = json.load(f)
            tokenizer_args = meta_info["tokenizer_args"]
            special_token = meta_info["kwargs"]["special_token"]
    else:
        raise FileNotFoundError("Tokenizer is provided without meta-info.json. Cannot infer the proper configuration!")

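    # Register the custom turn separator as a special token so the tokenizer
    # never splits it; a plain space needs no registration.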
    if special_token != " ":
        tokenizer.add_special_tokens({"additional_special_tokens": [special_token]})
    print(special_token)
    print(tokenizer_args)
    _inference_tokenizer = NextSentencePredictionTokenizer(tokenizer, **tokenizer_args)
    return _inference_tokenizer


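# Discover available models: each ./model/<name>/ directory is expected to
# hold an info.json with at least "model" and "description" keys alongside
# the saved checkpoint and tokenizer files.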
model_info_files = glob.glob("./model/*/info.json")
models = {}
for info_path in model_info_files:
    with open(info_path, "r") as f:
        model_data = json.load(f)
        model_data["path"] = info_path.replace("info.json", "")
        models[model_data["model"]] = model_data

model_name = st.selectbox('Which model do you want to use?',
                          sorted(models.keys()))

model_path = models[model_name]["path"]

model = get_model(model_path)
inference_tokenizer = get_tokenizer(model_path)


def get_evaluation_data_from_json(_context: List) -> List[Tuple[List, str, str]]:
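    """Flatten aggregated JSON data into (context, sentence, label) triples.

    Expected input shape (inferred from the traversal below):
    [{"context": [<turn>, ...],
      "answers": {<source>: {<label>: [<sentence>, ...]}}}, ...]
    where <label> is a human judgement such as "coherent".
    """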
    output_data = []
    for _dict in _context:
        _dict: Dict
        for source in _dict["answers"].values():
            for _t, sentences in source.items():
                for sentence in sentences:
                    output_data.append((_dict["context"], sentence, _t))
    return output_data


def get_evaluation_data_from_dialogue(_context: List) -> List[Tuple[List, str, Union[str, None]]]:
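    """Build unlabelled evaluation pairs from a raw dialogue transcript.

    Every turn after the first is paired with each suffix of the window of
    up to five preceding turns, so the model can be probed with
    progressively longer contexts. The human label is unknown (None).
    """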
    output_data = []
    for idx, _line in enumerate(_context):
        if idx == 0:
            continue
        actual_context = _context[max(0, idx - 5):idx]
        actual_sentence = _line
        # Pair the turn with every suffix of the window, from the single
        # previous turn up to the full (at most five-turn) context.
        for context_len in range(1, len(actual_context) + 1):
            output_data.append((actual_context[-context_len:], actual_sentence, None))
    return output_data


option = st.selectbox("Choose type of input:",
                      ["01 - String (one turn per line)",
                       "02 - JSON (aggregated)",
                       "03 - JSON (example CA-OOD)",
                       "04 - JSON (example Elysai)",
                       "05 - Diagnostic mode"])

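# Mode 01 scores a single (context, turn) pair; modes 02-04 batch-evaluate
# labelled JSON data; mode 05 probes each turn of a dialogue against
# progressively longer context windows.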

with st.form("input_text"):
    if "01" in option:
        context = st.text_area("Insert context here (one turn per line):")
        actual_text = st.text_input("Insert current turn:")
        context = list(filter(lambda x: len(x.strip()) >= 1, context.split("\n")))

        submitted = st.form_submit_button("Submit")
        if submitted:
            input_tensor = inference_tokenizer.get_item(context=context, actual_sentence=actual_text)
            output_model = model(**input_tensor.data).logits

            # Softmax over the NSP head: index 0 is "follows", index 1 is "does not follow".
            output_model = torch.softmax(output_model, dim=-1).detach().numpy()[0]
            prop_follow = output_model[0]
            prop_not_follow = output_model[1]

            fig, ax = plt.subplots()
            ax.pie([prop_follow, prop_not_follow], labels=["Probability - Follow", "Probability - Not Follow"],
                   autopct='%1.1f%%')
            st.pyplot(fig)
    elif "02" in option or "03" in option or "04" in option:
        from data.example_data import ca_ood, elysai
        choices = [ca_ood, elysai]
        # With Python >= 3.10 this could be a match statement:
        # match option.split("-")[0].strip():
        #     case "03":
        #         text = json.dumps(choices[0])
        #     case "04":
        #         text = json.dumps(choices[1])
        #     case _:
        #         text = ""
        option = option.split("-")[0].strip()
        if option == "03":
            text = json.dumps(choices[0])
        elif option == "04":
            text = json.dumps(choices[1])
        else:
            text = ""
        context = st.text_area("Insert JSON here:", value=text)

        if "{" in context:
            data_for_evaluation = get_evaluation_data_from_json(_context=json.loads(context))
        results = []
        accuracy = []

        submitted = st.form_submit_button("Submit")
        if submitted:
            for datapoint in data_for_evaluation:
                c, s, human_label = datapoint
                input_tensor = inference_tokenizer.get_item(context=c, actual_sentence=s)
                output_model = model(**input_tensor.data).logits
                output_model = torch.softmax(output_model, dim=-1).detach().numpy()[0]
                prop_follow = output_model[0]
                prop_not_follow = output_model[1]

                results.append((c, s, human_label, prop_follow, prop_not_follow))
                if human_label == "coherent":
                    accuracy.append(int(prop_follow > prop_not_follow))
                else:
                    accuracy.append(int(prop_not_follow > prop_follow))
            if accuracy:
                st.metric(label="Accuracy", value=f"{sum(accuracy) / len(accuracy):.2%}")
            df = pandas.DataFrame(results, columns=["Context", "Query", "Human Label", "Probability (follow)",
                                                    "Probability (not-follow)"])
            st.dataframe(df)
    elif "05" in option:
        context = st.text_area("Insert dialogue here (one turn per line):")
        submitted = st.form_submit_button("Submit")
        if submitted:
            aggregated_result = []
            data_for_evaluation = get_evaluation_data_from_dialogue(context.split("\n"))
            for datapoint in data_for_evaluation:
                c, s, _ = datapoint
                input_tensor = inference_tokenizer.get_item(context=c, actual_sentence=s)
                output_model = model(**input_tensor.data).logits
                output_model = torch.softmax(output_model, dim=-1).detach().numpy()[0]
                prop_follow = output_model[0]
                prop_not_follow = output_model[1]

                aggregated_result.append((c, s, prop_follow))
            st.table(pandas.DataFrame(aggregated_result,
                                      columns=["Context", "Query", "Probability (follow)"]))

st.markdown("## Description of models:")
for model_data in sorted(models.values(), key=lambda m: m["model"]):
    st.write(f"{model_data['model']} - {model_data['description']}")