Spaces:

lorenpe2
/

next-sentence-probability

Runtime error

File size: 4,324 Bytes

import os
import json
from typing import Dict, List, Tuple, Union

import torch
import pandas
import streamlit as st
import matplotlib.pyplot as plt

from inference_tokenizer import NextSentencePredictionTokenizer


@st.cache_resource
def get_model(model_path):
    """Load the BERT next-sentence-prediction model found at *model_path*.

    The call is wrapped in ``st.cache_resource``, and the model is switched
    to evaluation mode before it is returned.
    """
    # Imported lazily so that ``transformers`` is only loaded when a model
    # is actually requested.
    from transformers import BertForNextSentencePrediction

    # ``eval()`` returns the module itself, so the call can be chained.
    return BertForNextSentencePrediction.from_pretrained(model_path).eval()


@st.cache_resource
def get_tokenizer(tokenizer_path):
    """Build the inference tokenizer for the checkpoint at *tokenizer_path*.

    A ``BertTokenizer`` is loaded from the ``tokenizer`` sub-directory of
    *tokenizer_path* and wrapped in a ``NextSentencePredictionTokenizer``.
    The call is wrapped in ``st.cache_resource``.
    """
    from transformers import BertTokenizer

    bert_tokenizer = BertTokenizer.from_pretrained(os.path.join(tokenizer_path, "tokenizer"))

    # TODO: pick the delimiter from checkpoint metadata instead of comparing
    # against a hard-coded path.
    if tokenizer_path == "./model/e09d71f55f4b6fc20135f856bf029322a3265d8d":
        delimiter = "[unused1]"
        bert_tokenizer.add_special_tokens({"additional_special_tokens": [delimiter]})
    else:
        delimiter = " "

    return NextSentencePredictionTokenizer(
        bert_tokenizer,
        special_token=delimiter,
        padding="max_length",
        max_length_ctx=256,
        max_length_res=64,
        truncation="only_first",
        # NumPy output: per the original author's note, the arrays are
        # converted to tensors later because of memory problems with tensors.
        return_tensors="np",
        is_split_into_words=True,
    )


# Checkpoint directories the user can choose between.
_MODEL_CHOICES = (
    './model/c3c3bdb7ad80396e69de171995e2038f900940c8',
    './model/e09d71f55f4b6fc20135f856bf029322a3265d8d',
)

model_option = st.selectbox('Which model do you want to use?', _MODEL_CHOICES)

# Model and tokenizer are both loaded from the selected checkpoint directory.
model = get_model(model_option)
inference_tokenizer = get_tokenizer(model_option)


def get_evaluation_data(_context: List, special_delimiter=" "):
    """Flatten aggregated evaluation entries into ``[context, sentence, label]`` rows.

    Each entry of *_context* is a mapping with a ``"context"`` sequence of
    strings and an ``"answers"`` mapping of the form
    ``{source: {label: [sentence, ...]}}``.  The context strings are joined
    with *special_delimiter*, and one row is produced per sentence, in
    iteration order.

    Returns:
        A list of three-element lists ``[joined_context, sentence, label]``.
    """
    rows = []
    for entry in _context:
        joined_context = special_delimiter.join(entry["context"])
        for labelled_sentences in entry["answers"].values():
            rows.extend(
                [joined_context, candidate, label]
                for label, candidates in labelled_sentences.items()
                for candidate in candidates
            )
    return rows


def _predict_follow_probabilities(context_text, sentence):
    """Return ``(prop_follow, prop_not_follow)`` for *sentence* given *context_text*.

    The pair is tokenized with the module-level ``inference_tokenizer`` and
    passed through the module-level ``model``.  A softmax is applied to the
    logits; index 0 of the first row is used as the "follow" probability and
    index 1 as the "not follow" probability.
    """
    input_tensor = inference_tokenizer.get_item(context=context_text, actual_sentence=sentence)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        logits = model(**input_tensor.data).logits
    probabilities = torch.softmax(logits, dim=-1).detach().numpy()[0]
    return probabilities[0], probabilities[1]


def _score_datapoints(evaluation_data):
    """Run the model on every ``(context, sentence, label)`` datapoint.

    Returns:
        ``(results, accuracy)`` where *results* holds one
        ``(context, sentence, label, prop_follow, prop_not_follow)`` tuple per
        datapoint and *accuracy* holds 1 for each datapoint where the model
        agrees with the human label, else 0.
    """
    results = []
    accuracy = []
    for c, s, human_label in evaluation_data:
        prop_follow, prop_not_follow = _predict_follow_probabilities(c, s)
        results.append((c, s, human_label, prop_follow, prop_not_follow))
        # "coherent" means the sentence should follow; any other label is
        # treated as "should not follow".
        if human_label == "coherent":
            accuracy.append(int(prop_follow > prop_not_follow))
        else:
            accuracy.append(int(prop_not_follow > prop_follow))
    return results, accuracy


option = st.selectbox("Choose type of evaluation:",
                      ["01 - Raw text (one line)", "02 - JSON (aggregated)"])

with st.form("input_text"):
    if "01" in option:
        context = st.text_area("Insert context here (sentences divided by ||):")
        actual_text = st.text_input("Actual text")

        # Every form must have a submit button.
        submitted = st.form_submit_button("Submit")
        if submitted:
            # Run the model only once the user has submitted, rather than on
            # every script rerun (including the first load with empty input).
            prop_follow, prop_not_follow = _predict_follow_probabilities(context, actual_text)

            fig, ax = plt.subplots()
            ax.pie([prop_follow, prop_not_follow], labels=["Probability - Follow", "Probability - Not Follow"],
                   autopct='%1.1f%%')
            st.pyplot(fig)
            # Release the figure so figures do not pile up across reruns.
            plt.close(fig)
    elif "02" in option:
        context = st.text_area("Insert JSON here")

        # Every form must have a submit button.
        submitted = st.form_submit_button("Submit")
        if submitted:
            try:
                evaluation_data = get_evaluation_data(_context=json.loads(context))
            except (json.JSONDecodeError, KeyError, TypeError, AttributeError) as error:
                # Empty/malformed JSON or an unexpected structure: report it
                # in the UI instead of crashing the app.
                st.error(f"Could not read the evaluation JSON: {error!r}")
            else:
                if evaluation_data:
                    results, accuracy = _score_datapoints(evaluation_data)
                    # Shown as a real percentage to match the "%" suffix.
                    st.metric(label="Accuracy", value=f"{100 * sum(accuracy) / len(accuracy):.1f} %")
                    df = pandas.DataFrame(results, columns=["Context", "Query", "Human Label", "Probability (follow)", "Probability (not-follow)"])
                    st.dataframe(df)
                else:
                    # Avoids dividing by zero when there is nothing to score.
                    st.warning("The JSON did not contain any sentences to evaluate.")