from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import streamlit as st


@st.cache_resource
def prepare_model():
    """
    Prepare the tokenizer and the model for classification.

    Both objects are loaded from the "oracat/bert-paper-classifier"
    checkpoint. The function is cached with ``st.cache_resource`` (rather
    than ``st.cache_data``, which is meant for serializable data and
    pickles its return value) so the tokenizer and model are loaded once
    and the same objects are reused across Streamlit reruns.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    checkpoint = "oracat/bert-paper-classifier"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    return (tokenizer, model)


def process(text):
    """
    Translate incoming text to tokens and classify it.

    Uses the module-level ``model`` and ``tokenizer`` to build a
    text-classification pipeline and runs it on ``text``.

    Args:
        text: The text to classify.

    Returns:
        The ``'label'`` entry of the first prediction returned by the
        pipeline.
    """
    pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
    # truncation=True is forwarded to the tokenizer so inputs longer than
    # the model's maximum sequence length are cut instead of failing.
    result = pipe(text, truncation=True)[0]
    return result['label']


# Load the tokenizer and model up front; process() reads these module-level
# names when it builds its pipeline.
tokenizer, model = prepare_model()

st.markdown("### Hello, paper classifier!")

title = st.text_input("Enter the title...")
abstract = st.text_area("... and maybe the abstract of the paper you want to classify")

text = "\n".join([title, abstract])

# Only classify once the user has typed something: with both fields empty,
# text is just "\n" and a predicted label would be meaningless.
if text.strip():
    st.markdown(process(text))