File size: 931 Bytes
c77734f
97b056e
 
 
 
7365e02
 
 
 
97b056e
 
7365e02
97b056e
 
 
7365e02
 
 
97b056e
 
8397f59
97b056e
 
7365e02
97b056e
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import streamlit as st


@st.cache_resource
def prepare_model():
    """
    Load the tokenizer and the model used for paper classification.

    Both objects are loaded from the "oracat/bert-paper-classifier"
    checkpoint. The function is wrapped in ``st.cache_resource`` so that the
    loaded objects are created once and then shared across Streamlit reruns;
    ``st.cache_data`` is meant for serializable data and would make a
    serialized copy of the model on every call.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained("oracat/bert-paper-classifier")
    model = AutoModelForSequenceClassification.from_pretrained("oracat/bert-paper-classifier")
    return (tokenizer, model)


def process(text: str) -> str:
    """
    Translate incoming text to tokens and classify it.

    Builds a "text-classification" pipeline from the module-level ``model``
    and ``tokenizer`` and runs it on ``text``.

    Args:
        text: The text to classify.

    Returns:
        The value stored under the ``"label"`` key of the first prediction
        returned by the pipeline.
    """
    pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
    # truncation=True is forwarded to the tokenizer so that input longer than
    # the model's maximum sequence length is cut instead of causing a failure.
    result = pipe(text, truncation=True)[0]
    return result["label"]


# Load the tokenizer and model at script start; prepare_model() is cached by
# Streamlit, so reruns triggered by widget changes do not reload them.
tokenizer, model = prepare_model()

st.markdown("### Hello, paper classifier!")

title = st.text_input("Enter the title...")
abstract = st.text_area("... and maybe the abstract of the paper you want to classify")

text = "\n".join([title, abstract])

# Classify only once the user has typed something; with both fields empty
# the model would be run on a lone newline and show a meaningless label.
if title.strip() or abstract.strip():
    st.markdown(f"{process(text)}")