File size: 6,406 Bytes
c77734f
97b056e
 
 
 
7365e02
 
 
 
97b056e
69508a2
 
 
7365e02
97b056e
 
 
7365e02
 
 
847199e
97b056e
847199e
 
 
 
 
 
 
 
 
 
 
 
97b056e
 
7365e02
97b056e
2ecd2f9
 
69508a2
2ecd2f9
 
 
 
69508a2
 
2ecd2f9
69508a2
 
2ecd2f9
69508a2
 
2ecd2f9
 
 
 
97b056e
 
2ecd2f9
 
 
69508a2
c51be4c
2ecd2f9
c51be4c
2ecd2f9
c51be4c
 
2ecd2f9
 
 
69508a2
c51be4c
2ecd2f9
c51be4c
2ecd2f9
c51be4c
 
2ecd2f9
 
 
69508a2
ab43a0e
 
 
 
 
 
 
 
 
 
2ecd2f9
 
 
 
 
 
 
 
69508a2
ab43a0e
2ecd2f9
780f571
2ecd2f9
780f571
2c1f5f8
 
ab43a0e
2ecd2f9
 
 
 
 
 
 
69508a2
 
 
97b056e
 
 
2ecd2f9
 
 
847199e
 
 
 
 
 
670de19
f02578a
 
 
 
847199e
670de19
847199e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import streamlit as st


@st.cache_resource
def prepare_model():
    """
    Prepare the tokenizer and the model for classification.

    The result is cached with ``st.cache_resource`` rather than
    ``st.cache_data``: the tokenizer and model are heavyweight objects that
    should be created once and shared between reruns, not serialized and
    copied on every access.

    Returns:
        A ``(tokenizer, model)`` tuple loaded from the
        ``oracat/bert-paper-classifier`` checkpoint.
    """
    checkpoint = "oracat/bert-paper-classifier"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    return (tokenizer, model)


def process(text):
    """
    Translate incoming text to tokens and classify it.

    Runs a ``text-classification`` pipeline (``top_k=3``) built from the
    module-level ``model`` and ``tokenizer``, orders the predictions by
    descending score and keeps the shortest leading run whose cumulative
    score reaches 0.95. If the threshold is never reached, all predictions
    are kept.

    Args:
        text: The text to classify.

    Returns:
        A list of prediction dicts (each read via its ``"score"`` key here),
        best first. The list is empty when the pipeline returns no
        predictions.
    """
    pipe = pipeline("text-classification", model=model, tokenizer=tokenizer, top_k=3)

    # The first element of the pipeline output holds the predictions for `text`.
    ranked = sorted(pipe(text)[0], key=lambda x: x["score"], reverse=True)

    # Build the kept prefix directly instead of slicing with the loop index:
    # the index would be unbound (NameError) when there are no predictions.
    kept = []
    cum_score = 0
    for item in ranked:
        kept.append(item)
        cum_score += item["score"]
        if cum_score >= 0.95:
            break

    return kept


# Load (or fetch from the cache) the tokenizer and the model used by process().
tokenizer, model = prepare_model()


# State management
#
# The state of the app is the title, the abstract and the output.
# Session state is used here so that the input fields can be
# pre-filled with values for the demos.

for _state_key in ("title", "abstract", "output"):
    # Only seed a key that is missing, so existing values survive reruns.
    if _state_key not in st.session_state:
        st.session_state[_state_key] = ""


# Simple streamlit interface

st.markdown("### Hello, paper classifier!")


## Demo buttons and their callbacks


def demo_immunology_callback():
    """
    Store the title and abstract of a demo immunology paper in the session state.

    Source: https://www.biorxiv.org/content/10.1101/2022.12.01.518788v1
    """
    demo_fields = {
        "title": "Using TCR and BCR sequencing to unravel the role of T and B cells in abdominal aortic aneurysm",
        "abstract": "Recent evidence suggests that AAA displays characteristics of an autoimmune disease and it gained increasing prominence that specific antigen-driven T cells in the aortic tissue may contribute to the initial immune response. We found no clonal expansion of TCRs or BCRs in elastase-induced AAA in mice.",
    }
    for field, value in demo_fields.items():
        st.session_state[field] = value


def demo_virology_callback():
    """
    Store the title and abstract of a demo virology paper in the session state.

    Source: https://doi.org/10.1016/j.cell.2020.08.001
    """
    demo_fields = {
        "title": "Severe COVID-19 Is Marked by a Dysregulated Myeloid Cell Compartment",
        "abstract": "Coronavirus disease 2019 (COVID-19) is a mild to moderate respiratory tract infection, however, a subset of patients progress to severe disease and respiratory failure. The mechanism of protective immunity in mild forms and the pathogenesis of severe COVID-19 associated with increased neutrophil counts and dysregulated immune responses remain unclear. In a dual-center, two-cohort study, we combined single-cell RNA-sequencing and single-cell proteomics of whole-blood and peripheral-blood mononuclear cells to determine changes in immune cell composition and activation in mild versus severe COVID-19 (242 samples from 109 individuals) over time. HLA-DRhiCD11chi inflammatory monocytes with an interferon-stimulated gene signature were elevated in mild COVID-19. Severe COVID-19 was marked by occurrence of neutrophil precursors, as evidence of emergency myelopoiesis, dysfunctional mature neutrophils, and HLA-DRlo monocytes. Our study provides detailed insights into the systemic immune response to SARS-CoV-2 infection and reveals profound alterations in the myeloid cell compartment associated with severe COVID-19.",
    }
    for field, value in demo_fields.items():
        st.session_state[field] = value


def demo_microbiology_callback():
    """
    Store the title and abstract of a demo microbiology paper in the session state.

    Source: https://doi.org/10.1016/j.cell.2023.01.002
    """
    demo_fields = {
        "title": "Bacterial droplet-based single-cell RNA-seq reveals antibiotic-associated heterogeneous cellular states",
        "abstract": "We introduce BacDrop, a highly scalable technology for bacterial single-cell RNA sequencing that has overcome many challenges hindering the development of scRNA-seq in bacteria. BacDrop can be applied to thousands to millions of cells from both gram-negative and gram-positive species. It features universal ribosomal RNA depletion and combinatorial barcodes that enable multiplexing and massively parallel sequencing. We applied BacDrop to study Klebsiella pneumoniae clinical isolates and to elucidate their heterogeneous responses to antibiotic stress. In an unperturbed population presumed to be homogeneous, we found within-population heterogeneity largely driven by the expression of mobile genetic elements that promote the evolution of antibiotic resistance. Under antibiotic perturbation, BacDrop revealed transcriptionally distinct subpopulations associated with different phenotypic outcomes including antibiotic persistence. BacDrop thus can capture cellular states that cannot be detected by bulk RNA-seq, which will unlock new microbiological insights into bacterial responses to perturbations and larger bacterial communities such as the microbiome.",
    }
    for field, value in demo_fields.items():
        st.session_state[field] = value


def clear_callback():
    """
    Reset the title, abstract and output session-state entries to empty strings
    """
    for field in ("title", "abstract", "output"):
        st.session_state[field] = ""


# One equally wide column per button: three demo presets and a reset.
button_specs = (
    ("Demo: immunology", demo_immunology_callback),
    ("Demo: virology", demo_virology_callback),
    ("Demo: microbiology", demo_microbiology_callback),
    ("Clear fields", clear_callback),
)
col1, col2, col3, col4 = st.columns([1, 1, 1, 1])
for column, (label, callback) in zip((col1, col2, col3, col4), button_specs):
    with column:
        st.button(label, on_click=callback)

## Input fields

# Empty slot rendered above the inputs; nothing in the visible code fills it.
placeholder = st.empty()

# Both widgets are keyed to session-state entries ("title" / "abstract").
title = st.text_input(label="Enter the title:", key="title")
abstract = st.text_area(
    label="... and maybe the abstract of the paper you want to classify:",
    key="abstract",
)

# The classifier input is the title followed by the abstract on a new line.
text = title + "\n" + abstract

## Output

# Classify only when there is some non-whitespace input.
if text.strip():
    results = process(text)
    out_text = ""
    if results:
        # The first entry is the headline category; the rest are alternatives.
        best, *others = results
        out_text = f"This paper is likely to be from the category **{best['label']}** *(score {best['score']:.2f})*."
        if others:
            alternatives = " and ".join(
                f"{item['label']} *(score {item['score']:.2f})*" for item in others
            )
            out_text += "\n\n(Other fitting categories are " + alternatives + ".)"
    st.markdown(out_text)