Spaces:
Sleeping
Sleeping
File size: 6,406 Bytes
c77734f 97b056e 7365e02 97b056e 69508a2 7365e02 97b056e 7365e02 847199e 97b056e 847199e 97b056e 7365e02 97b056e 2ecd2f9 69508a2 2ecd2f9 69508a2 2ecd2f9 69508a2 2ecd2f9 69508a2 2ecd2f9 97b056e 2ecd2f9 69508a2 c51be4c 2ecd2f9 c51be4c 2ecd2f9 c51be4c 2ecd2f9 69508a2 c51be4c 2ecd2f9 c51be4c 2ecd2f9 c51be4c 2ecd2f9 69508a2 ab43a0e 2ecd2f9 69508a2 ab43a0e 2ecd2f9 780f571 2ecd2f9 780f571 2c1f5f8 ab43a0e 2ecd2f9 69508a2 97b056e 2ecd2f9 847199e 670de19 f02578a 847199e 670de19 847199e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import streamlit as st
@st.cache_resource
def prepare_model():
    """
    Prepare the tokenizer and the model for classification.

    The pair is cached with ``st.cache_resource`` rather than
    ``st.cache_data``: ``cache_data`` serializes the return value and hands
    back a fresh copy on each call, which is meant for plain data, whereas
    ``cache_resource`` keeps a single shared instance, which is what
    Streamlit documents for ML models.

    Returns:
        A ``(tokenizer, model)`` tuple loaded from the
        ``oracat/bert-paper-classifier`` checkpoint.
    """
    checkpoint = "oracat/bert-paper-classifier"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    return (tokenizer, model)
def process(text, threshold=0.95):
    """
    Translate incoming text to tokens and classify it.

    The pipeline is asked for its top 3 labels. They are ordered by
    descending score and kept until their cumulative score reaches
    ``threshold``; if the threshold is never reached, all of them are kept.

    Args:
        text: The text to classify (title and abstract joined by the caller).
        threshold: Cumulative score at which to stop adding labels.

    Returns:
        A list of ``{"label": ..., "score": ...}`` dicts, best first.
        The list is empty if the pipeline produced no predictions.
    """
    pipe = pipeline("text-classification", model=model, tokenizer=tokenizer, top_k=3)
    # truncation=True is forwarded to the tokenizer so that an over-long
    # abstract is cut to the tokenizer's configured maximum length instead
    # of failing inside the model.
    predictions = pipe(text, truncation=True)[0]
    ranked = sorted(predictions, key=lambda item: item["score"], reverse=True)

    # Collect items while accumulating; unlike slicing by the last loop
    # index, this is also well-defined when `ranked` is empty.
    selected = []
    cum_score = 0
    for item in ranked:
        selected.append(item)
        cum_score += item["score"]
        if cum_score >= threshold:
            break
    return selected
tokenizer, model = prepare_model()

# State management
#
# The app state consists of the title, the abstract and the output.
# Keeping title and abstract in the session state lets the demo
# buttons pre-fill the input fields.
for _state_key in ("title", "abstract", "output"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = ""

# Simple streamlit interface
st.markdown("### Hello, paper classifier!")
## Demo buttons and their callbacks
def demo_immunology_callback():
    """
    Pre-fill the title and abstract fields with an immunology example
    (https://www.biorxiv.org/content/10.1101/2022.12.01.518788v1)
    """
    demo_fields = {
        "title": "Using TCR and BCR sequencing to unravel the role of T and B cells in abdominal aortic aneurysm",
        "abstract": "Recent evidence suggests that AAA displays characteristics of an autoimmune disease and it gained increasing prominence that specific antigen-driven T cells in the aortic tissue may contribute to the initial immune response. We found no clonal expansion of TCRs or BCRs in elastase-induced AAA in mice.",
    }
    # Write the title first and the abstract second, one key at a time.
    for field, value in demo_fields.items():
        st.session_state[field] = value
def demo_virology_callback():
    """
    Pre-fill the title and abstract fields with a virology example
    (https://doi.org/10.1016/j.cell.2020.08.001)
    """
    demo_fields = {
        "title": "Severe COVID-19 Is Marked by a Dysregulated Myeloid Cell Compartment",
        "abstract": "Coronavirus disease 2019 (COVID-19) is a mild to moderate respiratory tract infection, however, a subset of patients progress to severe disease and respiratory failure. The mechanism of protective immunity in mild forms and the pathogenesis of severe COVID-19 associated with increased neutrophil counts and dysregulated immune responses remain unclear. In a dual-center, two-cohort study, we combined single-cell RNA-sequencing and single-cell proteomics of whole-blood and peripheral-blood mononuclear cells to determine changes in immune cell composition and activation in mild versus severe COVID-19 (242 samples from 109 individuals) over time. HLA-DRhiCD11chi inflammatory monocytes with an interferon-stimulated gene signature were elevated in mild COVID-19. Severe COVID-19 was marked by occurrence of neutrophil precursors, as evidence of emergency myelopoiesis, dysfunctional mature neutrophils, and HLA-DRlo monocytes. Our study provides detailed insights into the systemic immune response to SARS-CoV-2 infection and reveals profound alterations in the myeloid cell compartment associated with severe COVID-19.",
    }
    # Write the title first and the abstract second, one key at a time.
    for field, value in demo_fields.items():
        st.session_state[field] = value
def demo_microbiology_callback():
    """
    Pre-fill the title and abstract fields with a microbiology example
    (https://doi.org/10.1016/j.cell.2023.01.002)
    """
    demo_fields = {
        "title": "Bacterial droplet-based single-cell RNA-seq reveals antibiotic-associated heterogeneous cellular states",
        "abstract": "We introduce BacDrop, a highly scalable technology for bacterial single-cell RNA sequencing that has overcome many challenges hindering the development of scRNA-seq in bacteria. BacDrop can be applied to thousands to millions of cells from both gram-negative and gram-positive species. It features universal ribosomal RNA depletion and combinatorial barcodes that enable multiplexing and massively parallel sequencing. We applied BacDrop to study Klebsiella pneumoniae clinical isolates and to elucidate their heterogeneous responses to antibiotic stress. In an unperturbed population presumed to be homogeneous, we found within-population heterogeneity largely driven by the expression of mobile genetic elements that promote the evolution of antibiotic resistance. Under antibiotic perturbation, BacDrop revealed transcriptionally distinct subpopulations associated with different phenotypic outcomes including antibiotic persistence. BacDrop thus can capture cellular states that cannot be detected by bulk RNA-seq, which will unlock new microbiological insights into bacterial responses to perturbations and larger bacterial communities such as the microbiome.",
    }
    # Write the title first and the abstract second, one key at a time.
    for field, value in demo_fields.items():
        st.session_state[field] = value
def clear_callback():
    """
    Reset the title, abstract and output entries of the session state
    to empty strings
    """
    for field in ("title", "abstract", "output"):
        st.session_state[field] = ""
# One row of four equally wide columns, one button per column.
col1, col2, col3, col4 = st.columns([1, 1, 1, 1])
button_specs = (
    (col1, "Demo: immunology", demo_immunology_callback),
    (col2, "Demo: virology", demo_virology_callback),
    (col3, "Demo: microbiology", demo_microbiology_callback),
    (col4, "Clear fields", clear_callback),
)
for column, label, handler in button_specs:
    with column:
        st.button(label, on_click=handler)
## Input fields
# The empty element is created here but not referenced again in the visible code.
placeholder = st.empty()

# Both widgets are keyed to session state, so the demo/clear callbacks
# can set their contents.
title = st.text_input("Enter the title:", key="title")
abstract = st.text_area(
    "... and maybe the abstract of the paper you want to classify:",
    key="abstract",
)

# The classifier receives the title and the abstract separated by a newline.
text = f"{title}\n{abstract}"
## Output
# Classify only when at least one field contains non-whitespace text.
if text.strip():
    results = process(text)
    if results:
        # The first entry is the headline category; the rest are alternatives.
        best, *others = results
        out_text = f"This paper is likely to be from the category **{best['label']}** *(score {best['score']:.2f})*."
        if others:
            alternatives = " and ".join(
                f"{item['label']} *(score {item['score']:.2f})*" for item in others
            )
            out_text += "\n\n(Other fitting categories are " + alternatives + ".)"
    else:
        out_text = ""
    st.markdown(out_text)
|