# Rebiber / app.py
# Page chrome captured from the hosting site's file view (not part of the program):
#   yuchenlin's picture | Update app.py | 42c6ece verified | raw | history blame | 4.23 kB
import os
import uuid
import gradio as gr
import rebiber
# ---------- preload DB ---------- #
# Directory of the installed ``rebiber`` package, kept WITH a trailing path
# separator (joining with "" appends one). Derived via dirname() rather than
# string-replacing "__init__.py", which would silently produce a wrong path
# if ``__file__`` had any other basename (e.g. a compiled ``__init__.pyc``).
PACKAGE_ROOT = os.path.join(
    os.path.dirname(os.path.abspath(rebiber.__file__)), ""
)

# Loaded once at import time and reused by every request.
bib_db = rebiber.construct_bib_db(
    os.path.join(PACKAGE_ROOT, "bib_list.txt"), start_dir=PACKAGE_ROOT
)
# NOTE(review): "abbr.tsv" is passed as a bare name; presumably rebiber
# resolves it relative to its own package directory - confirm.
abbr_dict = rebiber.normalize.load_abbr_tsv("abbr.tsv")
# ---------- helper functions ---------- #
def process(input_bib: str,
            shorten: bool,
            remove_keys: list[str],
            deduplicate: bool,
            sort_ids: bool):
    """Normalize a BibTeX string and write the result to disk.

    Args:
        input_bib: Raw BibTeX text supplied by the user.
        shorten: When true, the preloaded ``abbr_dict`` is forwarded to
            ``rebiber.normalize_bib``; otherwise an empty list is passed.
        remove_keys: Forwarded to ``normalize_bib`` as ``removed_value_names``.
        deduplicate: Forwarded to ``normalize_bib`` as ``deduplicate``.
        sort_ids: Forwarded to ``normalize_bib`` as ``sort``.

    Returns:
        A 3-tuple ``(output_text, run_id, button_update)``. When the input
        contains no ``@`` it is ``("N/A", "", <hide update>)`` and nothing is
        written. Otherwise ``output_text`` is the content of
        ``output_<run_id>.bib`` (left on disk for the download handler),
        ``run_id`` is a fresh ``uuid4`` hex string, and the update makes the
        target component visible.
    """
    if "@" not in input_bib:
        # Nothing that looks like BibTeX. Use gr.update() here as well: the
        # per-component ``gr.Button.update`` classmethod does not exist in
        # newer Gradio releases, and the success path already uses gr.update().
        return "N/A", "", gr.update(visible=False)

    run_id = uuid.uuid4().hex
    in_file = f"input_{run_id}.bib"
    out_file = f"output_{run_id}.bib"

    try:
        # Explicit UTF-8 so non-ASCII author names do not depend on the
        # host's locale encoding.
        with open(in_file, "w", encoding="utf-8") as f:
            f.write(input_bib.replace("\t", " "))

        entries = rebiber.load_bib_file(in_file)
        rebiber.normalize_bib(
            bib_db,
            entries,
            out_file,
            abbr_dict=abbr_dict if shorten else [],
            deduplicate=deduplicate,
            sort=sort_ids,
            removed_value_names=remove_keys,
        )
    finally:
        # The input file is only a hand-off to rebiber; remove it so each
        # request does not leave a stray input_<id>.bib behind.
        if os.path.exists(in_file):
            os.remove(in_file)

    with open(out_file, encoding="utf-8") as f:
        # NOTE(review): as written the search and replacement strings are
        # identical, so this replace is a no-op; presumably it was meant to
        # re-indent fields - confirm the intended widths.
        output_bib = f.read().replace("\n ", "\n ")

    # Show the output and reveal the download button.
    return output_bib, run_id, gr.update(visible=True)
def download_file(run_id: str):
    """Expose the normalized .bib for download.

    Args:
        run_id: Identifier previously returned by ``process``; the file
            looked up is ``output_<run_id>.bib`` in the working directory.

    Returns:
        ``(file_path, <show update>)`` when ``run_id`` is a well-formed id
        whose output file exists, otherwise ``(None, <hide update>)``.
    """
    # run_id round-trips through a textbox component, so a client can send
    # an arbitrary string. Only accept the exact shape uuid4().hex produces
    # (32 lowercase hex digits) before interpolating it into a path.
    looks_like_run_id = (
        isinstance(run_id, str)
        and len(run_id) == 32
        and all(ch in "0123456789abcdef" for ch in run_id)
    )
    file_path = f"output_{run_id}.bib"
    if not looks_like_run_id or not os.path.isfile(file_path):
        # Unknown or stale id: offer no file rather than a broken link.
        return None, gr.update(visible=False)
    return file_path, gr.update(visible=True)
# ---------- demo UI ---------- #
# Sample arXiv-style BibTeX entry used as the initial value of the input
# textbox in the demo UI.
EXAMPLE = """
@article{lin2020birds,
title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
journal={arXiv preprint arXiv:2005.00683},
year={2020}
}
"""
# Build the Gradio page: a Markdown header, then one row with an input column
# (left) and an output column (right), followed by the event wiring.
with gr.Blocks(title="Rebiber", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# **Rebiber** – normalise those BibTeX entries!
🐼 [Project page](https://yuchenlin.xyz/)   |  
:octocat: [GitHub](https://github.com/yuchenlin/rebiber)   |  
🐤 [Tweet](https://twitter.com/billyuchenlin/status/1353850378438070272)
Rebiber replaces arXiv citations with their official venue versions (DBLP / ACL
Anthology), deduplicates, sorts, and can abbreviate conference names.
"""
    )
    with gr.Row():
        # ---------- left column ---------- #
        with gr.Column(scale=3):
            # Raw BibTeX typed or pasted by the user; pre-filled with EXAMPLE.
            input_bib = gr.Textbox(
                label="Input BibTeX",
                value=EXAMPLE,
                lines=15,
                interactive=True,
            )
            # Selected names are passed to process() as ``remove_keys``.
            removekeys = gr.CheckboxGroup(
                ["url", "biburl", "address", "publisher",
                 "pages", "doi", "volume", "bibsource"],
                label="Remove fields",
                info="Select the keys you’d like to strip",
            )
            # The three flags below map to process()'s ``shorten``,
            # ``deduplicate`` and ``sort_ids`` parameters respectively.
            shorten = gr.Checkbox(label="Abbreviate venue names")
            dedup = gr.Checkbox(label="Deduplicate entries")
            sort = gr.Checkbox(label="Sort IDs alphabetically")
            with gr.Row():
                clear_btn = gr.Button("Clear")
                submit_btn = gr.Button("Submit")
            # Invisible textbox that carries the run id from process() to
            # download_file().
            run_uuid = gr.Textbox(visible=False)
        # ---------- right column ---------- #
        with gr.Column(scale=3):
            output_box = gr.Textbox(
                label="Normalised BibTeX",
                interactive=False,
                show_copy_button=True,  # copy-to-clipboard button on the output
            )
            # Both start hidden; process() reveals the button and
            # download_file() reveals the file component.
            download_btn = gr.Button("Generate . bib file", visible=False)
            download_file_component = gr.File(visible=False)
    # ---------- wiring ---------- #
    # Submit: run process() and fill the output box, run id and button state.
    submit_btn.click(
        process,
        inputs=[input_bib, shorten, removekeys, dedup, sort],
        outputs=[output_box, run_uuid, download_btn],
        api_name="process",
    )
    # download_file() returns two values (path, visibility update); both are
    # routed to the same File component, hence it is listed twice.
    download_btn.click(download_file, run_uuid, [download_file_component, download_file_component])
    # Clear only resets the input textbox.
    clear_btn.click(lambda: "", None, input_bib)
# Launch the app only when this file is executed directly, not on import.
if __name__ == "__main__":
    demo.launch()