"""Gradio front-end for Rebiber: normalise BibTeX entries in the browser."""

import os
import uuid

import gradio as gr
import rebiber

# ---------- preload DB ---------- #
# Directory containing the installed rebiber package. Stripping "__init__.py"
# from the module path leaves the trailing path separator in place.
# NOTE(review): presumably rebiber joins `start_dir` with file names by plain
# concatenation, so the trailing separator is kept on purpose — confirm before
# switching to os.path.dirname.
PACKAGE_ROOT = os.path.abspath(rebiber.__file__).replace("__init__.py", "")
bib_db = rebiber.construct_bib_db(
    os.path.join(PACKAGE_ROOT, "bib_list.txt"), start_dir=PACKAGE_ROOT
)
# Resolved relative to the current working directory, not PACKAGE_ROOT.
abbr_dict = rebiber.normalize.load_abbr_tsv("abbr.tsv")


# ---------- helper functions ---------- #
def process(
    input_bib: str,
    shorten: bool,
    remove_keys: list[str],
    deduplicate: bool,
    sort_ids: bool,
):
    """Normalize a BibTeX string and write the result to disk.

    The input is written to ``input_<run_id>.bib`` in the working directory,
    normalised by rebiber into ``output_<run_id>.bib``, and the temporary
    input file is removed afterwards. The output file is kept so that
    ``download_file`` can serve it later.

    Args:
        input_bib: Raw BibTeX text from the input textbox.
        shorten: When true, pass the preloaded abbreviation table to rebiber.
        remove_keys: Field names forwarded as ``removed_value_names``.
        deduplicate: Forwarded to ``rebiber.normalize_bib``.
        sort_ids: Forwarded to ``rebiber.normalize_bib`` as ``sort``.

    Returns:
        A 3-tuple ``(output_text, run_id, button_update)``. When the input
        contains no ``@`` the tuple is ``("N/A", "", <hide button>)``;
        otherwise it carries the normalised text, the hex run id and an
        update that makes the download button visible.
    """
    if not input_bib or "@" not in input_bib:
        # Nothing that looks like BibTeX. Use gr.update here as well so both
        # return paths build the button update the same way.
        return "N/A", "", gr.update(visible=False)

    run_id = uuid.uuid4().hex
    in_file = f"input_{run_id}.bib"
    out_file = f"output_{run_id}.bib"

    try:
        # Write raw input
        with open(in_file, "w") as f:
            f.write(input_bib.replace("\t", " "))

        entries = rebiber.load_bib_file(in_file)
        rebiber.normalize_bib(
            bib_db,
            entries,
            out_file,
            abbr_dict=abbr_dict if shorten else [],
            deduplicate=deduplicate,
            sort=sort_ids,
            removed_value_names=remove_keys,
        )
    finally:
        # The input copy is only needed while rebiber runs; remove it so the
        # working directory does not accumulate one file per request.
        try:
            os.remove(in_file)
        except FileNotFoundError:
            pass

    with open(out_file) as f:
        # NOTE(review): as written, pattern and replacement are identical, so
        # this replace changes nothing; the intended indent width may have
        # been lost — confirm against the upstream app.
        output_bib = f.read().replace("\n ", "\n ")

    # Show output + enable “Download” button
    return output_bib, run_id, gr.update(visible=True)


def download_file(run_id: str):
    """Expose the normalized .bib for download.

    Args:
        run_id: Identifier returned by ``process`` (held in the hidden
            ``run_uuid`` textbox).

    Returns:
        A 2-tuple of the output file path ``output_<run_id>.bib`` and an
        update that makes the receiving component visible.
    """
    file_path = f"output_{run_id}.bib"
    return file_path, gr.update(visible=True)


# ---------- demo UI ---------- #
EXAMPLE = """
@article{lin2020birds,
    title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
    author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
    journal={arXiv preprint arXiv:2005.00683},
    year={2020}
}
"""

# Intro text shown at the top of the page (Markdown).
INTRO_MD = """
# **Rebiber** – normalise those BibTeX entries!

🐼 [Project page](https://yuchenlin.xyz/)   |   :octocat: [GitHub](https://github.com/yuchenlin/rebiber)   |   🐤 [Tweet](https://twitter.com/billyuchenlin/status/1353850378438070272)

Rebiber replaces arXiv citations with their official venue versions (DBLP / ACL Anthology), deduplicates, sorts, and can abbreviate conference names.
"""

with gr.Blocks(title="Rebiber", theme=gr.themes.Soft()) as demo:
    gr.Markdown(INTRO_MD)

    with gr.Row():
        # ---------- left column ---------- #
        with gr.Column(scale=3):
            input_bib = gr.Textbox(
                label="Input BibTeX",
                value=EXAMPLE,
                lines=15,
                interactive=True,
            )
            removekeys = gr.CheckboxGroup(
                [
                    "url",
                    "biburl",
                    "address",
                    "publisher",
                    "pages",
                    "doi",
                    "volume",
                    "bibsource",
                ],
                label="Remove fields",
                info="Select the keys you’d like to strip",
            )
            shorten = gr.Checkbox(label="Abbreviate venue names")
            dedup = gr.Checkbox(label="Deduplicate entries")
            sort = gr.Checkbox(label="Sort IDs alphabetically")

            with gr.Row():
                clear_btn = gr.Button("Clear")
                submit_btn = gr.Button("Submit")

            # Hidden field carrying the run id from `process` to
            # `download_file`.
            run_uuid = gr.Textbox(visible=False)

        # ---------- right column ---------- #
        with gr.Column(scale=3):
            output_box = gr.Textbox(
                label="Normalised BibTeX",
                interactive=False,
                show_copy_button=True,
            )
            # Both stay hidden until a run succeeds / a download is requested.
            download_btn = gr.Button("Generate . bib file", visible=False)
            download_file_component = gr.File(visible=False)

    # ---------- wiring ---------- #
    submit_btn.click(
        process,
        inputs=[input_bib, shorten, removekeys, dedup, sort],
        outputs=[output_box, run_uuid, download_btn],
        api_name="process",
    )
    # The file component is listed twice: once to receive the path and once
    # to receive the visibility update returned by `download_file`.
    download_btn.click(
        download_file,
        run_uuid,
        [download_file_component, download_file_component],
    )
    clear_btn.click(lambda: "", None, input_bib)

if __name__ == "__main__":
    demo.launch()