Update app.py
app.py
CHANGED
@@ -1,170 +1,128 @@
-import gradio as gr
-import rebiber
 import os
 import uuid

-    if "@" not in input_bib:
-        return "N/A"
-    global abbr_dict
-    # print(f"remove_keys={remove_keys}")
-    random_id = uuid.uuid4().hex
-    with open(f"input_{random_id}.bib", "w") as f:
-        f.write(input_bib.replace("\t", " "))
-    all_bib_entries = rebiber.load_bib_file(f"input_{random_id}.bib")
-    print("# Input Bib Entries:", len(all_bib_entries))
-    abbr_dict_pass = []
-    if shorten:
-        abbr_dict_pass = abbr_dict
-    rebiber.normalize_bib(bib_db, all_bib_entries, f"output_{random_id}.bib",
-                          abbr_dict=abbr_dict_pass,
-                          deduplicate=deduplicate,
-                          sort=sort,
-                          removed_value_names=remove_keys)
-    with open(f"output_{random_id}.bib") as f:
         output_bib = f.read().replace("\n ", "\n  ")
-
-    #
-    return output_bib,

 @article{lin2020birds,
   title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
   author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
   journal={arXiv preprint arXiv:2005.00683},
   year={2020}
-}
-@inproceedings{Lin2020CommonGenAC,
-  title={CommonGen: A Constrained Text Generation Challenge for Generative Commonsense Reasoning},
-  author={Bill Yuchen Lin and Minghan Shen and Wangchunshu Zhou and Pei Zhou and Chandra Bhagavatula and Yejin Choi and Xiang Ren},
-  booktitle={Findings},
-  year={2020}
 }
-"""
-
-examples = [[example_input]]
-
-
-# iface = gr.Interface(fn=process,
-#                      inputs=gr.inputs.Textbox(lines=30, label="Input BIB"),
-#                      outputs=gr.outputs.Textbox(label="Output BIB").style(show_copy_button=True),
-#                      examples=examples,
-#                      allow_flagging="never"
-#                      )
-
-
-with
-
-
-        '''# Rebiber: A tool for normalizing bibtex with official info.
-        <table>
-        <tr>
-        <td>
-            <a href="https://yuchenlin.xyz/">
-                <img src="https://img.shields.io/badge/Yuchen%20Lin-🐼-blue?style=social">
-            </a>
-        </td>
-        <td>
-            <a href="https://github.com/yuchenlin/rebiber">
-                <img src="https://img.shields.io/badge/Github--blue?style=social&logo=github">
-            </a>
-        </td>
-        <td>
-            <a href="https://twitter.com/billyuchenlin/status/1353850378438070272?s=20">
-                <img src="https://img.shields.io/badge/Tweet--blue?style=social&logo=twitter">
-            </a>
-        </td>
-        </tr>
-        </table>
-        <span style="font-size:13pt">
-
-        We often cite papers using their arXiv versions without noting that they are already __PUBLISHED__ in some conferences. These unofficial bib entries might violate rules about submissions or camera-ready versions for some conferences.
-        We introduce __Rebiber__, a simple tool in Python to fix them automatically. It is based on the official conference information from the [DBLP](https://dblp.org/) or [the ACL anthology](https://www.aclweb.org/anthology/) (for NLP conferences)!
-        Apart from handling outdated arXiv citations, __Rebiber__ also normalizes citations in a unified way (DBLP-style), supporting abbreviation and value selection.
-
-        </span>
-        '''
     )
-
     with gr.Row():
         with gr.Column(scale=3):
-            input_bib = gr.Textbox(
         with gr.Row():
-    ex_uuid = gr.Text(label="UUID")
-    ex_uuid.visible = False
-    with gr.Column(scale=3):
-        output = gr.Textbox(label="Output BIB (Note that you can copy the output bib file by clicking the top-right button.)").style(show_copy_button=True, interactive=False)
-        download_btn = gr.Button("Generate Bib File")
-        download_btn.visible = False
-        download_content = gr.outputs.File()
-        download_content.visible = False
-    def download_file(ex_uuid):
-        global download_content
-        # Replace this with your code to generate/download the file
-        file_path = f"output_{ex_uuid}.bib"
-        download_content.update(visible=False)
-        return file_path, gr.update(visible=True)
-    download_btn.click(download_file, inputs=ex_uuid, outputs=[download_content, download_content])
-    button.click(process, inputs=[input_bib, shorten, removekeys, dedup, sort], outputs=[output, ex_uuid, download_btn], api_name = "process")
-    def clean(text):
-        return ""
-    clr_button.click(clean, input_bib, input_bib)
-    # gr.Interface(fn=process,
-    #              outputs=gr.outputs.Textbox(label="Output BIB").style(show_copy_button=True),
-    #              examples=examples,
-    #              allow_flagging="never",
-    #              scroll_to_output=True,
-    #              show_progress=True,
-    #              )

-""
-
-  title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
-  author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
-  journal={arXiv preprint arXiv:2005.00683},
-  year={2020}
-}
-
-@inproceedings{lin2020birds,
-    address = {Online},
-    author = {Lin, Bill Yuchen and
-              Lee, Seyeon and
-              Khanna, Rahul and
-              Ren, Xiang},
-    booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
-    doi = {10.18653/v1/2020.emnlp-main.557},
-    pages = {6862--6868},
-    publisher = {Association for Computational Linguistics},
-    title = {{B}irds have four legs?! {N}umer{S}ense: {P}robing {N}umerical {C}ommonsense {K}nowledge of {P}re-{T}rained {L}anguage {M}odels},
-    url = {https://aclanthology.org/2020.emnlp-main.557},
-    year = {2020}
-}
-"""
 import os
 import uuid
+import gradio as gr
+import rebiber
+
+# ---------- preload DB ---------- #
+PACKAGE_ROOT = os.path.abspath(rebiber.__file__).replace("__init__.py", "")
+bib_db = rebiber.construct_bib_db(
+    os.path.join(PACKAGE_ROOT, "bib_list.txt"), start_dir=PACKAGE_ROOT
+)
+abbr_dict = rebiber.normalize.load_abbr_tsv("abbr.tsv")
+
+# ---------- helper functions ---------- #
+def process(input_bib: str,
+            shorten: bool,
+            remove_keys: list[str],
+            deduplicate: bool,
+            sort_ids: bool):
+    """Normalize a BibTeX string and write it to disk."""
+    if "@" not in input_bib:
+        # Nothing that looks like BibTeX
+        return "N/A", "", gr.update(visible=False)

+    run_id = uuid.uuid4().hex
+    in_file = f"input_{run_id}.bib"
+    out_file = f"output_{run_id}.bib"

+    # Write raw input
+    with open(in_file, "w") as f:
+        f.write(input_bib.replace("\t", " "))

+    entries = rebiber.load_bib_file(in_file)
+    rebiber.normalize_bib(
+        bib_db,
+        entries,
+        out_file,
+        abbr_dict=abbr_dict if shorten else [],
+        deduplicate=deduplicate,
+        sort=sort_ids,
+        removed_value_names=remove_keys,
+    )

+    with open(out_file) as f:
         output_bib = f.read().replace("\n ", "\n  ")
+
+    # Show output + enable “Download” button
+    return output_bib, run_id, gr.update(visible=True)


+def download_file(run_id: str):
+    """Expose the normalized .bib for download."""
+    file_path = f"output_{run_id}.bib"
+
+    return file_path, gr.update(visible=True)
+
+
+# ---------- demo UI ---------- #
+EXAMPLE = """
 @article{lin2020birds,
   title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
   author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
   journal={arXiv preprint arXiv:2005.00683},
   year={2020}
 }
+"""

+with gr.Blocks(title="Rebiber", theme=gr.themes.Soft()) as demo:
+    gr.Markdown(
+        """
+        # **Rebiber** – normalise those BibTeX entries!

+        🐼 [Project page](https://yuchenlin.xyz/) |
+        :octocat: [GitHub](https://github.com/yuchenlin/rebiber) |
+        🐤 [Tweet](https://twitter.com/billyuchenlin/status/1353850378438070272)

+        Rebiber replaces arXiv citations with their official venue versions (DBLP / ACL
+        Anthology), deduplicates, sorts, and can abbreviate conference names.
+        """
     )
+
     with gr.Row():
+        # ---------- left column ---------- #
         with gr.Column(scale=3):
+            input_bib = gr.Textbox(
+                label="Input BibTeX",
+                value=EXAMPLE,
+                lines=15,
+                interactive=True,
+            )
+            removekeys = gr.CheckboxGroup(
+                ["url", "biburl", "address", "publisher",
+                 "pages", "doi", "volume", "bibsource"],
+                label="Remove fields",
+                info="Select the keys you’d like to strip",
+            )
+            shorten = gr.Checkbox(label="Abbreviate venue names")
+            dedup = gr.Checkbox(label="Deduplicate entries")
+            sort = gr.Checkbox(label="Sort IDs alphabetically")
+
            with gr.Row():
+                clear_btn = gr.Button("Clear")
+                submit_btn = gr.Button("Submit")

+            run_uuid = gr.Textbox(visible=False)

+        # ---------- right column ---------- #
+        with gr.Column(scale=3):
+            output_box = gr.Textbox(
+                label="Normalised BibTeX",
+                interactive=False,
+                show_copy_button=True,
+            )
+            download_btn = gr.Button("Generate .bib file", visible=False)
+            download_file_component = gr.File(visible=False)
+
+    # ---------- wiring ---------- #
+    submit_btn.click(
+        process,
+        inputs=[input_bib, shorten, removekeys, dedup, sort],
+        outputs=[output_box, run_uuid, download_btn],
+        api_name="process",
+    )
+    download_btn.click(download_file, run_uuid, [download_file_component, download_file_component])
+    clear_btn.click(lambda: "", None, input_bib)


+if __name__ == "__main__":
+    demo.launch()
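
The normalization that the new process() helper performs can be reproduced outside the Gradio UI with the same rebiber calls shown in the diff. A minimal local sketch, assuming rebiber is installed; the input/output paths and the removed-key list are placeholders chosen for illustration:

# Minimal local sketch of the rebiber calls wired into process() above.
# Assumptions: rebiber is installed; "my_refs.bib" / "my_refs_fixed.bib"
# are placeholder input/output paths.
import os
import rebiber

PACKAGE_ROOT = os.path.abspath(rebiber.__file__).replace("__init__.py", "")
bib_db = rebiber.construct_bib_db(
    os.path.join(PACKAGE_ROOT, "bib_list.txt"), start_dir=PACKAGE_ROOT
)

entries = rebiber.load_bib_file("my_refs.bib")
rebiber.normalize_bib(
    bib_db,
    entries,
    "my_refs_fixed.bib",
    abbr_dict=[],            # [] skips venue abbreviation, as when "shorten" is off
    deduplicate=True,
    sort=False,
    removed_value_names=["url", "biburl", "bibsource"],  # fields to strip, as in the UI checkboxes
)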
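
Because the submit handler registers api_name="process", the Space also exposes that function as a named API endpoint. A sketch of calling it with gradio_client; the Space id, the example inputs, and the exact shape of the returned tuple are assumptions, not something this diff confirms:

# Hypothetical client call against the "process" endpoint registered above.
# Assumptions: gradio_client is installed; the Space id below is a placeholder.
from gradio_client import Client

client = Client("yuchenlin/Rebiber")      # placeholder Space id
outputs = client.predict(
    "@article{lin2020birds, ...}",        # input_bib (truncated example entry)
    True,                                 # shorten: abbreviate venue names
    ["url", "biburl", "bibsource"],       # remove_keys
    True,                                 # deduplicate
    False,                                # sort
    api_name="/process",
)
print(outputs[0])                         # first output is the normalized BibTeX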