File size: 4,234 Bytes
9269d50
 
42c6ece
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9269d50
42c6ece
 
 
9269d50
42c6ece
 
 
9269d50
42c6ece
 
 
 
 
 
 
 
 
 
9269d50
42c6ece
9269d50
42c6ece
 
 
9269d50
 
42c6ece
 
 
 
 
 
 
 
 
9269d50
 
 
 
 
9d8fc70
42c6ece
4835e80
42c6ece
 
 
 
4835e80
42c6ece
 
 
4835e80
42c6ece
 
 
9269d50
42c6ece
4835e80
42c6ece
4835e80
42c6ece
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d39adb8
42c6ece
 
4e9d603
42c6ece
0fa757f
42c6ece
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9269d50
 
42c6ece
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import os
import uuid
import gradio as gr
import rebiber

# ---------- preload DB ---------- #
# These statements run once at import time; `process` reuses the resulting
# `bib_db` and `abbr_dict` for every request.

# Absolute path of the rebiber package file with "__init__.py" stripped off,
# i.e. the package directory including its trailing path separator.
PACKAGE_ROOT = os.path.abspath(rebiber.__file__).replace("__init__.py", "")
# Bibliography database built from "bib_list.txt" inside the package directory.
# NOTE(review): start_dir is presumably used as a prefix for the paths listed
# in bib_list.txt, so the trailing separator may matter — confirm in rebiber.
bib_db = rebiber.construct_bib_db(
    os.path.join(PACKAGE_ROOT, "bib_list.txt"), start_dir=PACKAGE_ROOT
)
# Relative path: "abbr.tsv" is resolved against the current working directory,
# not against PACKAGE_ROOT.
abbr_dict = rebiber.normalize.load_abbr_tsv("abbr.tsv")

# ---------- helper functions ---------- #
def process(input_bib: str,
            shorten: bool,
            remove_keys: list[str],
            deduplicate: bool,
            sort_ids: bool):
    """Normalize a BibTeX string and write the result to disk.

    Args:
        input_bib: Raw BibTeX text to normalize.
        shorten: When true, the preloaded ``abbr_dict`` is passed to rebiber;
            otherwise an empty list is passed.
        remove_keys: Field names forwarded to ``rebiber.normalize_bib`` as
            ``removed_value_names``.
        deduplicate: Forwarded to ``rebiber.normalize_bib`` as ``deduplicate``.
        sort_ids: Forwarded to ``rebiber.normalize_bib`` as ``sort``.

    Returns:
        A 3-tuple ``(output_text, run_id, button_update)``. When the input
        contains no ``@`` the tuple is ``("N/A", "", <hide update>)``;
        otherwise it holds the normalized text, the hex id used in the
        ``output_<run_id>.bib`` file name, and an update making the button
        visible.
    """
    if not input_bib or "@" not in input_bib:
        # Nothing that looks like BibTeX (also covers None / empty input).
        # Use gr.update like the success path below; the per-component
        # ``gr.Button.update`` form is not available in newer Gradio releases.
        return "N/A", "", gr.update(visible=False)

    run_id = uuid.uuid4().hex
    in_file = f"input_{run_id}.bib"
    out_file = f"output_{run_id}.bib"

    try:
        # Write raw input (tabs expanded to four spaces).
        # NOTE(review): utf-8 assumes rebiber reads/writes its files as UTF-8
        # — confirm against the installed rebiber version.
        with open(in_file, "w", encoding="utf-8") as f:
            f.write(input_bib.replace("\t", "    "))

        entries = rebiber.load_bib_file(in_file)
        rebiber.normalize_bib(
            bib_db,
            entries,
            out_file,
            abbr_dict=abbr_dict if shorten else [],
            deduplicate=deduplicate,
            sort=sort_ids,
            removed_value_names=remove_keys,
        )
    finally:
        # The input copy is only a hand-off to rebiber; don't let one file per
        # request accumulate in the working directory. The output file is kept
        # because download_file serves it later.
        if os.path.exists(in_file):
            os.remove(in_file)

    with open(out_file, encoding="utf-8") as f:
        # Widen single-space continuation indents to four spaces for display.
        output_bib = f.read().replace("\n ", "\n    ")

    # Show output + enable "Download" button
    return output_bib, run_id, gr.update(visible=True)


def download_file(run_id: str):
    """Expose the normalized .bib for download.

    Args:
        run_id: Identifier of a previous ``process`` run; the file served is
            ``output_<run_id>.bib`` in the current working directory.

    Returns:
        A 2-tuple ``(file_path, update)``. For a well-formed id whose output
        file exists this is the path plus an update with ``visible=True``.
        Otherwise it is ``(None, <update with visible=False>)`` so no
        nonexistent or unintended path is handed to the file component.
    """
    # run_id arrives as event input and is interpolated into a path, so accept
    # only the 32-character lowercase hex form that uuid4().hex produces.
    is_valid_id = (
        isinstance(run_id, str)
        and len(run_id) == 32
        and all(ch in "0123456789abcdef" for ch in run_id)
    )
    file_path = f"output_{run_id}.bib"

    if not is_valid_id or not os.path.isfile(file_path):
        return None, gr.update(visible=False)

    return file_path, gr.update(visible=True)


# ---------- demo UI ---------- #
# Sample BibTeX entry used as the initial value of the input textbox.
EXAMPLE = """
@article{lin2020birds,
    title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
    author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
    journal={arXiv preprint arXiv:2005.00683},
    year={2020}
}
"""

# Build the two-column UI: inputs and options on the left, normalized output
# and the download controls on the right, followed by the event wiring.
with gr.Blocks(title="Rebiber", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# **Rebiber** – normalise those BibTeX entries!  

🐼 [Project page](https://yuchenlin.xyz/)   |  
🐙 [GitHub](https://github.com/yuchenlin/rebiber)   |  
🐤 [Tweet](https://twitter.com/billyuchenlin/status/1353850378438070272)  

Rebiber replaces arXiv citations with their official venue versions (DBLP / ACL
Anthology), deduplicates, sorts, and can abbreviate conference names.
"""
    )

    with gr.Row():
        # ---------- left column ---------- #
        with gr.Column(scale=3):
            input_bib = gr.Textbox(
                label="Input BibTeX",
                value=EXAMPLE,
                lines=15,
                interactive=True,
            )
            removekeys = gr.CheckboxGroup(
                ["url", "biburl", "address", "publisher",
                 "pages", "doi", "volume", "bibsource"],
                label="Remove fields",
                info="Select the keys you’d like to strip",
            )
            shorten = gr.Checkbox(label="Abbreviate venue names")
            dedup   = gr.Checkbox(label="Deduplicate entries")
            sort    = gr.Checkbox(label="Sort IDs alphabetically")

            with gr.Row():
                clear_btn   = gr.Button("Clear")
                submit_btn  = gr.Button("Submit")

            # Hidden field carrying the run id from `process` to `download_file`.
            run_uuid = gr.Textbox(visible=False)

        # ---------- right column ---------- #
        with gr.Column(scale=3):
            output_box = gr.Textbox(
                label="Normalised BibTeX",
                interactive=False,
                show_copy_button=True,
            )
            # Both start hidden; `process` reveals the button and
            # `download_file` reveals the file component.
            download_btn = gr.Button("Generate .bib file", visible=False)
            download_file_component = gr.File(visible=False)

    # ---------- wiring ---------- #
    submit_btn.click(
        process,
        inputs=[input_bib, shorten, removekeys, dedup, sort],
        outputs=[output_box, run_uuid, download_btn],
        api_name="process",
    )
    # download_file returns (path, visibility update); both values are routed
    # to the same File component, hence it is listed twice.
    download_btn.click(download_file, run_uuid, [download_file_component, download_file_component])
    # Clear only resets the input textbox.
    clear_btn.click(lambda: "", None, input_bib)


# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()