Update app.py
app.py
CHANGED
@@ -1,170 +1,128 @@
-import gradio as gr
-import rebiber
 import os
 import uuid

-    if "@" not in input_bib:
-        return "N/A"
-    global abbr_dict
-    # print(f"remove_keys={remove_keys}")
-    random_id = uuid.uuid4().hex
-    with open(f"input_{random_id}.bib", "w") as f:
-        f.write(input_bib.replace("\t", " "))
-    all_bib_entries = rebiber.load_bib_file(f"input_{random_id}.bib")
-    print("# Input Bib Entries:", len(all_bib_entries))
-    abbr_dict_pass = []
-    if shorten:
-        abbr_dict_pass = abbr_dict
-    rebiber.normalize_bib(bib_db, all_bib_entries, f"output_{random_id}.bib",
-                          abbr_dict=abbr_dict_pass,
-                          deduplicate=deduplicate,
-                          sort=sort,
-                          removed_value_names=remove_keys)
-    with open(f"output_{random_id}.bib") as f:
         output_bib = f.read().replace("\n ", "\n  ")
-
-    #
-    return output_bib,

 @article{lin2020birds,
   title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
   author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
   journal={arXiv preprint arXiv:2005.00683},
   year={2020}
-}
-@inproceedings{Lin2020CommonGenAC,
-  title={CommonGen: A Constrained Text Generation Challenge for Generative Commonsense Reasoning},
-  author={Bill Yuchen Lin and Minghan Shen and Wangchunshu Zhou and Pei Zhou and Chandra Bhagavatula and Yejin Choi and Xiang Ren},
-  booktitle={Findings},
-  year={2020}
 }
-"""
-
-examples = [[example_input]]
-
-
-# iface = gr.Interface(fn=process,
-#                      inputs=gr.inputs.Textbox(lines=30, label="Input BIB"),
-#                      outputs=gr.outputs.Textbox(label="Output BIB").style(show_copy_button=True),
-#                      examples=examples,
-#                      allow_flagging="never"
-#                      )
-
-
-with
-
-
-        '''# Rebiber: A tool for normalizing bibtex with official info.
-        <table>
-        <tr>
-        <td>
-            <a href="https://yuchenlin.xyz/">
-                <img src="https://img.shields.io/badge/Yuchen%20Lin-🐼-blue?style=social">
-            </a>
-        </td>
-        <td>
-            <a href="https://github.com/yuchenlin/rebiber">
-                <img src="https://img.shields.io/badge/Github--blue?style=social&logo=github">
-            </a>
-        </td>
-        <td>
-            <a href="https://twitter.com/billyuchenlin/status/1353850378438070272?s=20">
-                <img src="https://img.shields.io/badge/Tweet--blue?style=social&logo=twitter">
-            </a>
-        </td>
-        </tr>
-        </table>
-        <span style="font-size:13pt">
-
-        We often cite papers using their arXiv versions without noting that they are already __PUBLISHED__ in some conferences. These unofficial bib entries might violate rules about submissions or camera-ready versions for some conferences.
-        We introduce __Rebiber__, a simple tool in Python to fix them automatically. It is based on the official conference information from the [DBLP](https://dblp.org/) or [the ACL anthology](https://www.aclweb.org/anthology/) (for NLP conferences)!
-        Apart from handling outdated arXiv citations, __Rebiber__ also normalizes citations in a unified way (DBLP-style), supporting abbreviation and value selection.
-
-        </span>
-        '''
     )
-
     with gr.Row():
         with gr.Column(scale=3):
-            input_bib = gr.Textbox(
         with gr.Row():
-    ex_uuid = gr.Text(label="UUID")
-    ex_uuid.visible = False
-    with gr.Column(scale=3):
-        output = gr.Textbox(label="Output BIB (Note that you can copy the output bib file by clicking the top-right button.)").style(show_copy_button=True, interactive=False)
-        download_btn = gr.Button("Generate Bib File")
-        download_btn.visible = False
-        download_content = gr.outputs.File()
-        download_content.visible = False
-    def download_file(ex_uuid):
-        global download_content
-        # Replace this with your code to generate/download the file
-        file_path = f"output_{ex_uuid}.bib"
-        download_content.update(visible=False)
-        return file_path, gr.update(visible=True)
-    download_btn.click(download_file, inputs=ex_uuid, outputs=[download_content, download_content])
-    button.click(process, inputs=[input_bib, shorten, removekeys, dedup, sort], outputs=[output, ex_uuid, download_btn], api_name = "process")
-    def clean(text):
-        return ""
-    clr_button.click(clean, input_bib, input_bib)
-    # gr.Interface(fn=process,
-    #              outputs=gr.outputs.Textbox(label="Output BIB").style(show_copy_button=True),
-    #              examples=examples,
-    #              allow_flagging="never",
-    #              scroll_to_output=True,
-    #              show_progress=True,
-    #              )

-""
-
-  title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
-  author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
-  journal={arXiv preprint arXiv:2005.00683},
-  year={2020}
-}
-
-@inproceedings{lin2020birds,
-    address = {Online},
-    author = {Lin, Bill Yuchen and
-              Lee, Seyeon and
-              Khanna, Rahul and
-              Ren, Xiang},
-    booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
-    doi = {10.18653/v1/2020.emnlp-main.557},
-    pages = {6862--6868},
-    publisher = {Association for Computational Linguistics},
-    title = {{B}irds have four legs?! {N}umer{S}ense: {P}robing {N}umerical {C}ommonsense {K}nowledge of {P}re-{T}rained {L}anguage {M}odels},
-    url = {https://aclanthology.org/2020.emnlp-main.557},
-    year = {2020}
-}
-"""
 import os
 import uuid
+import gradio as gr
+import rebiber
+
+# ---------- preload DB ---------- #
+PACKAGE_ROOT = os.path.abspath(rebiber.__file__).replace("__init__.py", "")
+bib_db = rebiber.construct_bib_db(
+    os.path.join(PACKAGE_ROOT, "bib_list.txt"), start_dir=PACKAGE_ROOT
+)
+abbr_dict = rebiber.normalize.load_abbr_tsv("abbr.tsv")
+
+# ---------- helper functions ---------- #
+def process(input_bib: str,
+            shorten: bool,
+            remove_keys: list[str],
+            deduplicate: bool,
+            sort_ids: bool):
+    """Normalize a BibTeX string and write it to disk."""
+    if "@" not in input_bib:
+        # Nothing that looks like BibTeX
+        return "N/A", "", gr.update(visible=False)

+    run_id = uuid.uuid4().hex
+    in_file = f"input_{run_id}.bib"
+    out_file = f"output_{run_id}.bib"

+    # Write raw input
+    with open(in_file, "w") as f:
+        f.write(input_bib.replace("\t", " "))

+    entries = rebiber.load_bib_file(in_file)
+    rebiber.normalize_bib(
+        bib_db,
+        entries,
+        out_file,
+        abbr_dict=abbr_dict if shorten else [],
+        deduplicate=deduplicate,
+        sort=sort_ids,
+        removed_value_names=remove_keys,
+    )

+    with open(out_file) as f:
         output_bib = f.read().replace("\n ", "\n  ")
+
+    # Show output + enable “Download” button
+    return output_bib, run_id, gr.update(visible=True)


+def download_file(run_id: str):
+    """Expose the normalized .bib for download."""
+    file_path = f"output_{run_id}.bib"
+
+    return file_path, gr.update(visible=True)
+
+
+# ---------- demo UI ---------- #
+EXAMPLE = """
 @article{lin2020birds,
   title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
   author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
   journal={arXiv preprint arXiv:2005.00683},
   year={2020}
 }
+"""

+with gr.Blocks(title="Rebiber", theme=gr.themes.Soft()) as demo:
+    gr.Markdown(
+        """
+        # **Rebiber** – normalise those BibTeX entries!

+        🐼 [Project page](https://yuchenlin.xyz/) |
+        :octocat: [GitHub](https://github.com/yuchenlin/rebiber) |
+        🐤 [Tweet](https://twitter.com/billyuchenlin/status/1353850378438070272)

+        Rebiber replaces arXiv citations with their official venue versions (DBLP / ACL
+        Anthology), deduplicates, sorts, and can abbreviate conference names.
+        """
     )
+
     with gr.Row():
+        # ---------- left column ---------- #
         with gr.Column(scale=3):
+            input_bib = gr.Textbox(
+                label="Input BibTeX",
+                value=EXAMPLE,
+                lines=15,
+                interactive=True,
+            )
+            removekeys = gr.CheckboxGroup(
+                ["url", "biburl", "address", "publisher",
+                 "pages", "doi", "volume", "bibsource"],
+                label="Remove fields",
+                info="Select the keys you’d like to strip",
+            )
+            shorten = gr.Checkbox(label="Abbreviate venue names")
+            dedup = gr.Checkbox(label="Deduplicate entries")
+            sort = gr.Checkbox(label="Sort IDs alphabetically")
+
            with gr.Row():
+                clear_btn = gr.Button("Clear")
+                submit_btn = gr.Button("Submit")

+            run_uuid = gr.Textbox(visible=False)

+        # ---------- right column ---------- #
+        with gr.Column(scale=3):
+            output_box = gr.Textbox(
+                label="Normalised BibTeX",
+                interactive=False,
+                show_copy_button=True,
+            )
+            download_btn = gr.Button("Generate .bib file", visible=False)
+            download_file_component = gr.File(visible=False)
+
+    # ---------- wiring ---------- #
+    submit_btn.click(
+        process,
+        inputs=[input_bib, shorten, removekeys, dedup, sort],
+        outputs=[output_box, run_uuid, download_btn],
+        api_name="process",
+    )
+    download_btn.click(download_file, run_uuid, [download_file_component, download_file_component])
+    clear_btn.click(lambda: "", None, input_bib)


+if __name__ == "__main__":
+    demo.launch()
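
The normalization that the new process() helper performs can be reproduced outside the Gradio UI with the same rebiber calls shown in the diff. A minimal local sketch, assuming rebiber is installed; the input/output paths and the removed-key list are placeholders chosen for illustration:

# Minimal local sketch of the rebiber calls wired into process() above.
# Assumptions: rebiber is installed; "my_refs.bib" / "my_refs_fixed.bib"
# are placeholder input/output paths.
import os
import rebiber

PACKAGE_ROOT = os.path.abspath(rebiber.__file__).replace("__init__.py", "")
bib_db = rebiber.construct_bib_db(
    os.path.join(PACKAGE_ROOT, "bib_list.txt"), start_dir=PACKAGE_ROOT
)

entries = rebiber.load_bib_file("my_refs.bib")
rebiber.normalize_bib(
    bib_db,
    entries,
    "my_refs_fixed.bib",
    abbr_dict=[],            # [] skips venue abbreviation, as when "shorten" is off
    deduplicate=True,
    sort=False,
    removed_value_names=["url", "biburl", "bibsource"],  # fields to strip, as in the UI checkboxes
)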
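
Because the submit handler registers api_name="process", the Space also exposes that function as a named API endpoint. A sketch of calling it with gradio_client; the Space id, the example inputs, and the exact shape of the returned tuple are assumptions, not something this diff confirms:

# Hypothetical client call against the "process" endpoint registered above.
# Assumptions: gradio_client is installed; the Space id below is a placeholder.
from gradio_client import Client

client = Client("yuchenlin/Rebiber")      # placeholder Space id
outputs = client.predict(
    "@article{lin2020birds, ...}",        # input_bib (truncated example entry)
    True,                                 # shorten: abbreviate venue names
    ["url", "biburl", "bibsource"],       # remove_keys
    True,                                 # deduplicate
    False,                                # sort
    api_name="/process",
)
print(outputs[0])                         # first output is the normalized BibTeX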