yuchenlin committed on
Commit
42c6ece
·
verified ·
1 Parent(s): 7c59eee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -144
app.py CHANGED
@@ -1,170 +1,128 @@
1
- import gradio as gr
2
- import rebiber
3
  import os
4
  import uuid
5
-
6
-
7
- # Load Bib Database
8
- filepath = os.path.abspath(rebiber.__file__).replace("__init__.py","")
9
- bib_list_path = os.path.join(filepath, "bib_list.txt")
10
- abbr_tsv_path = "abbr.tsv"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- bib_db = rebiber.construct_bib_db(bib_list_path, start_dir=filepath)
 
 
13
 
14
- abbr_dict = rebiber.normalize.load_abbr_tsv(abbr_tsv_path)
 
 
15
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- def process(input_bib, shorten, remove_keys, deduplicate, sort):
18
- if "@" not in input_bib:
19
- return "N/A"
20
- global abbr_dict
21
- # print(f"remove_keys={remove_keys}")
22
- random_id = uuid.uuid4().hex
23
- with open(f"input_{random_id}.bib", "w") as f:
24
- f.write(input_bib.replace("\t", " "))
25
- all_bib_entries = rebiber.load_bib_file(f"input_{random_id}.bib")
26
- print("# Input Bib Entries:", len(all_bib_entries))
27
- abbr_dict_pass = []
28
- if shorten:
29
- abbr_dict_pass = abbr_dict
30
- rebiber.normalize_bib(bib_db, all_bib_entries, f"output_{random_id}.bib",
31
- abbr_dict=abbr_dict_pass,
32
- deduplicate=deduplicate,
33
- sort=sort,
34
- removed_value_names=remove_keys)
35
- with open(f"output_{random_id}.bib") as f:
36
  output_bib = f.read().replace("\n ", "\n ")
37
- # delete both files
38
- # print(output_bib)
39
- return output_bib, random_id, gr.update(visible=True)
40
 
41
 
42
- example_input = """
 
 
 
 
 
 
 
 
43
  @article{lin2020birds,
44
  title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
45
  author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
46
  journal={arXiv preprint arXiv:2005.00683},
47
  year={2020}
48
- }
49
- @inproceedings{Lin2020CommonGenAC,
50
- title={CommonGen: A Constrained Text Generation Challenge for Generative Commonsense Reasoning},
51
- author={Bill Yuchen Lin and Minghan Shen and Wangchunshu Zhou and Pei Zhou and Chandra Bhagavatula and Yejin Choi and Xiang Ren},
52
- booktitle={Findings},
53
- year={2020}
54
  }
55
- """
56
-
57
- examples = [[example_input]]
58
-
59
-
60
- # iface = gr.Interface(fn=process,
61
- # inputs=gr.inputs.Textbox(lines=30, label="Input BIB"),
62
- # outputs=gr.outputs.Textbox(label="Output BIB").style(show_copy_button=True),
63
- # examples=examples,
64
- # allow_flagging="never"
65
- # )
66
-
67
-
68
 
 
 
 
 
69
 
 
 
 
70
 
71
- with gr.Blocks() as demo:
72
-
73
- gr.Markdown(
74
- '''# Rebiber: A tool for normalizing bibtex with official info.
75
- <table>
76
- <tr>
77
- <td>
78
- <a href="https://yuchenlin.xyz/">
79
- <img src="https://img.shields.io/badge/Yuchen%20Lin-🐼-blue?style=social">
80
- </a>
81
- </td>
82
- <td>
83
- <a href="https://github.com/yuchenlin/rebiber">
84
- <img src="https://img.shields.io/badge/Github--blue?style=social&logo=github">
85
- </a>
86
- </td>
87
- <td>
88
- <a href="https://twitter.com/billyuchenlin/status/1353850378438070272?s=20">
89
- <img src="https://img.shields.io/badge/Tweet--blue?style=social&logo=twitter">
90
- </a>
91
- </td>
92
- </tr>
93
- </table>
94
- <span style="font-size:13pt">
95
-
96
- We often cite papers using their arXiv versions without noting that they are already __PUBLISHED__ in some conferences. These unofficial bib entries might violate rules about submissions or camera-ready versions for some conferences.
97
- We introduce __Rebiber__, a simple tool in Python to fix them automatically. It is based on the official conference information from the [DBLP](https://dblp.org/) or [the ACL anthology](https://www.aclweb.org/anthology/) (for NLP conferences)!
98
- Apart from handling outdated arXiv citations, __Rebiber__ also normalizes citations in a unified way (DBLP-style), supporting abbreviation and value selection.
99
-
100
- </span>
101
- '''
102
  )
103
-
104
  with gr.Row():
 
105
  with gr.Column(scale=3):
106
- input_bib = gr.Textbox(lines=15, label="Input BIB", value=example_input, interactive=True)
107
- removekeys = gr.CheckboxGroup(["url", "biburl", "address", "publisher", "pages", "doi", "volume", "bibsource"],
108
- value=[False, False, False, False, False, False, False, False],
109
- label="Remove Keys", info="Which keys to remove?")
110
- shorten = gr.Checkbox(label="Abbreviation", info="Shorten the conference/journal names (e.g., `Proceedings of the 2020 International Conference of ...` --> `Proc. of ICML')", value=False)
111
- dedup = gr.Checkbox(label="Deduplicate entries.", value=False)
112
- sort = gr.Checkbox(label="Sort alphabetically by ID.", value=False)
 
 
 
 
 
 
 
 
 
113
  with gr.Row():
114
- clr_button = gr.Button("Clear")
115
- button = gr.Button("Submit")
116
- ex_uuid = gr.Text(label="UUID")
117
- ex_uuid.visible = False
118
- with gr.Column(scale=3):
119
- output=gr.Textbox(label="Output BIB (Note that you can copy the output bib file by clicking the top-right button.)").style(show_copy_button=True, interactive=False)
120
- download_btn = gr.Button("Generate Bib File")
121
- download_btn.visible = False
122
- download_content = gr.outputs.File()
123
- download_content.visible = False
124
- def download_file(ex_uuid):
125
- global download_content
126
- # Replace this with your code to generate/download the file
127
- file_path = f"output_{ex_uuid}.bib"
128
- download_content.update(visible=False)
129
- return file_path, gr.update(visible=True)
130
- download_btn.click(download_file, inputs=ex_uuid, outputs=[download_content,download_content])
131
- button.click(process, inputs=[input_bib, shorten, removekeys, dedup, sort], outputs=[output, ex_uuid, download_btn], api_name = "process")
132
- def clean(text):
133
- return ""
134
- clr_button.click(clean, input_bib, input_bib)
135
- # gr.Interface(fn=process,
136
- # outputs=gr.outputs.Textbox(label="Output BIB").style(show_copy_button=True),
137
- # examples=examples,
138
- # allow_flagging="never",
139
- # scroll_to_output=True,
140
- # show_progress=True,
141
- # )
142
 
 
143
 
144
- if __name__ == "__main__":
145
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
 
148
- """
149
- @article{lin2020birds,
150
- title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
151
- author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
152
- journal={arXiv preprint arXiv:2005.00683},
153
- year={2020}
154
- }
155
-
156
- @inproceedings{lin2020birds,
157
- address = {Online},
158
- author = {Lin, Bill Yuchen and
159
- Lee, Seyeon and
160
- Khanna, Rahul and
161
- Ren, Xiang},
162
- booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
163
- doi = {10.18653/v1/2020.emnlp-main.557},
164
- pages = {6862--6868},
165
- publisher = {Association for Computational Linguistics},
166
- title = {{B}irds have four legs?! {N}umer{S}ense: {P}robing {N}umerical {C}ommonsense {K}nowledge of {P}re-{T}rained {L}anguage {M}odels},
167
- url = {https://aclanthology.org/2020.emnlp-main.557},
168
- year = {2020}
169
- }
170
- """
 
 
 
1
  import os
2
  import uuid
3
+ import gradio as gr
4
+ import rebiber
5
+
6
# ---------- preload DB ---------- #
# Loaded once at import time so every request reuses the same data.
# PACKAGE_ROOT is the rebiber package path with "__init__.py" stripped off,
# which leaves the trailing path separator in place.
# NOTE(review): assumes rebiber.__file__ ends in "__init__.py" -- confirm.
PACKAGE_ROOT = os.path.abspath(rebiber.__file__).replace("__init__.py", "")
_bib_list_path = os.path.join(PACKAGE_ROOT, "bib_list.txt")
bib_db = rebiber.construct_bib_db(_bib_list_path, start_dir=PACKAGE_ROOT)
# "abbr.tsv" is a relative path, so it is resolved against the working directory.
abbr_dict = rebiber.normalize.load_abbr_tsv("abbr.tsv")
12
+
13
+ # ---------- helper functions ---------- #
14
def process(input_bib: str,
            shorten: bool,
            remove_keys: list[str],
            deduplicate: bool,
            sort_ids: bool):
    """Normalize a BibTeX string with rebiber and return it for display.

    The input is written to ``input_<run_id>.bib`` in the working directory,
    normalized against the module-level ``bib_db`` into
    ``output_<run_id>.bib``, and the output file is read back. The output
    file is kept on disk because ``download_file`` rebuilds the same name
    from the run id; the input file is scratch only and is removed.

    Args:
        input_bib: Raw BibTeX text.
        shorten: When true, pass the module-level ``abbr_dict`` to rebiber;
            otherwise pass an empty list.
        remove_keys: Forwarded to rebiber as ``removed_value_names``.
        deduplicate: Forwarded to rebiber as ``deduplicate``.
        sort_ids: Forwarded to rebiber as ``sort``.

    Returns:
        A 3-tuple ``(text, run_id, update)``: the text to show, the run id
        (empty when nothing was processed), and a ``gr.update`` that sets
        the visibility of the component wired as the third output.
    """
    if "@" not in input_bib:
        # Nothing that looks like BibTeX. Use gr.update (not the
        # per-component Button.update) so both return paths build the
        # update the same way.
        return "N/A", "", gr.update(visible=False)

    run_id = uuid.uuid4().hex
    in_file = f"input_{run_id}.bib"
    out_file = f"output_{run_id}.bib"

    try:
        # Write raw input.
        # NOTE(review): rebiber is presumed to read and write UTF-8 -- confirm.
        # NOTE(review): whitespace inside the replace() literals may have
        # been collapsed in the reviewed copy -- confirm the intended widths.
        with open(in_file, "w", encoding="utf-8") as f:
            f.write(input_bib.replace("\t", " "))

        entries = rebiber.load_bib_file(in_file)
        rebiber.normalize_bib(
            bib_db,
            entries,
            out_file,
            abbr_dict=abbr_dict if shorten else [],
            deduplicate=deduplicate,
            sort=sort_ids,
            removed_value_names=remove_keys,
        )
    finally:
        # The input copy is never read again; delete it even when rebiber
        # raises so scratch files do not accumulate.
        if os.path.exists(in_file):
            os.remove(in_file)

    with open(out_file, encoding="utf-8") as f:
        output_bib = f.read().replace("\n ", "\n ")

    # Show output + enable the "Download" button.
    return output_bib, run_id, gr.update(visible=True)
48
 
49
 
50
def download_file(run_id: str):
    """Return the normalized .bib path for ``run_id`` and a visibility update.

    The path follows the ``output_<run_id>.bib`` naming that ``process``
    writes to. Nothing is checked on disk here.

    Returns:
        A 2-tuple ``(file_path, update)`` where ``update`` is
        ``gr.update(visible=True)``.
    """
    reveal = gr.update(visible=True)
    return f"output_{run_id}.bib", reveal
55
+
56
+
57
+ # ---------- demo UI ---------- #
58
+ EXAMPLE = """
59
  @article{lin2020birds,
60
  title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
61
  author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
62
  journal={arXiv preprint arXiv:2005.00683},
63
  year={2020}
 
 
 
 
 
 
64
  }
65
+ """
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
+ with gr.Blocks(title="Rebiber", theme=gr.themes.Soft()) as demo:
68
+ gr.Markdown(
69
+ """
70
+ # **Rebiber** – normalise those BibTeX entries!
71
 
72
+ 🐼 [Project&nbsp;page](https://yuchenlin.xyz/) &nbsp;&nbsp;|&nbsp;&nbsp;
73
+ :octocat: [GitHub](https://github.com/yuchenlin/rebiber) &nbsp;&nbsp;|&nbsp;&nbsp;
74
+ 🐤 [Tweet](https://twitter.com/billyuchenlin/status/1353850378438070272)
75
 
76
+ Rebiber replaces arXiv citations with their official venue versions (DBLP / ACL
77
+ Anthology), deduplicates, sorts, and can abbreviate conference names.
78
+ """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  )
80
+
81
  with gr.Row():
82
+ # ---------- left column ---------- #
83
  with gr.Column(scale=3):
84
+ input_bib = gr.Textbox(
85
+ label="Input BibTeX",
86
+ value=EXAMPLE,
87
+ lines=15,
88
+ interactive=True,
89
+ )
90
+ removekeys = gr.CheckboxGroup(
91
+ ["url", "biburl", "address", "publisher",
92
+ "pages", "doi", "volume", "bibsource"],
93
+ label="Remove fields",
94
+ info="Select the keys you’d like to strip",
95
+ )
96
+ shorten = gr.Checkbox(label="Abbreviate venue names")
97
+ dedup = gr.Checkbox(label="Deduplicate entries")
98
+ sort = gr.Checkbox(label="Sort IDs alphabetically")
99
+
100
  with gr.Row():
101
+ clear_btn = gr.Button("Clear")
102
+ submit_btn = gr.Button("Submit")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
+ run_uuid = gr.Textbox(visible=False)
105
 
106
+ # ---------- right column ---------- #
107
+ with gr.Column(scale=3):
108
+ output_box = gr.Textbox(
109
+ label="Normalised BibTeX",
110
+ interactive=False,
111
+ show_copy_button=True,
112
+ )
113
+ download_btn = gr.Button("Generate . bib file", visible=False)
114
+ download_file_component = gr.File(visible=False)
115
+
116
+ # ---------- wiring ---------- #
117
+ submit_btn.click(
118
+ process,
119
+ inputs=[input_bib, shorten, removekeys, dedup, sort],
120
+ outputs=[output_box, run_uuid, download_btn],
121
+ api_name="process",
122
+ )
123
+ download_btn.click(download_file, run_uuid, [download_file_component, download_file_component])
124
+ clear_btn.click(lambda: "", None, input_bib)
125
 
126
 
127
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()