Spaces:
Sleeping
Sleeping
mkdir cache | |
cd cache | |
if ! [ -f acl-anthology/bin/anthology/anthology.py ]; then | |
git clone https://github.com/acl-org/acl-anthology | |
else | |
cd acl-anthology | |
git pull | |
cd .. | |
fi | |
cd acl-anthology/bin | |
pip install -r https://raw.githubusercontent.com/acl-org/acl-anthology/master/bin/requirements.txt | |
python -c ' | |
import json | |
from anthology import Anthology | |
anthology = Anthology(importdir="../data") | |
pops = ["xml_booktitle", "xml_title", "xml_url", "xml_abstract"] | |
papers = [] | |
for paper in anthology.papers.values(): | |
p = paper.as_dict() | |
if "xml_abstract" in p: | |
p["abstract"] = paper.get_abstract(form="latex") | |
for popkey in pops: | |
if popkey in p: | |
p.pop(popkey) | |
if "author" in p: | |
p["author"] = [a[0].as_dict() for a in p["author"]] | |
if "editor" in p: | |
p["editor"] = [a[0].as_dict() for a in p["editor"]] | |
papers.append(p) | |
with open("../../aclanthology.json", "wt", encoding="utf8") as fout: | |
json.dump(papers, fout, ensure_ascii=False) | |
' | |