Spaces:
Sleeping
Sleeping
File size: 4,479 Bytes
c149479 fe3c056 348017a 796eb82 02e4a72 796eb82 c149479 348017a 0841c28 348017a 796eb82 348017a 0841c28 348017a 796eb82 fe3c056 348017a 796eb82 fe3c056 348017a fe3c056 c54c848 796eb82 c54c848 fe3c056 c149479 796eb82 c149479 fe3c056 c149479 fe3c056 348017a 02e4a72 348017a 796eb82 348017a 796eb82 348017a 02e4a72 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
from src.interfaces.aclanthology import AclanthologyPaperList
from src.interfaces.arxiv import ArxivPaperList
from src.interfaces.dblp import DblpPaperList
from src.utils import (
dump_paper_list_to_jsonlines,
dump_paper_list_to_markdown_checklist,
)
def _ee_title_filter():
    """Return the title keyword groups shared by all event-extraction queries.

    A new list is built on every call, so each query owns its own copy and
    no mutable state is shared between the different paper sources.

    NOTE(review): presumably all keywords inside one group must match and the
    groups are alternatives -- confirm against the `search` implementation.
    """
    return [
        ["information extraction"],
        ["event", "extraction"],
        ["event", "argument", "extraction"],
        ["event", "detection"],
        ["event", "classification"],
        ["event", "tracking"],
        ["event", "relation", "extraction"],
        ["event", "prediction"],
        ["script", "learning"],
    ]


def _acl_venue_filter():
    """Return a fresh copy of the venue filter used for the ACL Anthology queries."""
    return [
        ["acl"],
        ["emnlp"],
        ["naacl"],
        ["coling"],
        ["findings"],
        ["tacl"],
        ["cl"],
    ]


def _dump_results(papers, stem):
    """Write `papers` to `results/<stem>.md` and then `results/<stem>.jsonl`.

    Args:
        papers: the search result to export, passed through unchanged to the
            two dump helpers imported from `src.utils`.
        stem: output file name without directory or extension.
    """
    dump_paper_list_to_markdown_checklist(papers, f"results/{stem}.md")
    dump_paper_list_to_jsonlines(papers, f"results/{stem}.jsonl")


if __name__ == "__main__":
    # use `bash scripts/get_aclanthology.sh` to download and prepare anthology data first
    acl_paper_list = AclanthologyPaperList("cache/aclanthology.json")

    # `ee_query` is an example, and you don't have to fill all the fields
    ee_query = {
        "title": _ee_title_filter(),
        "venue": _acl_venue_filter(),
        "author": [
            ["Heng Ji"],
            ["Dan Roth"],
        ],
        "year": [
            # multiple time spans with closed interval: ["2006", "2013"] means 2006-2013
            ["2006", "2013"],
            ["2018", "2022"],
        ],
        "month": [
            # the same as the `year` field
            ["4", "11"],
        ],
    }
    ee_papers = acl_paper_list.search(ee_query)
    _dump_results(ee_papers, "ee-paper-list")

    # document-level papers from the same anthology venues
    doc_query = {
        "title": [
            ["document-level"],
        ],
        "venue": _acl_venue_filter(),
    }
    doc_papers = acl_paper_list.search(doc_query)
    _dump_results(doc_papers, "doc-paper-list")

    # arxiv papers
    arxiv_paper_list = ArxivPaperList(
        "cache/ee-arxiv.xml",
        use_cache=True,
        title=(
            "Event Extraction OR Event Argument Extraction OR Event Detection"
            " OR Event Classification OR Event Tracking"
            " OR Event Relation Extraction OR Information Extraction"
            " OR Event Prediction OR Script Learning"
        ),
        category="cs.CL",
    )
    arxiv_ee_query = {
        "title": _ee_title_filter(),
        "venue": [
            ["cs.CL"],
        ],
    }
    arxiv_ee_papers = arxiv_paper_list.search(arxiv_ee_query)
    _dump_results(arxiv_ee_papers, "arxiv-ee-paper-list")

    # dblp papers
    dblp_paper_list = DblpPaperList(
        "./cache/dblp.json",
        use_cache=True,
        query="Event|Information|Argument|Script Extraction|Classification|Tracking|Prediction|Learning",
        max_results=50000,
    )
    dblp_ee_query = {
        "title": _ee_title_filter(),
        "venue": [
            ["aaai"],
            ["ijcai"],
            ["icml"],
            ["iclr"],
            ["nips"],
            ["neurips"],
            ["sigir"],
            ["cvpr"],
            ["iccv"],
        ],
    }
    dblp_ee_papers = dblp_paper_list.search(dblp_ee_query)
    _dump_results(dblp_ee_papers, "dblp-ee-paper-list")
|