Spaces:
Sleeping
Sleeping
add arxiv
Browse files
- requirements.txt +3 -2
- results/arxiv-ee-paper-list.md +215 -0
- run.py +53 -29
- src/interfaces/aclanthology.py +1 -1
- src/interfaces/arxiv.py +142 -0
requirements.txt
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
-
tqdm
|
2 |
-
requests
|
|
|
|
1 |
+
tqdm>=4.64.1
|
2 |
+
requests>=2.28.1
|
3 |
+
feedparser>=6.0.10
|
results/arxiv-ee-paper-list.md
ADDED
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
- [ ] [CS.CL , CS.AI , CS.LG, 2023] [Syntactically Robust Training on Partially-Observed Data for Open Information Extraction](http://arxiv.org/abs/2301.06841v1)
|
2 |
+
- [ ] [CS.CL, 2023] [tieval: An Evaluation Framework for Temporal Information Extraction Systems](http://arxiv.org/abs/2301.04643v1)
|
3 |
+
- [ ] [CS.CL, 2023] [Universal Information Extraction as Unified Semantic Matching](http://arxiv.org/abs/2301.03282v1)
|
4 |
+
- [ ] [CS.CL, 2023] [Mask-then-Fill: A Flexible and Effective Data Augmentation Framework for Event Extraction](http://arxiv.org/abs/2301.02427v1)
|
5 |
+
- [ ] [CS.CL , CS.AI, 2023] [PIE-QG: Paraphrased Information Extraction for Unsupervised Question Generation from Small Corpora](http://arxiv.org/abs/2301.01064v1)
|
6 |
+
- [ ] [CS.CL , CS.AI, 2022] [Information Extraction and Human-Robot Dialogue towards Real-life Tasks: A Baseline Study with the MobileCS Dataset](http://arxiv.org/abs/2209.13464v2)
|
7 |
+
- [ ] [CS.CL, 2022] [Generalizing through Forgetting -- Domain Generalization for Symptom Event Extraction in Clinical Notes](http://arxiv.org/abs/2209.09485v1)
|
8 |
+
- [ ] [CS.CL, 2022] [A Few-shot Approach to Resume Information Extraction via Prompts](http://arxiv.org/abs/2209.09450v1)
|
9 |
+
- [ ] [CS.CL, 2022] [Automatic Error Analysis for Document-level Information Extraction](http://arxiv.org/abs/2209.07442v1)
|
10 |
+
- [ ] [CS.CL, 2022] [OneEE: A One-Stage Framework for Fast Overlapping and Nested Event Extraction](http://arxiv.org/abs/2209.02693v1)
|
11 |
+
- [ ] [CS.CL, 2022] [Few-Shot Document-Level Event Argument Extraction](http://arxiv.org/abs/2209.02203v1)
|
12 |
+
- [ ] [CS.CL, 2022] [Few-Shot Document-Level Event Argument Extraction](http://arxiv.org/abs/2209.02203v1)
|
13 |
+
- [ ] [CS.CL , CS.AI , CS.LG, 2022] [Multi-Scale Contrastive Co-Training for Event Temporal Relation Extraction](http://arxiv.org/abs/2209.00568v1)
|
14 |
+
- [ ] [CS.CL , CS.AI , CS.LG, 2022] [Multi-Scale Contrastive Co-Training for Event Temporal Relation Extraction](http://arxiv.org/abs/2209.00568v1)
|
15 |
+
- [ ] [CS.CL, 2022] [A Multi-Format Transfer Learning Model for Event Argument Extraction via Variational Information Bottleneck](http://arxiv.org/abs/2208.13017v3)
|
16 |
+
- [ ] [CS.CL, 2022] [A Multi-Format Transfer Learning Model for Event Argument Extraction via Variational Information Bottleneck](http://arxiv.org/abs/2208.13017v3)
|
17 |
+
- [ ] [CS.CL , CS.AI, 2022] [SPOT: Knowledge-Enhanced Language Representations for Information Extraction](http://arxiv.org/abs/2208.09625v2)
|
18 |
+
- [ ] [CS.CL , CS.AI, 2022] [End-to-end Clinical Event Extraction from Chinese Electronic Health Record](http://arxiv.org/abs/2208.09354v1)
|
19 |
+
- [ ] [CS.CL, 2022] [Open Information Extraction from 2007 to 2022 -- A Survey](http://arxiv.org/abs/2208.08690v1)
|
20 |
+
- [ ] [CS.AI , CS.CL , CS.IR, 2022] [NECE: Narrative Event Chain Extraction Toolkit](http://arxiv.org/abs/2208.08063v3)
|
21 |
+
- [ ] [CS.CL , CS.AI , CS.LG, 2022] [DICE: Data-Efficient Clinical Event Extraction with Generative Models](http://arxiv.org/abs/2208.07989v1)
|
22 |
+
- [ ] [CS.CL, 2022] [Information Extraction from Scanned Invoice Images using Text Analysis and Layout Features](http://arxiv.org/abs/2208.04011v1)
|
23 |
+
- [ ] [CS.CL , CS.MM, 2022] [Layout-Aware Information Extraction for Document-Grounded Dialogue: Dataset, Method and Demonstration](http://arxiv.org/abs/2207.06717v1)
|
24 |
+
- [ ] [CS.CL, 2022] [GMN: Generative Multi-modal Network for Practical Document Information Extraction](http://arxiv.org/abs/2207.04713v1)
|
25 |
+
- [ ] [CS.CL, 2022] [A Medical Information Extraction Workbench to Process German Clinical Text](http://arxiv.org/abs/2207.03885v2)
|
26 |
+
- [ ] [CS.CL, 2022] [DetIE: Multilingual Open Information Extraction Inspired by Object Detection](http://arxiv.org/abs/2206.12514v1)
|
27 |
+
- [ ] [CS.CL , CS.IR, 2022] [Unsupervised Key Event Detection from Massive Text Corpora](http://arxiv.org/abs/2206.04153v2)
|
28 |
+
- [ ] [CS.CL, 2022] [RAAT: Relation-Augmented Attention Transformer for Relation Modeling in Document-Level Event Extraction](http://arxiv.org/abs/2206.03377v1)
|
29 |
+
- [ ] [CS.CL, 2022] [RAAT: Relation-Augmented Attention Transformer for Relation Modeling in Document-Level Event Extraction](http://arxiv.org/abs/2206.03377v1)
|
30 |
+
- [ ] [CS.CL , CS.DL, 2022] [Plumber: A Modular Framework to Create Information Extraction Pipelines](http://arxiv.org/abs/2206.01442v1)
|
31 |
+
- [ ] [CS.CL, 2022] [EA$^2$E: Improving Consistency with Event Awareness for Document-Level Argument Extraction](http://arxiv.org/abs/2205.14847v1)
|
32 |
+
- [ ] [CS.CL, 2022] [EA$^2$E: Improving Consistency with Event Awareness for Document-Level Argument Extraction](http://arxiv.org/abs/2205.14847v1)
|
33 |
+
- [ ] [CS.CL , CS.AI, 2022] [Jointly Learning Span Extraction and Sequence Labeling for Information Extraction from Business Documents](http://arxiv.org/abs/2205.13434v1)
|
34 |
+
- [ ] [CS.CL, 2022] [GENEVA: Pushing the Limit of Generalizability for Event Argument Extraction with 100+ Event Types](http://arxiv.org/abs/2205.12505v1)
|
35 |
+
- [ ] [CS.CL, 2022] [GENEVA: Pushing the Limit of Generalizability for Event Argument Extraction with 100+ Event Types](http://arxiv.org/abs/2205.12505v1)
|
36 |
+
- [ ] [CS.CL , CS.AI, 2022] [Improve Event Extraction via Self-Training with Gradient Guidance](http://arxiv.org/abs/2205.12490v1)
|
37 |
+
- [ ] [CS.CL, 2022] [A Survey on Neural Open Information Extraction: Current Status and Future Directions](http://arxiv.org/abs/2205.11725v2)
|
38 |
+
- [ ] [CS.CL , CS.AI, 2022] [Dynamic Prefix-Tuning for Generative Template-based Event Extraction](http://arxiv.org/abs/2205.06166v1)
|
39 |
+
- [ ] [CS.CL , CS.LG, 2022] [Utilizing coarse-grained data in low-data settings for event extraction](http://arxiv.org/abs/2205.05468v1)
|
40 |
+
- [ ] [CS.CL, 2022] [CompactIE: Compact Facts in Open Information Extraction](http://arxiv.org/abs/2205.02880v2)
|
41 |
+
- [ ] [CS.CL , CS.DL , H.4, 2022] [A Library Perspective on Nearly-Unsupervised Information Extraction Workflows in Digital Libraries](http://arxiv.org/abs/2205.00716v1)
|
42 |
+
- [ ] [CS.CL , CS.IR, 2022] [Large-Scale Multi-Document Summarization with Information Extraction and Compression](http://arxiv.org/abs/2205.00548v1)
|
43 |
+
- [ ] [CS.CL, 2022] [CUP: Curriculum Learning based Prompt Tuning for Implicit Event Argument Extraction](http://arxiv.org/abs/2205.00498v2)
|
44 |
+
- [ ] [CS.CL, 2022] [CUP: Curriculum Learning based Prompt Tuning for Implicit Event Argument Extraction](http://arxiv.org/abs/2205.00498v2)
|
45 |
+
- [ ] [CS.CL , CS.AI, 2022] [A Two-Stream AMR-enhanced Model for Document-level Event Argument Extraction](http://arxiv.org/abs/2205.00241v1)
|
46 |
+
- [ ] [CS.CL , CS.AI, 2022] [A Two-Stream AMR-enhanced Model for Document-level Event Argument Extraction](http://arxiv.org/abs/2205.00241v1)
|
47 |
+
- [ ] [CS.CL , 68T99 , I.2.7, 2022] [CrudeOilNews: An Annotated Crude Oil News Corpus for Event Extraction](http://arxiv.org/abs/2204.03871v1)
|
48 |
+
- [ ] [CS.CL , CS.AI, 2022] [Improving Zero-Shot Event Extraction via Sentence Simplification](http://arxiv.org/abs/2204.02531v1)
|
49 |
+
- [ ] [CS.CL, 2022] [ZS4IE: A toolkit for Zero-Shot Information Extraction with simple Verbalizations](http://arxiv.org/abs/2203.13602v3)
|
50 |
+
- [ ] [CS.CL, 2022] [Unified Structure Generation for Universal Information Extraction](http://arxiv.org/abs/2203.12277v1)
|
51 |
+
- [ ] [CS.CL , CS.CV , CS.LG, 2022] [FormNet: Structural Encoding beyond Sequential Modeling in Form Document Information Extraction](http://arxiv.org/abs/2203.08411v2)
|
52 |
+
- [ ] [CS.CL, 2022] [Multilingual Generative Language Models for Zero-Shot Cross-Lingual Event Argument Extraction](http://arxiv.org/abs/2203.08308v1)
|
53 |
+
- [ ] [CS.CL, 2022] [Multilingual Generative Language Models for Zero-Shot Cross-Lingual Event Argument Extraction](http://arxiv.org/abs/2203.08308v1)
|
54 |
+
- [ ] [CS.CL , CS.AI, 2022] [Prompt for Extraction? PAIE: Prompting Argument Interaction for Event Argument Extraction](http://arxiv.org/abs/2202.12109v2)
|
55 |
+
- [ ] [CS.CL , CS.AI, 2022] [Prompt for Extraction? PAIE: Prompting Argument Interaction for Event Argument Extraction](http://arxiv.org/abs/2202.12109v2)
|
56 |
+
- [ ] [CS.CL, 2022] [FAMIE: A Fast Active Learning Framework for Multilingual Information Extraction](http://arxiv.org/abs/2202.08316v2)
|
57 |
+
- [ ] [STAT.AP , CS.CL, 2022] [Introducing the ICBe Dataset: Very High Recall and Precision Event Extraction from Narratives about International Crises](http://arxiv.org/abs/2202.07081v2)
|
58 |
+
- [ ] [CS.CL, 2022] [Document-Level Event Extraction via Human-Like Reading Process](http://arxiv.org/abs/2202.03092v1)
|
59 |
+
- [ ] [CS.CL, 2022] [WebFormer: The Web-page Transformer for Structure Information Extraction](http://arxiv.org/abs/2202.00217v1)
|
60 |
+
- [ ] [CS.CL , CS.AI , CS.LG, 2022] [On Event Individuation for Document-Level Information Extraction](http://arxiv.org/abs/2212.09702v1)
|
61 |
+
- [ ] [CS.CL , CS.AI , CS.LG, 2022] [On Event Individuation for Document-Level Information Extraction](http://arxiv.org/abs/2212.09702v1)
|
62 |
+
- [ ] [CS.CL, 2022] [Joint Information Extraction with Cross-Task and Cross-Instance High-Order Modeling](http://arxiv.org/abs/2212.08929v1)
|
63 |
+
- [ ] [CS.CL , CS.IR, 2022] [MORTY: Structured Summarization for Targeted Information Extraction from Scholarly Articles](http://arxiv.org/abs/2212.05429v1)
|
64 |
+
- [ ] [CS.CL , COND-MAT.MTRL-SCI , I.7.M, 2022] [Structured information extraction from complex scientific text with fine-tuned large language models](http://arxiv.org/abs/2212.05238v1)
|
65 |
+
- [ ] [CS.CL , CS.AI, 2022] [Syntactic Multi-view Learning for Open Information Extraction](http://arxiv.org/abs/2212.02068v1)
|
66 |
+
- [ ] [CS.CL, 2022] [Towards Generalized Open Information Extraction](http://arxiv.org/abs/2211.15987v1)
|
67 |
+
- [ ] [CS.CL, 2022] [MUSIED: A Benchmark for Event Detection from Multi-Source Heterogeneous Informal Texts](http://arxiv.org/abs/2211.13896v1)
|
68 |
+
- [ ] [CS.CL, 2022] [PESE: Event Structure Extraction using Pointer Network based Encoder-Decoder Architecture](http://arxiv.org/abs/2211.12157v1)
|
69 |
+
- [ ] [CS.CL, 2022] [MAVEN-ERE: A Unified Large-scale Dataset for Event Coreference, Temporal, Causal, and Subevent Relation Extraction](http://arxiv.org/abs/2211.07342v1)
|
70 |
+
- [ ] [CS.CL, 2022] [MAVEN-ERE: A Unified Large-scale Dataset for Event Coreference, Temporal, Causal, and Subevent Relation Extraction](http://arxiv.org/abs/2211.07342v1)
|
71 |
+
- [ ] [CS.CL, 2022] [Retrieval-Augmented Generative Question Answering for Event Argument Extraction](http://arxiv.org/abs/2211.07067v1)
|
72 |
+
- [ ] [CS.CL, 2022] [Retrieval-Augmented Generative Question Answering for Event Argument Extraction](http://arxiv.org/abs/2211.07067v1)
|
73 |
+
- [ ] [CS.CL , CS.AI , CS.IR , CS.LG, 2022] [TIER-A: Denoising Learning Framework for Information Extraction](http://arxiv.org/abs/2211.11527v1)
|
74 |
+
- [ ] [CS.CL , CS.AI, 2022] [Gradient Imitation Reinforcement Learning for General Low-Resource Information Extraction](http://arxiv.org/abs/2211.06014v2)
|
75 |
+
- [ ] [CS.CL, 2022] [MEE: A Novel Multilingual Event Extraction Dataset](http://arxiv.org/abs/2211.05955v2)
|
76 |
+
- [ ] [CS.CL, 2022] [Efficient Zero-shot Event Extraction with Context-Definition Alignment](http://arxiv.org/abs/2211.05156v2)
|
77 |
+
- [ ] [CS.CL, 2022] [1Cademy @ Causal News Corpus 2022: Leveraging Self-Training in Causality Classification of Socio-Political Event Data](http://arxiv.org/abs/2211.02729v1)
|
78 |
+
- [ ] [CS.CV , CS.CL, 2022] [Video Event Extraction via Tracking Visual States of Arguments](http://arxiv.org/abs/2211.01781v2)
|
79 |
+
- [ ] [CS.CV , CS.CL, 2022] [Video Event Extraction via Tracking Visual States of Arguments](http://arxiv.org/abs/2211.01781v2)
|
80 |
+
- [ ] [CS.CL, 2022] [Data-efficient End-to-end Information Extraction for Statistical Legal Analysis](http://arxiv.org/abs/2211.01692v1)
|
81 |
+
- [ ] [CS.CL, 2022] [Open-Vocabulary Argument Role Prediction for Event Extraction](http://arxiv.org/abs/2211.01577v1)
|
82 |
+
- [ ] [CS.CL, 2022] [Open-Vocabulary Argument Role Prediction for Event Extraction](http://arxiv.org/abs/2211.01577v1)
|
83 |
+
- [ ] [CS.CL, 2022] [Title2Event: Benchmarking Open Event Extraction with a Large-scale Chinese Title Dataset](http://arxiv.org/abs/2211.00869v1)
|
84 |
+
- [ ] [CS.CL, 2022] [Bi-Directional Iterative Prompt-Tuning for Event Argument Extraction](http://arxiv.org/abs/2210.15843v1)
|
85 |
+
- [ ] [CS.CL, 2022] [Bi-Directional Iterative Prompt-Tuning for Event Argument Extraction](http://arxiv.org/abs/2210.15843v1)
|
86 |
+
- [ ] [CS.CL, 2022] [CrisisLTLSum: A Benchmark for Local Crisis Event Timeline Extraction and Summarization](http://arxiv.org/abs/2210.14190v1)
|
87 |
+
- [ ] [CS.CL , CS.AI , CS.LG, 2022] [IELM: An Open Information Extraction Benchmark for Pre-Trained Language Models](http://arxiv.org/abs/2210.14128v1)
|
88 |
+
- [ ] [CS.CL, 2022] [PHEE: A Dataset for Pharmacovigilance Event Extraction from Text](http://arxiv.org/abs/2210.12560v1)
|
89 |
+
- [ ] [CS.CL, 2022] [Schema-aware Reference as Prompt Improves Data-Efficient Relational Triple and Event Extraction](http://arxiv.org/abs/2210.10709v3)
|
90 |
+
- [ ] [CS.CL, 2022] [EventGraph at CASE 2021 Task 1: A General Graph-based Approach to Protest Event Extraction](http://arxiv.org/abs/2210.09770v1)
|
91 |
+
- [ ] [CS.CL, 2022] [EventGraph: Event Extraction as Semantic Graph Parsing](http://arxiv.org/abs/2210.08646v1)
|
92 |
+
- [ ] [CS.CV , CS.CL, 2022] [Cross-domain Variational Capsules for Information Extraction](http://arxiv.org/abs/2210.09053v1)
|
93 |
+
- [ ] [CS.CL, 2022] [Iterative Document-level Information Extraction via Imitation Learning](http://arxiv.org/abs/2210.06600v1)
|
94 |
+
- [ ] [CS.CL , CS.AI, 2022] [Extracting or Guessing? Improving Faithfulness of Event Temporal Relation Extraction](http://arxiv.org/abs/2210.04992v2)
|
95 |
+
- [ ] [CS.CL , CS.AI, 2022] [Extracting or Guessing? Improving Faithfulness of Event Temporal Relation Extraction](http://arxiv.org/abs/2210.04992v2)
|
96 |
+
- [ ] [CS.CL , CS.LG, 2022] [HumSet: Dataset of Multilingual Information Extraction and Classification for Humanitarian Crisis Response](http://arxiv.org/abs/2210.04573v3)
|
97 |
+
- [ ] [CS.CL , CS.AI, 2022] [Causal Intervention-based Prompt Debiasing for Event Argument Extraction](http://arxiv.org/abs/2210.01561v1)
|
98 |
+
- [ ] [CS.CL , CS.AI, 2022] [Causal Intervention-based Prompt Debiasing for Event Argument Extraction](http://arxiv.org/abs/2210.01561v1)
|
99 |
+
- [ ] [CS.CL , CS.LG, 2022] [POTATO: exPlainable infOrmation exTrAcTion framewOrk](http://arxiv.org/abs/2201.13230v2)
|
100 |
+
- [ ] [CS.CL , CS.IR, 2022] [Information Extraction through AI techniques: The KIDs use case at CONSOB](http://arxiv.org/abs/2202.01178v1)
|
101 |
+
- [ ] [CS.CL , CS.IR , CS.LG, 2022] [From Examples to Rules: Neural Guided Rule Synthesis for Information Extraction](http://arxiv.org/abs/2202.00475v1)
|
102 |
+
- [ ] [CS.CL, 2022] [Writing Style Aware Document-level Event Extraction](http://arxiv.org/abs/2201.03188v1)
|
103 |
+
- [ ] [CS.CL , CS.AI, 2022] [Monitoring Energy Trends through Automatic Information Extraction](http://arxiv.org/abs/2201.01559v1)
|
104 |
+
- [ ] [CS.CL , COND-MAT.MTRL-SCI, 2021] [MatSciBERT: A Materials Domain Language Model for Text Mining and Information Extraction](http://arxiv.org/abs/2109.15290v1)
|
105 |
+
- [ ] [CS.CL , CS.AI , I.7; H.4; H.5, 2021] [Effective Use of Graph Convolution Network and Contextual Sub-Tree for Commodity News Event Extraction](http://arxiv.org/abs/2109.12781v1)
|
106 |
+
- [ ] [CS.CL, 2021] [Language Model Priming for Cross-Lingual Event Extraction](http://arxiv.org/abs/2109.12383v1)
|
107 |
+
- [ ] [CS.CL , CS.AI , CS.LG, 2021] [Zero-Shot Information Extraction as a Unified Text-to-Triple Translation](http://arxiv.org/abs/2109.11171v1)
|
108 |
+
- [ ] [CS.CL, 2021] [Modality and Negation in Event Extraction](http://arxiv.org/abs/2109.09393v1)
|
109 |
+
- [ ] [CS.CL , CS.IR , CS.LG, 2021] [Slot Filling for Biomedical Information Extraction](http://arxiv.org/abs/2109.08564v2)
|
110 |
+
- [ ] [CS.AI , CS.CL, 2021] [An Ontology-Based Information Extraction System for Residential Land Use Suitability Analysis](http://arxiv.org/abs/2109.07672v1)
|
111 |
+
- [ ] [CS.CL, 2021] [AnnIE: An Annotation Platform for Constructing Complete Open Information Extraction Benchmark](http://arxiv.org/abs/2109.07464v2)
|
112 |
+
- [ ] [CS.CL, 2021] [Enhancing Clinical Information Extraction with Transferred Contextual Embeddings](http://arxiv.org/abs/2109.07243v2)
|
113 |
+
- [ ] [CS.CL , CS.AI, 2021] [BenchIE: A Framework for Multi-Faceted Fact-Based Open Information Extraction Evaluation](http://arxiv.org/abs/2109.06850v2)
|
114 |
+
- [ ] [CS.CL, 2021] [Everything Is All It Takes: A Multipronged Strategy for Zero-Shot Cross-Lingual Information Extraction](http://arxiv.org/abs/2109.06798v1)
|
115 |
+
- [ ] [CS.CL, 2021] [A system for information extraction from scientific texts in Russian](http://arxiv.org/abs/2109.06703v1)
|
116 |
+
- [ ] [CS.CL , CS.CV, 2021] [Deep learning-based NLP Data Pipeline for EHR Scanned Document Information Extraction](http://arxiv.org/abs/2110.11864v1)
|
117 |
+
- [ ] [CS.CL, 2021] [Traffic Event Detection as a Slot Filling Problem](http://arxiv.org/abs/2109.06035v1)
|
118 |
+
- [ ] [Q-BIO.QM , CS.CL , CS.LG, 2021] [Clinical Trial Information Extraction with BERT](http://arxiv.org/abs/2110.10027v1)
|
119 |
+
- [ ] [CS.CL, 2021] [Uncovering Main Causalities for Long-tailed Information Extraction](http://arxiv.org/abs/2109.05213v1)
|
120 |
+
- [ ] [CS.CL , CS.AI, 2021] [PoKE: A Prompt-based Knowledge Eliciting Approach for Event Argument Extraction](http://arxiv.org/abs/2109.05190v3)
|
121 |
+
- [ ] [CS.CL , CS.AI, 2021] [PoKE: A Prompt-based Knowledge Eliciting Approach for Event Argument Extraction](http://arxiv.org/abs/2109.05190v3)
|
122 |
+
- [ ] [CS.CL, 2021] [Text-to-Table: A New Way of Information Extraction](http://arxiv.org/abs/2109.02707v2)
|
123 |
+
- [ ] [CS.CL , CS.IR , CS.LG, 2021] [Knowledge Graph Enhanced Event Extraction in Financial Documents](http://arxiv.org/abs/2109.02592v1)
|
124 |
+
- [ ] [CS.CL , CS.AI, 2021] [DEGREE: A Data-Efficient Generation-Based Event Extraction Model](http://arxiv.org/abs/2108.12724v3)
|
125 |
+
- [ ] [CS.CL, 2021] [Event Extraction by Associating Event Types and Argument Roles](http://arxiv.org/abs/2108.10038v2)
|
126 |
+
- [ ] [CS.CL, 2021] [Event Extraction by Associating Event Types and Argument Roles](http://arxiv.org/abs/2108.10038v2)
|
127 |
+
- [ ] [CS.CL , CS.AI, 2021] [An Effective System for Multi-format Information Extraction](http://arxiv.org/abs/2108.06957v1)
|
128 |
+
- [ ] [CS.CL, 2021] [BROS: A Pre-trained Language Model Focusing on Text and Layout for Better Key Information Extraction from Documents](http://arxiv.org/abs/2108.04539v5)
|
129 |
+
- [ ] [CS.CL , CS.LG, 2021] [COfEE: A Comprehensive Ontology for Event Extraction from text](http://arxiv.org/abs/2107.10326v3)
|
130 |
+
- [ ] [CS.CL , CS.AI, 2021] [An artificial intelligence natural language processing pipeline for information extraction in neuroradiology](http://arxiv.org/abs/2107.10021v1)
|
131 |
+
- [ ] [CS.CL , CS.HC, 2021] [A Dialogue-based Information Extraction System for Medical Insurance Assessment](http://arxiv.org/abs/2107.05866v1)
|
132 |
+
- [ ] [CS.CL, 2021] [CasEE: A Joint Learning Framework with Cascade Decoding for Overlapping Event Extraction](http://arxiv.org/abs/2107.01583v1)
|
133 |
+
- [ ] [CS.CL, 2021] [Reinforcement Learning-based Dialogue Guided Event Extraction to Exploit Argument Relations](http://arxiv.org/abs/2106.12384v2)
|
134 |
+
- [ ] [CS.CL, 2021] [Reinforcement Learning-based Dialogue Guided Event Extraction to Exploit Argument Relations](http://arxiv.org/abs/2106.12384v2)
|
135 |
+
- [ ] [CS.CL , CS.IR, 2021] [Deep Learning Models in Detection of Dietary Supplement Adverse Event Signals from Twitter](http://arxiv.org/abs/2106.11403v1)
|
136 |
+
- [ ] [CS.CL , CS.LG, 2021] [ROPE: Reading Order Equivariant Positional Encoding for Graph-based Document Information Extraction](http://arxiv.org/abs/2106.10786v1)
|
137 |
+
- [ ] [CS.CL, 2021] [Text2Event: Controllable Sequence-to-Structure Generation for End-to-end Event Extraction](http://arxiv.org/abs/2106.09232v1)
|
138 |
+
- [ ] [CS.CL, 2021] [From Discourse to Narrative: Knowledge Projection for Event Relation Extraction](http://arxiv.org/abs/2106.08629v1)
|
139 |
+
- [ ] [CS.CL, 2021] [From Discourse to Narrative: Knowledge Projection for Event Relation Extraction](http://arxiv.org/abs/2106.08629v1)
|
140 |
+
- [ ] [CS.CL , CS.GR, 2021] [Visualization Techniques to Enhance Automated Event Extraction](http://arxiv.org/abs/2106.06588v1)
|
141 |
+
- [ ] [CS.CL, 2021] [Key Information Extraction From Documents: Evaluation And Generator](http://arxiv.org/abs/2106.14624v1)
|
142 |
+
- [ ] [CS.DL , CS.CL, 2021] [CitationIE: Leveraging the Citation Graph for Scientific Information Extraction](http://arxiv.org/abs/2106.01560v1)
|
143 |
+
- [ ] [CS.CL, 2021] [CoRI: Collective Relation Integration with Data Augmentation for Open Information Extraction](http://arxiv.org/abs/2106.00793v1)
|
144 |
+
- [ ] [CS.CL , CS.AI, 2021] [Document-level Event Extraction via Heterogeneous Graph-based Interaction Model with a Tracker](http://arxiv.org/abs/2105.14924v1)
|
145 |
+
- [ ] [CS.CL, 2021] [CLEVE: Contrastive Pre-training for Event Extraction](http://arxiv.org/abs/2105.14485v1)
|
146 |
+
- [ ] [CS.CL , CS.CV , CS.LG, 2021] [ViBERTgrid: A Jointly Trained Multi-Modal 2D Document Representation for Key Information Extraction from Documents](http://arxiv.org/abs/2105.11672v1)
|
147 |
+
- [ ] [CS.CL , CS.LG, 2021] [Improving Adverse Drug Event Extraction with SpanBERT on Different Text Typologies](http://arxiv.org/abs/2105.08882v1)
|
148 |
+
- [ ] [CS.CL , 68T99 , I.2.7, 2021] [An Annotated Commodity News Corpus for Event Extraction](http://arxiv.org/abs/2105.08214v3)
|
149 |
+
- [ ] [CS.CL , CS.AI , CS.LG, 2021] [Doc2Dict: Information Extraction as Text Generation](http://arxiv.org/abs/2105.07510v2)
|
150 |
+
- [ ] [CS.CL, 2021] [Kleister: Key Information Extraction Datasets Involving Long Documents with Complex Layouts](http://arxiv.org/abs/2105.05796v1)
|
151 |
+
- [ ] [CS.CL, 2021] [Event Argument Extraction using Causal Knowledge Structures](http://arxiv.org/abs/2105.00477v1)
|
152 |
+
- [ ] [CS.CL, 2021] [Event Argument Extraction using Causal Knowledge Structures](http://arxiv.org/abs/2105.00477v1)
|
153 |
+
- [ ] [CS.CL, 2021] [Learning from Noisy Labels for Entity-Centric Information Extraction](http://arxiv.org/abs/2104.08656v2)
|
154 |
+
- [ ] [CS.CL, 2021] [Cost-effective End-to-end Information Extraction for Semi-structured Document Images](http://arxiv.org/abs/2104.08041v2)
|
155 |
+
- [ ] [CS.CL , CS.IR, 2021] [Event Detection as Question Answering with Entity Information](http://arxiv.org/abs/2104.06969v1)
|
156 |
+
- [ ] [CS.CL, 2021] [Document-Level Event Argument Extraction by Conditional Generation](http://arxiv.org/abs/2104.05919v1)
|
157 |
+
- [ ] [CS.CL, 2021] [Document-Level Event Argument Extraction by Conditional Generation](http://arxiv.org/abs/2104.05919v1)
|
158 |
+
- [ ] [CS.CL , CS.IR, 2021] [Use of 'off-the-shelf' information extraction algorithms in clinical informatics: a feasibility study of MetaMap annotation of Italian medical notes](http://arxiv.org/abs/2104.00975v1)
|
159 |
+
- [ ] [CS.CL, 2021] [PENELOPIE: Enabling Open Information Extraction for the Greek Language through Machine Translation](http://arxiv.org/abs/2103.15075v1)
|
160 |
+
- [ ] [CS.CL, 2021] [Cross-Task Instance Representation Interactions and Label Dependencies for Joint Information Extraction with Graph Convolutional Networks](http://arxiv.org/abs/2103.09330v3)
|
161 |
+
- [ ] [CS.CL , CS.AI , CS.LG, 2021] [DeepCPCFG: Deep Learning and Context Free Grammars for End-to-End Information Extraction](http://arxiv.org/abs/2103.05908v2)
|
162 |
+
- [ ] [CS.CL, 2021] [Syntactic and Semantic-driven Learning for Open Information Extraction](http://arxiv.org/abs/2103.03448v1)
|
163 |
+
- [ ] [CS.CL , CS.IR, 2021] [Better Call the Plumber: Orchestrating Dynamic Information Extraction Pipelines](http://arxiv.org/abs/2102.10966v1)
|
164 |
+
- [ ] [CS.CL , CS.AI, 2021] [Back to Prior Knowledge: Joint Event Causality Extraction via Convolutional Semantic Infusion](http://arxiv.org/abs/2102.09923v1)
|
165 |
+
- [ ] [CS.CL , CS.AI , CS.IR , STAT.AP, 2021] [Syntactic-GCN Bert based Chinese Event Extraction](http://arxiv.org/abs/2112.09939v1)
|
166 |
+
- [ ] [CS.CL , CS.LG , STAT.ML, 2021] [GenIE: Generative Information Extraction](http://arxiv.org/abs/2112.08340v3)
|
167 |
+
- [ ] [CS.CL, 2021] [Efficient Document-level Event Extraction via Pseudo-Trigger-aware Pruned Complete Graph](http://arxiv.org/abs/2112.06013v2)
|
168 |
+
- [ ] [CS.CL , CS.AI , CS.IR , CS.LG, 2021] [Automated Drug-Related Information Extraction from French Clinical Documents: ReLyfe Approach](http://arxiv.org/abs/2112.11439v1)
|
169 |
+
- [ ] [CS.CL , CS.AI, 2021] [Active Learning for Event Extraction with Memory-based Loss Prediction Model](http://arxiv.org/abs/2112.03073v1)
|
170 |
+
- [ ] [CS.LG , CS.AI , CS.CL , ECON.GN , Q-FIN.EC , STAT.AP, 2021] [Forecasting Crude Oil Price Using Event Extraction](http://arxiv.org/abs/2111.09111v1)
|
171 |
+
- [ ] [CS.IR , CS.AI , CS.CL, 2021] [Neural News Recommendation with Event Extraction](http://arxiv.org/abs/2111.05068v2)
|
172 |
+
- [ ] [CS.CL , CS.AI , ECON.GN , Q-FIN.EC , STAT.AP, 2021] [American Hate Crime Trends Prediction with Event Extraction](http://arxiv.org/abs/2111.04951v1)
|
173 |
+
- [ ] [CS.CL , CS.AI, 2021] [JaMIE: A Pipeline Japanese Medical Information Extraction System](http://arxiv.org/abs/2111.04261v1)
|
174 |
+
- [ ] [CS.CL , CS.CV, 2021] [Information Extraction from Visually Rich Documents with Font Style Embeddings](http://arxiv.org/abs/2111.04045v2)
|
175 |
+
- [ ] [CS.CL , CS.AI , CS.LG, 2021] [An overview of event extraction and its applications](http://arxiv.org/abs/2111.03212v1)
|
176 |
+
- [ ] [CS.CL, 2021] [SERC: Syntactic and Semantic Sequence based Event Relation Classification](http://arxiv.org/abs/2111.02265v2)
|
177 |
+
- [ ] [CS.CV , CS.AI , CS.CL, 2021] [BioIE: Biomedical Information Extraction with Multi-head Attention Enhanced Graph Convolutional Network](http://arxiv.org/abs/2110.13683v1)
|
178 |
+
- [ ] [CS.CV , CS.AI , CS.CL , CS.HC , CS.IR, 2021] [CoVA: Context-aware Visual Attention for Webpage Information Extraction](http://arxiv.org/abs/2110.12320v1)
|
179 |
+
- [ ] [CS.CL , CS.AI, 2021] [milIE: Modular & Iterative Multilingual Open Information Extraction](http://arxiv.org/abs/2110.08144v2)
|
180 |
+
- [ ] [CS.CL , CS.AI, 2021] [Making Document-Level Information Extraction Right for the Right Reasons](http://arxiv.org/abs/2110.07686v2)
|
181 |
+
- [ ] [CS.CL , CS.AI, 2021] [Query and Extract: Refining Event Extraction as Type-oriented Binary Decoding](http://arxiv.org/abs/2110.07476v2)
|
182 |
+
- [ ] [CS.CL , CS.AI , CS.LG, 2021] [Generating Disentangled Arguments with Prompts: A Simple Event Extraction Framework that Works](http://arxiv.org/abs/2110.04525v2)
|
183 |
+
- [ ] [CS.CL, 2021] [Learning to Ask for Data-Efficient Event Argument Extraction](http://arxiv.org/abs/2110.00479v1)
|
184 |
+
- [ ] [CS.CL, 2021] [Learning to Ask for Data-Efficient Event Argument Extraction](http://arxiv.org/abs/2110.00479v1)
|
185 |
+
- [ ] [CS.CL , CS.AI, 2021] [LSOIE: A Large-Scale Dataset for Supervised Open Information Extraction](http://arxiv.org/abs/2101.11177v1)
|
186 |
+
- [ ] [CS.CL, 2020] [DWIE: an entity-centric dataset for multi-task document-level information extraction](http://arxiv.org/abs/2009.12626v2)
|
187 |
+
- [ ] [CS.CL, 2020] [UCD-CS at W-NUT 2020 Shared Task-3: A Text to Text Approach for COVID-19 Event Extraction on Social Media](http://arxiv.org/abs/2009.10047v2)
|
188 |
+
- [ ] [CS.CL , CS.AI, 2020] [Biomedical Event Extraction with Hierarchical Knowledge Graphs](http://arxiv.org/abs/2009.09335v3)
|
189 |
+
- [ ] [CS.CL , CS.LG, 2020] [Multi$^2$OIE: Multilingual Open Information Extraction Based on Multi-Head Attention with BERT](http://arxiv.org/abs/2009.08128v2)
|
190 |
+
- [ ] [CS.CL , CS.AI , 68T50, 68T01, 2020] [Tag and Correct: Question aware Open Information Extraction with Two-stage Decoding](http://arxiv.org/abs/2009.07406v1)
|
191 |
+
- [ ] [CS.CL , CS.AI, 2020] [Domain Knowledge Empowered Structured Neural Net for End-to-End Event Temporal Relation Extraction](http://arxiv.org/abs/2009.07373v2)
|
192 |
+
- [ ] [CS.CL , CS.AI, 2020] [Domain Knowledge Empowered Structured Neural Net for End-to-End Event Temporal Relation Extraction](http://arxiv.org/abs/2009.07373v2)
|
193 |
+
- [ ] [CS.CL, 2020] [Event Presence Prediction Helps Trigger Detection Across Languages](http://arxiv.org/abs/2009.07188v1)
|
194 |
+
- [ ] [CS.CL, 2020] [GRIT: Generative Role-filler Transformers for Document-level Event Entity Extraction](http://arxiv.org/abs/2008.09249v2)
|
195 |
+
- [ ] [CS.CL, 2020] [Tense, aspect and mood based event extraction for situation analysis and crisis management](http://arxiv.org/abs/2008.01555v1)
|
196 |
+
- [ ] [CS.CL , CS.AI , CS.LG, 2020] [Model Reduction of Shallow CNN Model for Reliable Deployment of Information Extraction from Medical Reports](http://arxiv.org/abs/2008.01572v1)
|
197 |
+
- [ ] [CS.CL, 2020] [Information Extraction of Clinical Trial Eligibility Criteria](http://arxiv.org/abs/2006.07296v6)
|
198 |
+
- [ ] [CS.CL, 2020] [Unsupervised Label-aware Event Trigger and Argument Classification](http://arxiv.org/abs/2012.15243v2)
|
199 |
+
- [ ] [CS.CL , CS.SI, 2020] [An Event Correlation Filtering Method for Fake News Detection](http://arxiv.org/abs/2012.05491v2)
|
200 |
+
- [ ] [CS.CL , CS.LG, 2020] [Extracting COVID-19 Diagnoses and Symptoms From Clinical Text: A New Annotated Corpus and Neural Event Extraction Framework](http://arxiv.org/abs/2012.00974v2)
|
201 |
+
- [ ] [CS.CL , I.2.7, 2020] [Towards Olfactory Information Extraction from Text: A Case Study on Detecting Smell Experiences in Novels](http://arxiv.org/abs/2011.08903v2)
|
202 |
+
- [ ] [CS.LG , CS.CL, 2020] [Biomedical Information Extraction for Disease Gene Prioritization](http://arxiv.org/abs/2011.05188v2)
|
203 |
+
- [ ] [CS.CL, 2020] [Graph Transformer Networks with Syntactic and Semantic Structures for Event Argument Extraction](http://arxiv.org/abs/2010.13391v1)
|
204 |
+
- [ ] [CS.CL, 2020] [Graph Transformer Networks with Syntactic and Semantic Structures for Event Argument Extraction](http://arxiv.org/abs/2010.13391v1)
|
205 |
+
- [ ] [CS.CL, 2020] [Document-level Event Extraction with Efficient End-to-end Learning of Cross-event Dependencies](http://arxiv.org/abs/2010.12787v3)
|
206 |
+
- [ ] [CS.CL, 2020] [Probing and Fine-tuning Reading Comprehension Models for Few-shot Event Extraction](http://arxiv.org/abs/2010.11325v1)
|
207 |
+
- [ ] [CS.AI , CS.CL, 2020] [Explaining black-box text classifiers for disease-treatment information extraction](http://arxiv.org/abs/2010.10873v1)
|
208 |
+
- [ ] [CS.CL , CS.IR, 2020] [FreeDOM: A Transferable Neural Architecture for Structured Information Extraction on Web Documents](http://arxiv.org/abs/2010.10755v1)
|
209 |
+
- [ ] [CS.CL , CS.IR , CS.LG , I.2.7, 2020] [Learning from similarity and information extraction from structured documents](http://arxiv.org/abs/2011.07964v2)
|
210 |
+
- [ ] [CS.CL , 60L10 , I.2.7, 2020] [Information Extraction from Swedish Medical Prescriptions with Sig-Transformer Encoder](http://arxiv.org/abs/2010.04897v1)
|
211 |
+
- [ ] [CS.CL, 2020] [OpenIE6: Iterative Grid Labeling and Coordination Analysis for Open Information Extraction](http://arxiv.org/abs/2010.03147v1)
|
212 |
+
- [ ] [CS.CL, 2020] [Resource-Enhanced Neural Model for Event Argument Extraction](http://arxiv.org/abs/2010.03022v1)
|
213 |
+
- [ ] [CS.CL, 2020] [Resource-Enhanced Neural Model for Event Argument Extraction](http://arxiv.org/abs/2010.03022v1)
|
214 |
+
- [ ] [CS.CL, 2020] [GATE: Graph Attention Transformer Encoder for Cross-lingual Relation and Event Extraction](http://arxiv.org/abs/2010.03009v2)
|
215 |
+
- [ ] [CS.CL, 2020] [GATE: Graph Attention Transformer Encoder for Cross-lingual Relation and Event Extraction](http://arxiv.org/abs/2010.03009v2)
|
run.py
CHANGED
@@ -1,10 +1,57 @@
|
|
1 |
from src.interfaces.aclanthology import AclanthologyPaperList
|
|
|
2 |
from src.utils import dump_paper_list_to_markdown_checklist
|
3 |
|
4 |
if __name__ == "__main__":
|
5 |
-
# use `bash scripts/get_aclanthology.sh` to download and prepare anthology data
|
6 |
-
|
7 |
-
ee_query = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
"title": [
|
9 |
["information extraction"],
|
10 |
["event", "extraction"],
|
@@ -15,31 +62,8 @@ if __name__ == "__main__":
|
|
15 |
["event", "relation", "extraction"],
|
16 |
],
|
17 |
"venue": [
|
18 |
-
["acl"],
|
19 |
-
["emnlp"],
|
20 |
-
["naacl"],
|
21 |
-
["coling"],
|
22 |
-
["findings"],
|
23 |
-
["tacl"],
|
24 |
-
["cl"],
|
25 |
-
],
|
26 |
-
}
|
27 |
-
ee_papers = paper_list.search(ee_query)
|
28 |
-
dump_paper_list_to_markdown_checklist(ee_papers, "results/ee-paper-list.md")
|
29 |
-
|
30 |
-
doc_query = {
|
31 |
-
"title": [
|
32 |
-
["document-level"],
|
33 |
-
],
|
34 |
-
"venue": [
|
35 |
-
["acl"],
|
36 |
-
["emnlp"],
|
37 |
-
["naacl"],
|
38 |
-
["coling"],
|
39 |
-
["findings"],
|
40 |
-
["tacl"],
|
41 |
-
["cl"],
|
42 |
],
|
43 |
}
|
44 |
-
|
45 |
-
dump_paper_list_to_markdown_checklist(
|
|
|
1 |
from src.interfaces.aclanthology import AclanthologyPaperList
|
2 |
+
from src.interfaces.arxiv import ArxivPaperList
|
3 |
from src.utils import dump_paper_list_to_markdown_checklist
|
4 |
|
5 |
if __name__ == "__main__":
|
6 |
+
# # use `bash scripts/get_aclanthology.sh` to download and prepare anthology data
|
7 |
+
# acl_paper_list = AclanthologyPaperList("cache/aclanthology.json")
|
8 |
+
# ee_query = {
|
9 |
+
# "title": [
|
10 |
+
# ["information extraction"],
|
11 |
+
# ["event", "extraction"],
|
12 |
+
# ["event", "argument", "extraction"],
|
13 |
+
# ["event", "detection"],
|
14 |
+
# ["event", "classification"],
|
15 |
+
# ["event", "tracking"],
|
16 |
+
# ["event", "relation", "extraction"],
|
17 |
+
# ],
|
18 |
+
# "venue": [
|
19 |
+
# ["acl"],
|
20 |
+
# ["emnlp"],
|
21 |
+
# ["naacl"],
|
22 |
+
# ["coling"],
|
23 |
+
# ["findings"],
|
24 |
+
# ["tacl"],
|
25 |
+
# ["cl"],
|
26 |
+
# ],
|
27 |
+
# }
|
28 |
+
# ee_papers = acl_paper_list.search(ee_query)
|
29 |
+
# dump_paper_list_to_markdown_checklist(ee_papers, "results/ee-paper-list.md")
|
30 |
+
|
31 |
+
# doc_query = {
|
32 |
+
# "title": [
|
33 |
+
# ["document-level"],
|
34 |
+
# ],
|
35 |
+
# "venue": [
|
36 |
+
# ["acl"],
|
37 |
+
# ["emnlp"],
|
38 |
+
# ["naacl"],
|
39 |
+
# ["coling"],
|
40 |
+
# ["findings"],
|
41 |
+
# ["tacl"],
|
42 |
+
# ["cl"],
|
43 |
+
# ],
|
44 |
+
# }
|
45 |
+
# doc_papers = acl_paper_list.search(doc_query)
|
46 |
+
# dump_paper_list_to_markdown_checklist(doc_papers, "results/doc-paper-list.md")
|
47 |
+
|
48 |
+
arxiv_paper_list = ArxivPaperList(
|
49 |
+
"cache/ee-arxiv.xml",
|
50 |
+
use_cache=True,
|
51 |
+
title="Event Extraction OR Event Argument Extraction OR Event Detection OR Event Classification OR Event Tracking OR Event Relation Extraction OR Information Extraction",
|
52 |
+
category="cs.CL",
|
53 |
+
)
|
54 |
+
arxiv_ee_query = {
|
55 |
"title": [
|
56 |
["information extraction"],
|
57 |
["event", "extraction"],
|
|
|
62 |
["event", "relation", "extraction"],
|
63 |
],
|
64 |
"venue": [
|
65 |
+
["cs.CL"],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
],
|
67 |
}
|
68 |
+
arxiv_ee_papers = arxiv_paper_list.search(arxiv_ee_query)
|
69 |
+
dump_paper_list_to_markdown_checklist(arxiv_ee_papers, "results/arxiv-ee-paper-list.md")
|
src/interfaces/aclanthology.py
CHANGED
@@ -14,7 +14,7 @@ class AclanthologyPaperList(SearchAPI):
|
|
14 |
|
15 |
self.papers = []
|
16 |
for d in data:
|
17 |
-
authors = ", ".join(
|
18 |
[self.extract_author_full(author) for author in d.get("authors", [])]
|
19 |
)
|
20 |
venue = d.get("venue", [])
|
|
|
14 |
|
15 |
self.papers = []
|
16 |
for d in data:
|
17 |
+
authors = " , ".join(
|
18 |
[self.extract_author_full(author) for author in d.get("authors", [])]
|
19 |
)
|
20 |
venue = d.get("venue", [])
|
src/interfaces/arxiv.py
CHANGED
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import pathlib
|
3 |
+
|
4 |
+
import feedparser
|
5 |
+
|
6 |
+
from src.interfaces import Paper
|
7 |
+
from src.engine import SearchAPI
|
8 |
+
from src.utils import download
|
9 |
+
|
10 |
+
|
11 |
+
class ArxivPaperList(SearchAPI):
    """Paper list built from the arXiv export API.

    The Atom feed returned by the API is downloaded to ``cache_filepath``
    and parsed with ``feedparser``; one ``Paper`` is appended to
    ``self.papers`` for every entry in the feed.

    Inputs:
        cache_filepath: filepath to save cached file
        use_cache: if `True` and the cached file already exists, the API is
            not queried and the cached file is parsed instead
        raw: raw api query, e.g. `cat:cs.CL AND ti:event`. If set, the
            `title`, `author`, `abstract`, `comment` and `category`
            arguments are ignored
        title: title string to search for (sent with the `ti:` prefix)
        author: author string (`au:`)
        abstract: abstract string (`abs:`)
        comment: comment string (`co:`)
        category: arXiv category, e.g. "cs.CL" (`cat:`)
        max_results: maximal number of returned papers
        sort_by: `submittedDate` (default) or `lastUpdatedDate`
        sort_order: `descending` (default) or `ascending`

    Non-empty field arguments are combined with `AND`. Each value is placed
    verbatim after its prefix, so in `title="a OR b"` only `a` directly
    follows `ti:`; pass `raw` when every term needs its own prefix.

    Doc:
        prefix explanation
            - ti    Title
            - au    Author
            - abs   Abstract
            - co    Comment
            - jr    Journal Reference
            - cat   Subject Category
            - rn    Report Number
            - id    Id (use id_list instead)
            - all   All of the above

        logics:
            - AND
            - OR
            - ANDNOT

        symbol encoding explanation
            - ( )             %28 %29   Used to group Boolean expressions for Boolean operator precedence.
            - double quotes   %22 %22   Used to group multiple words into phrases to search a particular field.
            - space           +         Used to extend a search_query to include multiple fields.

        e.g. https://export.arxiv.org/api/query?search_query=cat:cs.CL+AND+ti:event&start=0&max_results=2000&sortBy=submittedDate&sortOrder=descending

    References:
        https://arxiv.org/help/api/user-manual#title_id_published_updated
    """

    API_URL = "https://export.arxiv.org/api/query?search_query="

    def __init__(
        self,
        cache_filepath: str | pathlib.Path,
        use_cache: bool = False,
        raw: str = "",
        title: str = "",
        author: str = "",
        abstract: str = "",
        comment: str = "",
        category: str = "cs.CL",
        max_results: int = 5000,
        sort_by: str = "submittedDate",
        sort_order: str = "descending",
    ) -> None:
        # NOTE(review): `self.papers` is appended to below but never created
        # here; presumably `SearchAPI.__init__` initialises it - confirm.
        super().__init__()

        cache_filepath = pathlib.Path(cache_filepath)
        if (not cache_filepath.exists()) or (not use_cache):
            cache_filepath.parent.mkdir(parents=True, exist_ok=True)
            query = self._build_query(raw, title, author, abstract, comment, category)
            # NOTE(review): the arXiv manual describes a per-request cap on
            # `max_results`; the default of 5000 may exceed it - confirm.
            request_url = (
                f"{self.API_URL}{query}&start=0&max_results={max_results}"
                f"&sortBy={sort_by}&sortOrder={sort_order}"
            )
            download(request_url, cache_filepath)

        # `read_text` closes the file handle, unlike a bare `open(...).read()`.
        feed = feedparser.parse(cache_filepath.read_text(encoding="utf8"))
        # The reported year/month follow the date the feed was sorted by.
        use_published = sort_by == "submittedDate"
        for entry in feed.entries:
            self.papers.append(self._entry_to_paper(entry, use_published))

    @staticmethod
    def _build_query(
        raw: str,
        title: str,
        author: str,
        abstract: str,
        comment: str,
        category: str,
    ) -> str:
        """Return the encoded `search_query` value for the request URL."""
        if raw:
            query = raw
        else:
            fields = (
                ("ti", title),
                ("au", author),
                ("abs", abstract),
                ("co", comment),
                ("cat", category),
            )
            query = " AND ".join(
                f"{prefix}:{value.strip()}" for prefix, value in fields if value
            )

        # Only the symbols listed in the class docstring are encoded.
        query = query.strip()
        for plain, encoded in ((" ", "+"), ("(", "%28"), (")", "%29"), ('"', "%22")):
            query = query.replace(plain, encoded)
        return query

    @staticmethod
    def _entry_to_paper(entry, use_published: bool) -> "Paper":
        """Convert one parsed feed entry into a `Paper`."""
        # `getattr` with a default mirrors the original `hasattr` guard on
        # `authors` and extends the same protection to `links` and `tags`.
        authors = " , ".join(person.name for person in getattr(entry, "authors", []))

        links = getattr(entry, "links", [])
        paper_url = ""
        doi = ""
        for link in links:
            if link.rel == "alternate":
                paper_url = link.href
            if "doi" in link.href:
                doi = link.href
        if not paper_url and links:
            # No "alternate" link: fall back to the first link of the entry.
            paper_url = links[0].href

        date = entry.published_parsed if use_published else entry.updated_parsed

        # Collapse line breaks and repeated whitespace into single spaces.
        paper_title = re.sub(r"\s+", " ", entry.title).strip()
        paper_abstract = re.sub(r"\s+", " ", entry.summary).strip()
        categories = " , ".join(tag["term"] for tag in getattr(entry, "tags", []))

        return Paper(
            paper_title,
            authors,
            paper_abstract,
            paper_url,
            doi,
            categories,
            str(date.tm_year),
            str(date.tm_mon),
        )
|