Spaces:
Runtime error
Runtime error
allen
committed on
Commit
·
35a28f9
1
Parent(s):
1a7d487
add
Browse files- README.md +1 -1
- relation_extraction.py +57 -29
README.md
CHANGED
@@ -135,8 +135,8 @@ This metric has strict filter mechanism, if any of the prediction's entity names
|
|
135 |
author = {Bruno Taillé, Vincent Guigue, Geoffrey Scoutheeten, Patrick Gallinari},
|
136 |
title = {Let's Stop Incorrect Comparisons in End-to-end Relation Extraction!},
|
137 |
year = {2020},
|
|
|
138 |
}
|
139 |
-
*https://arxiv.org/abs/2009.10684*
|
140 |
```
|
141 |
## Further References
|
142 |
This evaluation metric implementation uses
|
|
|
135 |
author = {Bruno Taillé, Vincent Guigue, Geoffrey Scoutheeten, Patrick Gallinari},
|
136 |
title = {Let's Stop Incorrect Comparisons in End-to-end Relation Extraction!},
|
137 |
year = {2020},
|
138 |
+
url = {https://arxiv.org/abs/2009.10684}
|
139 |
}
|
|
|
140 |
```
|
141 |
## Further References
|
142 |
This evaluation metric implementation uses
|
relation_extraction.py
CHANGED
@@ -2,45 +2,69 @@ import evaluate
|
|
2 |
import datasets
|
3 |
import numpy as np
|
4 |
|
5 |
-
#
|
6 |
_CITATION = """\
|
7 |
-
@
|
8 |
-
|
9 |
-
|
10 |
-
year={2020}
|
|
|
11 |
}
|
12 |
"""
|
13 |
|
14 |
-
#
|
15 |
_DESCRIPTION = """\
|
16 |
-
This
|
17 |
"""
|
18 |
|
19 |
|
20 |
-
#
|
21 |
_KWARGS_DESCRIPTION = """
|
22 |
-
Calculates how good are predictions given some references, using
|
23 |
Args:
|
24 |
-
predictions: list of
|
25 |
-
should be
|
26 |
-
references: list of
|
27 |
-
|
28 |
Returns:
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
Examples:
|
32 |
-
|
33 |
-
|
34 |
-
>>>
|
35 |
-
|
36 |
-
|
37 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
"""
|
39 |
|
40 |
-
# TODO: Define external resources urls if needed
|
41 |
-
BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
|
42 |
-
|
43 |
-
|
44 |
def convert_format(data:list):
|
45 |
"""
|
46 |
Args:
|
@@ -51,13 +75,12 @@ def convert_format(data:list):
|
|
51 |
'head_type': ['product', 'brand'...],
|
52 |
'type': ['sell', 'sell'...],
|
53 |
'tail': ['國際認證之色乳', '國際認證之色乳'...],
|
54 |
-
'tail_type': ['product', 'product'...]},
|
55 |
-
|
56 |
{'head': ['SABONTAIWAN', 'SNTAIWAN'...],
|
57 |
'head_type': ['brand', 'brand'...],
|
58 |
'type': ['sell', 'sell'...],
|
59 |
'tail': ['大馬士革玫瑰有機光燦系列', '大馬士革玫瑰有機光燦系列'...],
|
60 |
-
'tail_type': ['product', 'product'...]}
|
61 |
...
|
62 |
]
|
63 |
"""
|
@@ -78,7 +101,12 @@ def convert_format(data:list):
|
|
78 |
|
79 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
80 |
class relation_extraction(evaluate.Metric):
|
81 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
def _info(self):
|
84 |
# TODO: Specifies the evaluate.EvaluationModuleInfo object
|
|
|
2 |
import datasets
|
3 |
import numpy as np
|
4 |
|
5 |
+
# Add BibTeX citation
|
6 |
_CITATION = """\
|
7 |
+
@misc{taille2020lets,
|
8 |
+
author = {Bruno Taillé and Vincent Guigue and Geoffrey Scoutheeten and Patrick Gallinari},
|
9 |
+
title = {Let's Stop Incorrect Comparisons in End-to-end Relation Extraction!},
|
10 |
+
year = {2020},
|
11 |
+
url = {https://arxiv.org/abs/2009.10684}
|
12 |
}
|
13 |
"""
|
14 |
|
15 |
+
# Add description of the module here
|
16 |
_DESCRIPTION = """\
|
17 |
+
This metric is used for evaluating the quality of relation extraction output by calculating the Micro and Macro F1 scores of the extracted relations.
|
18 |
"""
|
19 |
|
20 |
|
21 |
+
# Add description of the arguments of the module here
|
22 |
_KWARGS_DESCRIPTION = """
|
23 |
+
Calculates how good the predictions are given some references, using Micro and Macro F1 scores.
|
24 |
Args:
|
25 |
+
predictions: list of lists of dictionaries, each describing a relation and its type
|
26 |
+
each dictionary should consist of key-value pairs, such as an entity name linked to its type
|
27 |
+
references: list of lists of dictionaries, each describing a relation and its type
|
28 |
+
each dictionary should consist of key-value pairs, such as an entity name linked to its type
|
29 |
Returns:
|
30 |
+
evaluation result:
|
31 |
+
- **sell** (`dictionary`): score of type sell
|
32 |
+
- **tp** : true positive count
|
33 |
+
- **fp** : false positive count
|
34 |
+
- **fn** : false negative count
|
35 |
+
- **p** : precision
|
36 |
+
- **r** : recall
|
37 |
+
- **f1** : micro f1 score
|
38 |
+
- **ALL** (`dictionary`): score over all of the types (sell and belongs to)
|
39 |
+
- **tp** : true positive count
|
40 |
+
- **fp** : false positive count
|
41 |
+
- **fn** : false negative count
|
42 |
+
- **p** : precision
|
43 |
+
- **r** : recall
|
44 |
+
- **f1** : micro f1 score
|
45 |
+
- **Macro_f1** : macro f1 score
|
46 |
+
- **Macro_p** : macro precision
|
47 |
+
- **Macro_r** : macro recall
|
48 |
Examples:
|
49 |
+
>>> metric_path = "Ikala-allen/relation_extraction"
|
50 |
+
>>> module = evaluate.load(metric_path)
|
51 |
+
>>> references = [
|
52 |
+
... [
|
53 |
+
... {"head": "phip igments", "head_type": "brand", "type": "sell", "tail": "國際認證之色乳", "tail_type": "product"},
|
54 |
+
... {"head": "tinadaviespigments", "head_type": "brand", "type": "sell", "tail": "國際認證之色乳", "tail_type": "product"},
|
55 |
+
... ]
|
56 |
+
... ]
|
57 |
+
>>> predictions = [
|
58 |
+
... [
|
59 |
+
... {"head": "phipigments", "head_type": "product", "type": "sell", "tail": "國際認證之色乳", "tail_type": "product"},
|
60 |
+
... {"head": "tinadaviespigments", "head_type": "brand", "type": "sell", "tail": "國際認證之色乳", "tail_type": "product"},
|
61 |
+
... ]
|
62 |
+
... ]
|
63 |
+
>>> evaluation_scores = module.compute(predictions=predictions, references=references)
|
64 |
+
>>> print(evaluation_scores)
|
65 |
+
{'sell': {'tp': 1, 'fp': 1, 'fn': 1, 'p': 50.0, 'r': 50.0, 'f1': 50.0}, 'ALL': {'tp': 1, 'fp': 1, 'fn': 1, 'p': 50.0, 'r': 50.0, 'f1': 50.0, 'Macro_f1': 50.0, 'Macro_p': 50.0, 'Macro_r': 50.0}}
|
66 |
"""
|
67 |
|
|
|
|
|
|
|
|
|
68 |
def convert_format(data:list):
|
69 |
"""
|
70 |
Args:
|
|
|
75 |
'head_type': ['product', 'brand'...],
|
76 |
'type': ['sell', 'sell'...],
|
77 |
'tail': ['國際認證之色乳', '國際認證之色乳'...],
|
78 |
+
'tail_type': ['product', 'product'...]}, # first element
|
|
|
79 |
{'head': ['SABONTAIWAN', 'SNTAIWAN'...],
|
80 |
'head_type': ['brand', 'brand'...],
|
81 |
'type': ['sell', 'sell'...],
|
82 |
'tail': ['大馬士革玫瑰有機光燦系列', '大馬士革玫瑰有機光燦系列'...],
|
83 |
+
'tail_type': ['product', 'product'...]} # second element
|
84 |
...
|
85 |
]
|
86 |
"""
|
|
|
101 |
|
102 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
103 |
class relation_extraction(evaluate.Metric):
|
104 |
+
"""
|
105 |
+
evaluation metric of relation extraction
|
106 |
+
inputs:
|
107 |
+
predictions : (`list` of `list`s of `dictionary`s) predicted relations and their types
|
108 |
+
references : (`list` of `list`s of `dictionary`s) reference relations and their types.
|
109 |
+
"""
|
110 |
|
111 |
def _info(self):
|
112 |
# TODO: Specifies the evaluate.EvaluationModuleInfo object
|