Update spaCy pipeline
Browse files- .gitattributes +1 -0
- README.md +15 -13
- config.cfg +50 -24
- en_tako_query_analyzer-any-py3-none-any.whl +2 -2
- meta.json +28 -21
- ner/model +2 -2
- ner/moves +1 -1
- textcat_classify/cfg +1 -1
- textcat_classify/model +2 -2
- textcat_multilabel/cfg +13 -0
- textcat_multilabel/model +3 -0
- tok2vec/model +2 -2
- vocab/strings.json +2 -2
.gitattributes
CHANGED
@@ -44,3 +44,4 @@ textcat_classify/model filter=lfs diff=lfs merge=lfs -text
|
|
44 |
tok2vec_small/model filter=lfs diff=lfs merge=lfs -text
|
45 |
vocab/strings.json filter=lfs diff=lfs merge=lfs -text
|
46 |
senter/model filter=lfs diff=lfs merge=lfs -text
|
|
|
|
44 |
tok2vec_small/model filter=lfs diff=lfs merge=lfs -text
|
45 |
vocab/strings.json filter=lfs diff=lfs merge=lfs -text
|
46 |
senter/model filter=lfs diff=lfs merge=lfs -text
|
47 |
+
textcat_multilabel/model filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -2,6 +2,7 @@
|
|
2 |
tags:
|
3 |
- spacy
|
4 |
- token-classification
|
|
|
5 |
language:
|
6 |
- en
|
7 |
model-index:
|
@@ -24,10 +25,10 @@ model-index:
|
|
24 |
| Feature | Description |
|
25 |
| --- | --- |
|
26 |
| **Name** | `en_tako_query_analyzer` |
|
27 |
-
| **Version** | `0.0.
|
28 |
| **spaCy** | `>=3.7.5,<3.8.0` |
|
29 |
-
| **Default Pipeline** | `tok2vec_small`, `tagger`, `parser`, `attribute_ruler`, `senter`, `
|
30 |
-
| **Components** | `tok2vec_small`, `tagger`, `parser`, `attribute_ruler`, `senter`, `
|
31 |
| **Vectors** | 514157 keys, 514157 unique vectors (300 dimensions) |
|
32 |
| **Sources** | n/a |
|
33 |
| **License** | n/a |
|
@@ -37,13 +38,14 @@ model-index:
|
|
37 |
|
38 |
<details>
|
39 |
|
40 |
-
<summary>View label scheme (
|
41 |
|
42 |
| Component | Labels |
|
43 |
| --- | --- |
|
44 |
| **`tagger`** | `$`, `''`, `,`, `-LRB-`, `-RRB-`, `.`, `:`, `ADD`, `AFX`, `CC`, `CD`, `DT`, `EX`, `FW`, `HYPH`, `IN`, `JJ`, `JJR`, `JJS`, `LS`, `MD`, `NFP`, `NN`, `NNP`, `NNPS`, `NNS`, `PDT`, `POS`, `PRP`, `PRP$`, `RB`, `RBR`, `RBS`, `RP`, `SYM`, `TO`, `UH`, `VB`, `VBD`, `VBG`, `VBN`, `VBP`, `VBZ`, `WDT`, `WP`, `WP$`, `WRB`, `XX`, `_SP`, ```` |
|
45 |
| **`parser`** | `ROOT`, `acl`, `acomp`, `advcl`, `advmod`, `agent`, `amod`, `appos`, `attr`, `aux`, `auxpass`, `case`, `cc`, `ccomp`, `compound`, `conj`, `csubj`, `csubjpass`, `dative`, `dep`, `det`, `dobj`, `expl`, `intj`, `mark`, `meta`, `neg`, `nmod`, `npadvmod`, `nsubj`, `nsubjpass`, `nummod`, `oprd`, `parataxis`, `pcomp`, `pobj`, `poss`, `preconj`, `predet`, `prep`, `prt`, `punct`, `quantmod`, `relcl`, `xcomp` |
|
46 |
| **`ner`** | `CARDINAL`, `DATE`, `EVENT`, `FAC`, `GPE`, `LANGUAGE`, `LAW`, `LOC`, `MONEY`, `NORP`, `ORDINAL`, `ORG`, `PERCENT`, `PERSON`, `PRODUCT`, `QUANTITY`, `STOCK_TICKER`, `TIME`, `WORK_OF_ART` |
|
|
|
47 |
| **`textcat_classify`** | `ACCEPT`, `REJECT` |
|
48 |
|
49 |
</details>
|
@@ -56,12 +58,12 @@ model-index:
|
|
56 |
| `ENTS_P` | 0.00 |
|
57 |
| `ENTS_R` | 0.00 |
|
58 |
| `ENTS_PER_TYPE` | 0.00 |
|
59 |
-
| `CATS_SCORE` |
|
60 |
-
| `CATS_MICRO_P` |
|
61 |
-
| `CATS_MICRO_R` |
|
62 |
-
| `CATS_MICRO_F` |
|
63 |
-
| `CATS_MACRO_P` |
|
64 |
-
| `CATS_MACRO_R` |
|
65 |
-
| `CATS_MACRO_F` |
|
66 |
-
| `CATS_MACRO_AUC` |
|
67 |
-
| `TEXTCAT_CLASSIFY_LOSS` |
|
|
|
2 |
tags:
|
3 |
- spacy
|
4 |
- token-classification
|
5 |
+
- text-classification
|
6 |
language:
|
7 |
- en
|
8 |
model-index:
|
|
|
25 |
| Feature | Description |
|
26 |
| --- | --- |
|
27 |
| **Name** | `en_tako_query_analyzer` |
|
28 |
+
| **Version** | `0.0.4` |
|
29 |
| **spaCy** | `>=3.7.5,<3.8.0` |
|
30 |
+
| **Default Pipeline** | `tok2vec_small`, `tagger`, `parser`, `attribute_ruler`, `senter`, `tok2vec`, `ner`, `textcat_multilabel`, `textcat_classify` |
|
31 |
+
| **Components** | `tok2vec_small`, `tagger`, `parser`, `attribute_ruler`, `senter`, `tok2vec`, `ner`, `textcat_multilabel`, `textcat_classify` |
|
32 |
| **Vectors** | 514157 keys, 514157 unique vectors (300 dimensions) |
|
33 |
| **Sources** | n/a |
|
34 |
| **License** | n/a |
|
|
|
38 |
|
39 |
<details>
|
40 |
|
41 |
+
<summary>View label scheme (124 labels for 5 components)</summary>
|
42 |
|
43 |
| Component | Labels |
|
44 |
| --- | --- |
|
45 |
| **`tagger`** | `$`, `''`, `,`, `-LRB-`, `-RRB-`, `.`, `:`, `ADD`, `AFX`, `CC`, `CD`, `DT`, `EX`, `FW`, `HYPH`, `IN`, `JJ`, `JJR`, `JJS`, `LS`, `MD`, `NFP`, `NN`, `NNP`, `NNPS`, `NNS`, `PDT`, `POS`, `PRP`, `PRP$`, `RB`, `RBR`, `RBS`, `RP`, `SYM`, `TO`, `UH`, `VB`, `VBD`, `VBG`, `VBN`, `VBP`, `VBZ`, `WDT`, `WP`, `WP$`, `WRB`, `XX`, `_SP`, ```` |
|
46 |
| **`parser`** | `ROOT`, `acl`, `acomp`, `advcl`, `advmod`, `agent`, `amod`, `appos`, `attr`, `aux`, `auxpass`, `case`, `cc`, `ccomp`, `compound`, `conj`, `csubj`, `csubjpass`, `dative`, `dep`, `det`, `dobj`, `expl`, `intj`, `mark`, `meta`, `neg`, `nmod`, `npadvmod`, `nsubj`, `nsubjpass`, `nummod`, `oprd`, `parataxis`, `pcomp`, `pobj`, `poss`, `preconj`, `predet`, `prep`, `prt`, `punct`, `quantmod`, `relcl`, `xcomp` |
|
47 |
| **`ner`** | `CARDINAL`, `DATE`, `EVENT`, `FAC`, `GPE`, `LANGUAGE`, `LAW`, `LOC`, `MONEY`, `NORP`, `ORDINAL`, `ORG`, `PERCENT`, `PERSON`, `PRODUCT`, `QUANTITY`, `STOCK_TICKER`, `TIME`, `WORK_OF_ART` |
|
48 |
+
| **`textcat_multilabel`** | `Finance`, `Economics`, `Demographics`, `Health`, `Politics`, `Sports`, `Climate`, `Other` |
|
49 |
| **`textcat_classify`** | `ACCEPT`, `REJECT` |
|
50 |
|
51 |
</details>
|
|
|
58 |
| `ENTS_P` | 0.00 |
|
59 |
| `ENTS_R` | 0.00 |
|
60 |
| `ENTS_PER_TYPE` | 0.00 |
|
61 |
+
| `CATS_SCORE` | 82.56 |
|
62 |
+
| `CATS_MICRO_P` | 82.30 |
|
63 |
+
| `CATS_MICRO_R` | 82.30 |
|
64 |
+
| `CATS_MICRO_F` | 82.30 |
|
65 |
+
| `CATS_MACRO_P` | 82.33 |
|
66 |
+
| `CATS_MACRO_R` | 82.30 |
|
67 |
+
| `CATS_MACRO_F` | 82.30 |
|
68 |
+
| `CATS_MACRO_AUC` | 89.93 |
|
69 |
+
| `TEXTCAT_CLASSIFY_LOSS` | 218.20 |
|
config.cfg
CHANGED
@@ -7,7 +7,7 @@ init_tok2vec = null
|
|
7 |
[variables]
|
8 |
wandb_project_name = "tako-query-filter"
|
9 |
wandb_team_name = "tako-team"
|
10 |
-
base_model = "
|
11 |
|
12 |
[system]
|
13 |
gpu_allocator = "pytorch"
|
@@ -15,7 +15,7 @@ seed = 0
|
|
15 |
|
16 |
[nlp]
|
17 |
lang = "en"
|
18 |
-
pipeline = ["tok2vec_small","tagger","parser","attribute_ruler","senter","
|
19 |
batch_size = 1000
|
20 |
disabled = []
|
21 |
before_creation = null
|
@@ -31,13 +31,6 @@ factory = "attribute_ruler"
|
|
31 |
scorer = {"@scorers":"spacy.attribute_ruler_scorer.v1"}
|
32 |
validate = false
|
33 |
|
34 |
-
[components.lemmatizer]
|
35 |
-
factory = "lemmatizer"
|
36 |
-
mode = "rule"
|
37 |
-
model = null
|
38 |
-
overwrite = false
|
39 |
-
scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
|
40 |
-
|
41 |
[components.ner]
|
42 |
factory = "ner"
|
43 |
incorrect_spans_key = null
|
@@ -49,7 +42,7 @@ update_with_oracle_cut_size = 100
|
|
49 |
@architectures = "spacy.TransitionBasedParser.v2"
|
50 |
state_type = "ner"
|
51 |
extra_state_tokens = false
|
52 |
-
hidden_width =
|
53 |
maxout_pieces = 3
|
54 |
use_upper = true
|
55 |
nO = null
|
@@ -129,7 +122,7 @@ upstream = "tok2vec"
|
|
129 |
factory = "weighted_textcat"
|
130 |
class_weights = [0.67,0.33]
|
131 |
scorer = {"@scorers":"spacy.textcat_scorer.v2"}
|
132 |
-
threshold = 0.
|
133 |
|
134 |
[components.textcat_classify.model]
|
135 |
@architectures = "spacy.TextCatEnsemble.v2"
|
@@ -148,13 +141,47 @@ nO = null
|
|
148 |
|
149 |
[components.textcat_classify.model.tok2vec.embed]
|
150 |
@architectures = "spacy.MultiHashEmbed.v2"
|
151 |
-
width =
|
152 |
attrs = ["NORM","PREFIX","SUFFIX","SHAPE","ENT_TYPE"]
|
153 |
-
rows = [2000,
|
154 |
include_static_vectors = true
|
155 |
|
156 |
[components.textcat_classify.model.tok2vec.encode]
|
157 |
@architectures = "spacy.MaxoutWindowEncoder.v2"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
width = 128
|
159 |
window_size = 1
|
160 |
maxout_pieces = 3
|
@@ -170,7 +197,7 @@ factory = "tok2vec"
|
|
170 |
@architectures = "spacy.MultiHashEmbed.v2"
|
171 |
width = 256
|
172 |
attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
|
173 |
-
rows = [
|
174 |
include_static_vectors = true
|
175 |
|
176 |
[components.tok2vec.model.encode]
|
@@ -232,8 +259,8 @@ patience = 1000
|
|
232 |
max_epochs = 0
|
233 |
max_steps = 20000
|
234 |
eval_frequency = 100
|
235 |
-
frozen_components = ["
|
236 |
-
annotating_components = ["ner"]
|
237 |
before_to_disk = null
|
238 |
before_update = null
|
239 |
|
@@ -287,22 +314,21 @@ eps = 0.00000001
|
|
287 |
learn_rate = 0.001
|
288 |
|
289 |
[training.score_weights]
|
290 |
-
tag_acc = 0.
|
291 |
-
dep_uas = 0.
|
292 |
-
dep_las = 0.
|
293 |
dep_las_per_type = null
|
294 |
sents_p = 0.0
|
295 |
sents_r = 0.0
|
296 |
-
sents_f = 0.
|
297 |
-
|
298 |
-
ents_f = 0.1
|
299 |
ents_p = 0.0
|
300 |
ents_r = 0.0
|
301 |
ents_per_type = null
|
302 |
-
cats_score = 0.
|
303 |
cats_score_desc = null
|
304 |
cats_micro_p = null
|
305 |
-
cats_micro_r = 0.
|
306 |
cats_micro_f = null
|
307 |
cats_macro_p = null
|
308 |
cats_macro_r = null
|
|
|
7 |
[variables]
|
8 |
wandb_project_name = "tako-query-filter"
|
9 |
wandb_team_name = "tako-team"
|
10 |
+
base_model = "topic/upbeat-bush"
|
11 |
|
12 |
[system]
|
13 |
gpu_allocator = "pytorch"
|
|
|
15 |
|
16 |
[nlp]
|
17 |
lang = "en"
|
18 |
+
pipeline = ["tok2vec_small","tagger","parser","attribute_ruler","senter","tok2vec","ner","textcat_multilabel","textcat_classify"]
|
19 |
batch_size = 1000
|
20 |
disabled = []
|
21 |
before_creation = null
|
|
|
31 |
scorer = {"@scorers":"spacy.attribute_ruler_scorer.v1"}
|
32 |
validate = false
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
[components.ner]
|
35 |
factory = "ner"
|
36 |
incorrect_spans_key = null
|
|
|
42 |
@architectures = "spacy.TransitionBasedParser.v2"
|
43 |
state_type = "ner"
|
44 |
extra_state_tokens = false
|
45 |
+
hidden_width = 256
|
46 |
maxout_pieces = 3
|
47 |
use_upper = true
|
48 |
nO = null
|
|
|
122 |
factory = "weighted_textcat"
|
123 |
class_weights = [0.67,0.33]
|
124 |
scorer = {"@scorers":"spacy.textcat_scorer.v2"}
|
125 |
+
threshold = 0.3
|
126 |
|
127 |
[components.textcat_classify.model]
|
128 |
@architectures = "spacy.TextCatEnsemble.v2"
|
|
|
141 |
|
142 |
[components.textcat_classify.model.tok2vec.embed]
|
143 |
@architectures = "spacy.MultiHashEmbed.v2"
|
144 |
+
width = 256
|
145 |
attrs = ["NORM","PREFIX","SUFFIX","SHAPE","ENT_TYPE"]
|
146 |
+
rows = [4000,1000,2000,1000,1000]
|
147 |
include_static_vectors = true
|
148 |
|
149 |
[components.textcat_classify.model.tok2vec.encode]
|
150 |
@architectures = "spacy.MaxoutWindowEncoder.v2"
|
151 |
+
width = 256
|
152 |
+
window_size = 1
|
153 |
+
maxout_pieces = 3
|
154 |
+
depth = 8
|
155 |
+
|
156 |
+
[components.textcat_multilabel]
|
157 |
+
factory = "textcat_multilabel"
|
158 |
+
scorer = {"@scorers":"spacy.textcat_multilabel_scorer.v2"}
|
159 |
+
threshold = 0.3
|
160 |
+
|
161 |
+
[components.textcat_multilabel.model]
|
162 |
+
@architectures = "spacy.TextCatEnsemble.v2"
|
163 |
+
nO = null
|
164 |
+
|
165 |
+
[components.textcat_multilabel.model.linear_model]
|
166 |
+
@architectures = "spacy.TextCatBOW.v3"
|
167 |
+
exclusive_classes = false
|
168 |
+
length = 262144
|
169 |
+
ngram_size = 1
|
170 |
+
no_output_layer = false
|
171 |
+
nO = null
|
172 |
+
|
173 |
+
[components.textcat_multilabel.model.tok2vec]
|
174 |
+
@architectures = "spacy.Tok2Vec.v2"
|
175 |
+
|
176 |
+
[components.textcat_multilabel.model.tok2vec.embed]
|
177 |
+
@architectures = "spacy.MultiHashEmbed.v2"
|
178 |
+
width = 128
|
179 |
+
attrs = ["NORM","PREFIX","SUFFIX","SHAPE","ENT_TYPE"]
|
180 |
+
rows = [2000,1000,1000,1000,1000]
|
181 |
+
include_static_vectors = true
|
182 |
+
|
183 |
+
[components.textcat_multilabel.model.tok2vec.encode]
|
184 |
+
@architectures = "spacy.MaxoutWindowEncoder.v2"
|
185 |
width = 128
|
186 |
window_size = 1
|
187 |
maxout_pieces = 3
|
|
|
197 |
@architectures = "spacy.MultiHashEmbed.v2"
|
198 |
width = 256
|
199 |
attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
|
200 |
+
rows = [2000,1000,1000,1000]
|
201 |
include_static_vectors = true
|
202 |
|
203 |
[components.tok2vec.model.encode]
|
|
|
259 |
max_epochs = 0
|
260 |
max_steps = 20000
|
261 |
eval_frequency = 100
|
262 |
+
frozen_components = ["tok2vec","ner","textcat_multilabel"]
|
263 |
+
annotating_components = ["ner","textcat_multilabel"]
|
264 |
before_to_disk = null
|
265 |
before_update = null
|
266 |
|
|
|
314 |
learn_rate = 0.001
|
315 |
|
316 |
[training.score_weights]
|
317 |
+
tag_acc = 0.25
|
318 |
+
dep_uas = 0.12
|
319 |
+
dep_las = 0.12
|
320 |
dep_las_per_type = null
|
321 |
sents_p = 0.0
|
322 |
sents_r = 0.0
|
323 |
+
sents_f = 0.25
|
324 |
+
ents_f = 0.12
|
|
|
325 |
ents_p = 0.0
|
326 |
ents_r = 0.0
|
327 |
ents_per_type = null
|
328 |
+
cats_score = 0.06
|
329 |
cats_score_desc = null
|
330 |
cats_micro_p = null
|
331 |
+
cats_micro_r = 0.06
|
332 |
cats_micro_f = null
|
333 |
cats_macro_p = null
|
334 |
cats_macro_r = null
|
en_tako_query_analyzer-any-py3-none-any.whl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8de7f18d2cc25ababfa5d8131bb5a3d3e4ba8415066bf97493f4b388da4c6e9
|
3 |
+
size 649860398
|
meta.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"lang":"en",
|
3 |
"name":"tako_query_analyzer",
|
4 |
-
"version":"0.0.
|
5 |
"description":"",
|
6 |
"author":"",
|
7 |
"email":"",
|
@@ -120,9 +120,6 @@
|
|
120 |
],
|
121 |
"attribute_ruler":[
|
122 |
|
123 |
-
],
|
124 |
-
"lemmatizer":[
|
125 |
-
|
126 |
],
|
127 |
"tok2vec":[
|
128 |
|
@@ -148,6 +145,16 @@
|
|
148 |
"TIME",
|
149 |
"WORK_OF_ART"
|
150 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
"textcat_classify":[
|
152 |
"ACCEPT",
|
153 |
"REJECT"
|
@@ -159,9 +166,9 @@
|
|
159 |
"parser",
|
160 |
"attribute_ruler",
|
161 |
"senter",
|
162 |
-
"lemmatizer",
|
163 |
"tok2vec",
|
164 |
"ner",
|
|
|
165 |
"textcat_classify"
|
166 |
],
|
167 |
"components":[
|
@@ -170,9 +177,9 @@
|
|
170 |
"parser",
|
171 |
"attribute_ruler",
|
172 |
"senter",
|
173 |
-
"lemmatizer",
|
174 |
"tok2vec",
|
175 |
"ner",
|
|
|
176 |
"textcat_classify"
|
177 |
],
|
178 |
"disabled":[
|
@@ -183,28 +190,28 @@
|
|
183 |
"ents_p":0.0,
|
184 |
"ents_r":0.0,
|
185 |
"ents_per_type":0.0,
|
186 |
-
"cats_score":0.
|
187 |
"cats_score_desc":"F (ACCEPT)",
|
188 |
-
"cats_micro_p":0.
|
189 |
-
"cats_micro_r":0.
|
190 |
-
"cats_micro_f":0.
|
191 |
-
"cats_macro_p":0.
|
192 |
-
"cats_macro_r":0.
|
193 |
-
"cats_macro_f":0.
|
194 |
-
"cats_macro_auc":0.
|
195 |
"cats_f_per_type":{
|
196 |
"ACCEPT":{
|
197 |
-
"p":0.
|
198 |
-
"r":0.
|
199 |
-
"f":0.
|
200 |
},
|
201 |
"REJECT":{
|
202 |
-
"p":0.
|
203 |
-
"r":0.
|
204 |
-
"f":0.
|
205 |
}
|
206 |
},
|
207 |
-
"textcat_classify_loss":
|
208 |
},
|
209 |
"requirements":[
|
210 |
|
|
|
1 |
{
|
2 |
"lang":"en",
|
3 |
"name":"tako_query_analyzer",
|
4 |
+
"version":"0.0.4",
|
5 |
"description":"",
|
6 |
"author":"",
|
7 |
"email":"",
|
|
|
120 |
],
|
121 |
"attribute_ruler":[
|
122 |
|
|
|
|
|
|
|
123 |
],
|
124 |
"tok2vec":[
|
125 |
|
|
|
145 |
"TIME",
|
146 |
"WORK_OF_ART"
|
147 |
],
|
148 |
+
"textcat_multilabel":[
|
149 |
+
"Finance",
|
150 |
+
"Economics",
|
151 |
+
"Demographics",
|
152 |
+
"Health",
|
153 |
+
"Politics",
|
154 |
+
"Sports",
|
155 |
+
"Climate",
|
156 |
+
"Other"
|
157 |
+
],
|
158 |
"textcat_classify":[
|
159 |
"ACCEPT",
|
160 |
"REJECT"
|
|
|
166 |
"parser",
|
167 |
"attribute_ruler",
|
168 |
"senter",
|
|
|
169 |
"tok2vec",
|
170 |
"ner",
|
171 |
+
"textcat_multilabel",
|
172 |
"textcat_classify"
|
173 |
],
|
174 |
"components":[
|
|
|
177 |
"parser",
|
178 |
"attribute_ruler",
|
179 |
"senter",
|
|
|
180 |
"tok2vec",
|
181 |
"ner",
|
182 |
+
"textcat_multilabel",
|
183 |
"textcat_classify"
|
184 |
],
|
185 |
"disabled":[
|
|
|
190 |
"ents_p":0.0,
|
191 |
"ents_r":0.0,
|
192 |
"ents_per_type":0.0,
|
193 |
+
"cats_score":0.8256157635,
|
194 |
"cats_score_desc":"F (ACCEPT)",
|
195 |
+
"cats_micro_p":0.823,
|
196 |
+
"cats_micro_r":0.823,
|
197 |
+
"cats_micro_f":0.823,
|
198 |
+
"cats_macro_p":0.8232909619,
|
199 |
+
"cats_macro_r":0.823,
|
200 |
+
"cats_macro_f":0.822960166,
|
201 |
+
"cats_macro_auc":0.899297,
|
202 |
"cats_f_per_type":{
|
203 |
"ACCEPT":{
|
204 |
+
"p":0.813592233,
|
205 |
+
"r":0.838,
|
206 |
+
"f":0.8256157635
|
207 |
},
|
208 |
"REJECT":{
|
209 |
+
"p":0.8329896907,
|
210 |
+
"r":0.808,
|
211 |
+
"f":0.8203045685
|
212 |
}
|
213 |
},
|
214 |
+
"textcat_classify_loss":2.1820269685
|
215 |
},
|
216 |
"requirements":[
|
217 |
|
ner/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a18b22cda1f4ebaa4786ba94b7baa9379ecca5475386ddd9c8b258717defc3a2
|
3 |
+
size 2715735
|
ner/moves
CHANGED
@@ -1 +1 @@
|
|
1 |
-
��moves
|
|
|
1 |
+
��moves�H{"0":{},"1":{"GPE":17437,"ORG":17001,"PERSON":11972,"DATE":11609,"PRODUCT":8691,"QUANTITY":7887,"NORP":6312,"MONEY":4756,"WORK_OF_ART":4307,"EVENT":3116,"PERCENT":2783,"LAW":1974,"CARDINAL":1241,"LOC":1209,"STOCK_TICKER":842,"FAC":793,"TIME":690,"ORDINAL":222,"LANGUAGE":110},"2":{"GPE":17437,"ORG":17001,"PERSON":11972,"DATE":11609,"PRODUCT":8691,"QUANTITY":7887,"NORP":6312,"MONEY":4756,"WORK_OF_ART":4307,"EVENT":3116,"PERCENT":2783,"LAW":1974,"CARDINAL":1241,"LOC":1209,"STOCK_TICKER":842,"FAC":793,"TIME":690,"ORDINAL":222,"LANGUAGE":110},"3":{"GPE":17437,"ORG":17001,"PERSON":11972,"DATE":11609,"PRODUCT":8691,"QUANTITY":7887,"NORP":6312,"MONEY":4756,"WORK_OF_ART":4307,"EVENT":3116,"PERCENT":2783,"LAW":1974,"CARDINAL":1241,"LOC":1209,"STOCK_TICKER":842,"FAC":793,"TIME":690,"ORDINAL":222,"LANGUAGE":110},"4":{"GPE":17437,"ORG":17001,"PERSON":11972,"DATE":11609,"PRODUCT":8691,"QUANTITY":7887,"NORP":6312,"MONEY":4756,"WORK_OF_ART":4307,"EVENT":3116,"PERCENT":2783,"LAW":1974,"CARDINAL":1241,"LOC":1209,"STOCK_TICKER":842,"FAC":793,"TIME":690,"ORDINAL":222,"LANGUAGE":110,"":1},"5":{"":1}}�cfg��neg_key�
|
textcat_classify/cfg
CHANGED
@@ -3,6 +3,6 @@
|
|
3 |
"ACCEPT",
|
4 |
"REJECT"
|
5 |
],
|
6 |
-
"threshold":0.
|
7 |
"positive_label":"ACCEPT"
|
8 |
}
|
|
|
3 |
"ACCEPT",
|
4 |
"REJECT"
|
5 |
],
|
6 |
+
"threshold":0.3,
|
7 |
"positive_label":"ACCEPT"
|
8 |
}
|
textcat_classify/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43f147a53892c746a8f24180b5870c414902c640b10f9ca2927ed2b91a8bf5fd
|
3 |
+
size 36071049
|
textcat_multilabel/cfg
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"labels":[
|
3 |
+
"Finance",
|
4 |
+
"Economics",
|
5 |
+
"Demographics",
|
6 |
+
"Health",
|
7 |
+
"Politics",
|
8 |
+
"Sports",
|
9 |
+
"Climate",
|
10 |
+
"Other"
|
11 |
+
],
|
12 |
+
"threshold":0.3
|
13 |
+
}
|
textcat_multilabel/model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c238f141aa54e070622e6506599093673815490619c560feee160d93c37a502a
|
3 |
+
size 15382175
|
tok2vec/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8f59ffdaeb93abafba597c7089b697ab873950f84b9b9bb5fe16022de2c9702
|
3 |
+
size 28290008
|
vocab/strings.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33316624b8e8ce9be5d4762929948a8c6bef9a9d6a138aae4110f40cd54858a3
|
3 |
+
size 10662076
|