noahjax committed on
Commit
a71e3f4
·
verified ·
1 Parent(s): 723fe48

Update spaCy pipeline

Browse files
.gitattributes CHANGED
@@ -44,3 +44,4 @@ textcat_classify/model filter=lfs diff=lfs merge=lfs -text
44
  tok2vec_small/model filter=lfs diff=lfs merge=lfs -text
45
  vocab/strings.json filter=lfs diff=lfs merge=lfs -text
46
  senter/model filter=lfs diff=lfs merge=lfs -text
 
 
44
  tok2vec_small/model filter=lfs diff=lfs merge=lfs -text
45
  vocab/strings.json filter=lfs diff=lfs merge=lfs -text
46
  senter/model filter=lfs diff=lfs merge=lfs -text
47
+ textcat_multilabel/model filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -2,6 +2,7 @@
2
  tags:
3
  - spacy
4
  - token-classification
 
5
  language:
6
  - en
7
  model-index:
@@ -24,10 +25,10 @@ model-index:
24
  | Feature | Description |
25
  | --- | --- |
26
  | **Name** | `en_tako_query_analyzer` |
27
- | **Version** | `0.0.3` |
28
  | **spaCy** | `>=3.7.5,<3.8.0` |
29
- | **Default Pipeline** | `tok2vec_small`, `tagger`, `parser`, `attribute_ruler`, `senter`, `lemmatizer`, `tok2vec`, `ner`, `textcat_classify` |
30
- | **Components** | `tok2vec_small`, `tagger`, `parser`, `attribute_ruler`, `senter`, `lemmatizer`, `tok2vec`, `ner`, `textcat_classify` |
31
  | **Vectors** | 514157 keys, 514157 unique vectors (300 dimensions) |
32
  | **Sources** | n/a |
33
  | **License** | n/a |
@@ -37,13 +38,14 @@ model-index:
37
 
38
  <details>
39
 
40
- <summary>View label scheme (116 labels for 4 components)</summary>
41
 
42
  | Component | Labels |
43
  | --- | --- |
44
  | **`tagger`** | `$`, `''`, `,`, `-LRB-`, `-RRB-`, `.`, `:`, `ADD`, `AFX`, `CC`, `CD`, `DT`, `EX`, `FW`, `HYPH`, `IN`, `JJ`, `JJR`, `JJS`, `LS`, `MD`, `NFP`, `NN`, `NNP`, `NNPS`, `NNS`, `PDT`, `POS`, `PRP`, `PRP$`, `RB`, `RBR`, `RBS`, `RP`, `SYM`, `TO`, `UH`, `VB`, `VBD`, `VBG`, `VBN`, `VBP`, `VBZ`, `WDT`, `WP`, `WP$`, `WRB`, `XX`, `_SP`, ```` |
45
  | **`parser`** | `ROOT`, `acl`, `acomp`, `advcl`, `advmod`, `agent`, `amod`, `appos`, `attr`, `aux`, `auxpass`, `case`, `cc`, `ccomp`, `compound`, `conj`, `csubj`, `csubjpass`, `dative`, `dep`, `det`, `dobj`, `expl`, `intj`, `mark`, `meta`, `neg`, `nmod`, `npadvmod`, `nsubj`, `nsubjpass`, `nummod`, `oprd`, `parataxis`, `pcomp`, `pobj`, `poss`, `preconj`, `predet`, `prep`, `prt`, `punct`, `quantmod`, `relcl`, `xcomp` |
46
  | **`ner`** | `CARDINAL`, `DATE`, `EVENT`, `FAC`, `GPE`, `LANGUAGE`, `LAW`, `LOC`, `MONEY`, `NORP`, `ORDINAL`, `ORG`, `PERCENT`, `PERSON`, `PRODUCT`, `QUANTITY`, `STOCK_TICKER`, `TIME`, `WORK_OF_ART` |
 
47
  | **`textcat_classify`** | `ACCEPT`, `REJECT` |
48
 
49
  </details>
@@ -56,12 +58,12 @@ model-index:
56
  | `ENTS_P` | 0.00 |
57
  | `ENTS_R` | 0.00 |
58
  | `ENTS_PER_TYPE` | 0.00 |
59
- | `CATS_SCORE` | 85.07 |
60
- | `CATS_MICRO_P` | 85.31 |
61
- | `CATS_MICRO_R` | 85.31 |
62
- | `CATS_MICRO_F` | 85.31 |
63
- | `CATS_MACRO_P` | 85.35 |
64
- | `CATS_MACRO_R` | 85.31 |
65
- | `CATS_MACRO_F` | 85.31 |
66
- | `CATS_MACRO_AUC` | 91.67 |
67
- | `TEXTCAT_CLASSIFY_LOSS` | 94.04 |
 
2
  tags:
3
  - spacy
4
  - token-classification
5
+ - text-classification
6
  language:
7
  - en
8
  model-index:
 
25
  | Feature | Description |
26
  | --- | --- |
27
  | **Name** | `en_tako_query_analyzer` |
28
+ | **Version** | `0.0.4` |
29
  | **spaCy** | `>=3.7.5,<3.8.0` |
30
+ | **Default Pipeline** | `tok2vec_small`, `tagger`, `parser`, `attribute_ruler`, `senter`, `tok2vec`, `ner`, `textcat_multilabel`, `textcat_classify` |
31
+ | **Components** | `tok2vec_small`, `tagger`, `parser`, `attribute_ruler`, `senter`, `tok2vec`, `ner`, `textcat_multilabel`, `textcat_classify` |
32
  | **Vectors** | 514157 keys, 514157 unique vectors (300 dimensions) |
33
  | **Sources** | n/a |
34
  | **License** | n/a |
 
38
 
39
  <details>
40
 
41
+ <summary>View label scheme (124 labels for 5 components)</summary>
42
 
43
  | Component | Labels |
44
  | --- | --- |
45
  | **`tagger`** | `$`, `''`, `,`, `-LRB-`, `-RRB-`, `.`, `:`, `ADD`, `AFX`, `CC`, `CD`, `DT`, `EX`, `FW`, `HYPH`, `IN`, `JJ`, `JJR`, `JJS`, `LS`, `MD`, `NFP`, `NN`, `NNP`, `NNPS`, `NNS`, `PDT`, `POS`, `PRP`, `PRP$`, `RB`, `RBR`, `RBS`, `RP`, `SYM`, `TO`, `UH`, `VB`, `VBD`, `VBG`, `VBN`, `VBP`, `VBZ`, `WDT`, `WP`, `WP$`, `WRB`, `XX`, `_SP`, ```` |
46
  | **`parser`** | `ROOT`, `acl`, `acomp`, `advcl`, `advmod`, `agent`, `amod`, `appos`, `attr`, `aux`, `auxpass`, `case`, `cc`, `ccomp`, `compound`, `conj`, `csubj`, `csubjpass`, `dative`, `dep`, `det`, `dobj`, `expl`, `intj`, `mark`, `meta`, `neg`, `nmod`, `npadvmod`, `nsubj`, `nsubjpass`, `nummod`, `oprd`, `parataxis`, `pcomp`, `pobj`, `poss`, `preconj`, `predet`, `prep`, `prt`, `punct`, `quantmod`, `relcl`, `xcomp` |
47
  | **`ner`** | `CARDINAL`, `DATE`, `EVENT`, `FAC`, `GPE`, `LANGUAGE`, `LAW`, `LOC`, `MONEY`, `NORP`, `ORDINAL`, `ORG`, `PERCENT`, `PERSON`, `PRODUCT`, `QUANTITY`, `STOCK_TICKER`, `TIME`, `WORK_OF_ART` |
48
+ | **`textcat_multilabel`** | `Finance`, `Economics`, `Demographics`, `Health`, `Politics`, `Sports`, `Climate`, `Other` |
49
  | **`textcat_classify`** | `ACCEPT`, `REJECT` |
50
 
51
  </details>
 
58
  | `ENTS_P` | 0.00 |
59
  | `ENTS_R` | 0.00 |
60
  | `ENTS_PER_TYPE` | 0.00 |
61
+ | `CATS_SCORE` | 82.56 |
62
+ | `CATS_MICRO_P` | 82.30 |
63
+ | `CATS_MICRO_R` | 82.30 |
64
+ | `CATS_MICRO_F` | 82.30 |
65
+ | `CATS_MACRO_P` | 82.33 |
66
+ | `CATS_MACRO_R` | 82.30 |
67
+ | `CATS_MACRO_F` | 82.30 |
68
+ | `CATS_MACRO_AUC` | 89.93 |
69
+ | `TEXTCAT_CLASSIFY_LOSS` | 218.20 |
config.cfg CHANGED
@@ -7,7 +7,7 @@ init_tok2vec = null
7
  [variables]
8
  wandb_project_name = "tako-query-filter"
9
  wandb_team_name = "tako-team"
10
- base_model = "ner/dashing-wind"
11
 
12
  [system]
13
  gpu_allocator = "pytorch"
@@ -15,7 +15,7 @@ seed = 0
15
 
16
  [nlp]
17
  lang = "en"
18
- pipeline = ["tok2vec_small","tagger","parser","attribute_ruler","senter","lemmatizer","tok2vec","ner","textcat_classify"]
19
  batch_size = 1000
20
  disabled = []
21
  before_creation = null
@@ -31,13 +31,6 @@ factory = "attribute_ruler"
31
  scorer = {"@scorers":"spacy.attribute_ruler_scorer.v1"}
32
  validate = false
33
 
34
- [components.lemmatizer]
35
- factory = "lemmatizer"
36
- mode = "rule"
37
- model = null
38
- overwrite = false
39
- scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
40
-
41
  [components.ner]
42
  factory = "ner"
43
  incorrect_spans_key = null
@@ -49,7 +42,7 @@ update_with_oracle_cut_size = 100
49
  @architectures = "spacy.TransitionBasedParser.v2"
50
  state_type = "ner"
51
  extra_state_tokens = false
52
- hidden_width = 128
53
  maxout_pieces = 3
54
  use_upper = true
55
  nO = null
@@ -129,7 +122,7 @@ upstream = "tok2vec"
129
  factory = "weighted_textcat"
130
  class_weights = [0.67,0.33]
131
  scorer = {"@scorers":"spacy.textcat_scorer.v2"}
132
- threshold = 0.0
133
 
134
  [components.textcat_classify.model]
135
  @architectures = "spacy.TextCatEnsemble.v2"
@@ -148,13 +141,47 @@ nO = null
148
 
149
  [components.textcat_classify.model.tok2vec.embed]
150
  @architectures = "spacy.MultiHashEmbed.v2"
151
- width = 128
152
  attrs = ["NORM","PREFIX","SUFFIX","SHAPE","ENT_TYPE"]
153
- rows = [2000,500,1000,500,500]
154
  include_static_vectors = true
155
 
156
  [components.textcat_classify.model.tok2vec.encode]
157
  @architectures = "spacy.MaxoutWindowEncoder.v2"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  width = 128
159
  window_size = 1
160
  maxout_pieces = 3
@@ -170,7 +197,7 @@ factory = "tok2vec"
170
  @architectures = "spacy.MultiHashEmbed.v2"
171
  width = 256
172
  attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
173
- rows = [5000,1000,2500,2500]
174
  include_static_vectors = true
175
 
176
  [components.tok2vec.model.encode]
@@ -232,8 +259,8 @@ patience = 1000
232
  max_epochs = 0
233
  max_steps = 20000
234
  eval_frequency = 100
235
- frozen_components = ["tagger","attribute_ruler","parser","tok2vec","ner"]
236
- annotating_components = ["ner"]
237
  before_to_disk = null
238
  before_update = null
239
 
@@ -287,22 +314,21 @@ eps = 0.00000001
287
  learn_rate = 0.001
288
 
289
  [training.score_weights]
290
- tag_acc = 0.2
291
- dep_uas = 0.1
292
- dep_las = 0.1
293
  dep_las_per_type = null
294
  sents_p = 0.0
295
  sents_r = 0.0
296
- sents_f = 0.2
297
- lemma_acc = 0.2
298
- ents_f = 0.1
299
  ents_p = 0.0
300
  ents_r = 0.0
301
  ents_per_type = null
302
- cats_score = 0.05
303
  cats_score_desc = null
304
  cats_micro_p = null
305
- cats_micro_r = 0.05
306
  cats_micro_f = null
307
  cats_macro_p = null
308
  cats_macro_r = null
 
7
  [variables]
8
  wandb_project_name = "tako-query-filter"
9
  wandb_team_name = "tako-team"
10
+ base_model = "topic/upbeat-bush"
11
 
12
  [system]
13
  gpu_allocator = "pytorch"
 
15
 
16
  [nlp]
17
  lang = "en"
18
+ pipeline = ["tok2vec_small","tagger","parser","attribute_ruler","senter","tok2vec","ner","textcat_multilabel","textcat_classify"]
19
  batch_size = 1000
20
  disabled = []
21
  before_creation = null
 
31
  scorer = {"@scorers":"spacy.attribute_ruler_scorer.v1"}
32
  validate = false
33
 
 
 
 
 
 
 
 
34
  [components.ner]
35
  factory = "ner"
36
  incorrect_spans_key = null
 
42
  @architectures = "spacy.TransitionBasedParser.v2"
43
  state_type = "ner"
44
  extra_state_tokens = false
45
+ hidden_width = 256
46
  maxout_pieces = 3
47
  use_upper = true
48
  nO = null
 
122
  factory = "weighted_textcat"
123
  class_weights = [0.67,0.33]
124
  scorer = {"@scorers":"spacy.textcat_scorer.v2"}
125
+ threshold = 0.3
126
 
127
  [components.textcat_classify.model]
128
  @architectures = "spacy.TextCatEnsemble.v2"
 
141
 
142
  [components.textcat_classify.model.tok2vec.embed]
143
  @architectures = "spacy.MultiHashEmbed.v2"
144
+ width = 256
145
  attrs = ["NORM","PREFIX","SUFFIX","SHAPE","ENT_TYPE"]
146
+ rows = [4000,1000,2000,1000,1000]
147
  include_static_vectors = true
148
 
149
  [components.textcat_classify.model.tok2vec.encode]
150
  @architectures = "spacy.MaxoutWindowEncoder.v2"
151
+ width = 256
152
+ window_size = 1
153
+ maxout_pieces = 3
154
+ depth = 8
155
+
156
+ [components.textcat_multilabel]
157
+ factory = "textcat_multilabel"
158
+ scorer = {"@scorers":"spacy.textcat_multilabel_scorer.v2"}
159
+ threshold = 0.3
160
+
161
+ [components.textcat_multilabel.model]
162
+ @architectures = "spacy.TextCatEnsemble.v2"
163
+ nO = null
164
+
165
+ [components.textcat_multilabel.model.linear_model]
166
+ @architectures = "spacy.TextCatBOW.v3"
167
+ exclusive_classes = false
168
+ length = 262144
169
+ ngram_size = 1
170
+ no_output_layer = false
171
+ nO = null
172
+
173
+ [components.textcat_multilabel.model.tok2vec]
174
+ @architectures = "spacy.Tok2Vec.v2"
175
+
176
+ [components.textcat_multilabel.model.tok2vec.embed]
177
+ @architectures = "spacy.MultiHashEmbed.v2"
178
+ width = 128
179
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE","ENT_TYPE"]
180
+ rows = [2000,1000,1000,1000,1000]
181
+ include_static_vectors = true
182
+
183
+ [components.textcat_multilabel.model.tok2vec.encode]
184
+ @architectures = "spacy.MaxoutWindowEncoder.v2"
185
  width = 128
186
  window_size = 1
187
  maxout_pieces = 3
 
197
  @architectures = "spacy.MultiHashEmbed.v2"
198
  width = 256
199
  attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
200
+ rows = [2000,1000,1000,1000]
201
  include_static_vectors = true
202
 
203
  [components.tok2vec.model.encode]
 
259
  max_epochs = 0
260
  max_steps = 20000
261
  eval_frequency = 100
262
+ frozen_components = ["tok2vec","ner","textcat_multilabel"]
263
+ annotating_components = ["ner","textcat_multilabel"]
264
  before_to_disk = null
265
  before_update = null
266
 
 
314
  learn_rate = 0.001
315
 
316
  [training.score_weights]
317
+ tag_acc = 0.25
318
+ dep_uas = 0.12
319
+ dep_las = 0.12
320
  dep_las_per_type = null
321
  sents_p = 0.0
322
  sents_r = 0.0
323
+ sents_f = 0.25
324
+ ents_f = 0.12
 
325
  ents_p = 0.0
326
  ents_r = 0.0
327
  ents_per_type = null
328
+ cats_score = 0.06
329
  cats_score_desc = null
330
  cats_micro_p = null
331
+ cats_micro_r = 0.06
332
  cats_micro_f = null
333
  cats_macro_p = null
334
  cats_macro_r = null
en_tako_query_analyzer-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8e1d83f7eaa00d4dfb3414bed716f22e27a59e12006528fa8d240ef1ac36e02
3
- size 620143055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8de7f18d2cc25ababfa5d8131bb5a3d3e4ba8415066bf97493f4b388da4c6e9
3
+ size 649860398
meta.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "lang":"en",
3
  "name":"tako_query_analyzer",
4
- "version":"0.0.3",
5
  "description":"",
6
  "author":"",
7
  "email":"",
@@ -120,9 +120,6 @@
120
  ],
121
  "attribute_ruler":[
122
 
123
- ],
124
- "lemmatizer":[
125
-
126
  ],
127
  "tok2vec":[
128
 
@@ -148,6 +145,16 @@
148
  "TIME",
149
  "WORK_OF_ART"
150
  ],
 
 
 
 
 
 
 
 
 
 
151
  "textcat_classify":[
152
  "ACCEPT",
153
  "REJECT"
@@ -159,9 +166,9 @@
159
  "parser",
160
  "attribute_ruler",
161
  "senter",
162
- "lemmatizer",
163
  "tok2vec",
164
  "ner",
 
165
  "textcat_classify"
166
  ],
167
  "components":[
@@ -170,9 +177,9 @@
170
  "parser",
171
  "attribute_ruler",
172
  "senter",
173
- "lemmatizer",
174
  "tok2vec",
175
  "ner",
 
176
  "textcat_classify"
177
  ],
178
  "disabled":[
@@ -183,28 +190,28 @@
183
  "ents_p":0.0,
184
  "ents_r":0.0,
185
  "ents_per_type":0.0,
186
- "cats_score":0.8507157464,
187
  "cats_score_desc":"F (ACCEPT)",
188
- "cats_micro_p":0.8531187123,
189
- "cats_micro_r":0.8531187123,
190
- "cats_micro_f":0.8531187123,
191
- "cats_macro_p":0.853485064,
192
- "cats_macro_r":0.8531187123,
193
- "cats_macro_f":0.8530806455,
194
- "cats_macro_auc":0.9167497439,
195
  "cats_f_per_type":{
196
  "ACCEPT":{
197
- "p":0.8648648649,
198
- "r":0.8370221328,
199
- "f":0.8507157464
200
  },
201
  "REJECT":{
202
- "p":0.8421052632,
203
- "r":0.8692152918,
204
- "f":0.8554455446
205
  }
206
  },
207
- "textcat_classify_loss":0.9403656576
208
  },
209
  "requirements":[
210
 
 
1
  {
2
  "lang":"en",
3
  "name":"tako_query_analyzer",
4
+ "version":"0.0.4",
5
  "description":"",
6
  "author":"",
7
  "email":"",
 
120
  ],
121
  "attribute_ruler":[
122
 
 
 
 
123
  ],
124
  "tok2vec":[
125
 
 
145
  "TIME",
146
  "WORK_OF_ART"
147
  ],
148
+ "textcat_multilabel":[
149
+ "Finance",
150
+ "Economics",
151
+ "Demographics",
152
+ "Health",
153
+ "Politics",
154
+ "Sports",
155
+ "Climate",
156
+ "Other"
157
+ ],
158
  "textcat_classify":[
159
  "ACCEPT",
160
  "REJECT"
 
166
  "parser",
167
  "attribute_ruler",
168
  "senter",
 
169
  "tok2vec",
170
  "ner",
171
+ "textcat_multilabel",
172
  "textcat_classify"
173
  ],
174
  "components":[
 
177
  "parser",
178
  "attribute_ruler",
179
  "senter",
 
180
  "tok2vec",
181
  "ner",
182
+ "textcat_multilabel",
183
  "textcat_classify"
184
  ],
185
  "disabled":[
 
190
  "ents_p":0.0,
191
  "ents_r":0.0,
192
  "ents_per_type":0.0,
193
+ "cats_score":0.8256157635,
194
  "cats_score_desc":"F (ACCEPT)",
195
+ "cats_micro_p":0.823,
196
+ "cats_micro_r":0.823,
197
+ "cats_micro_f":0.823,
198
+ "cats_macro_p":0.8232909619,
199
+ "cats_macro_r":0.823,
200
+ "cats_macro_f":0.822960166,
201
+ "cats_macro_auc":0.899297,
202
  "cats_f_per_type":{
203
  "ACCEPT":{
204
+ "p":0.813592233,
205
+ "r":0.838,
206
+ "f":0.8256157635
207
  },
208
  "REJECT":{
209
+ "p":0.8329896907,
210
+ "r":0.808,
211
+ "f":0.8203045685
212
  }
213
  },
214
+ "textcat_classify_loss":2.1820269685
215
  },
216
  "requirements":[
217
 
ner/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39d7b2908771676021059e0da7b19783682650daddfc0c44cd1a5a6ee8c90b8b
3
- size 768585
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a18b22cda1f4ebaa4786ba94b7baa9379ecca5475386ddd9c8b258717defc3a2
3
+ size 2715735
ner/moves CHANGED
@@ -1 +1 @@
1
- ��moves��{"0":{},"1":{"ORG":32008,"GPE":3728,"PERSON":1105,"DATE":850,"WORK_OF_ART":686,"PRODUCT":585,"EVENT":283,"MONEY":214,"NORP":179,"STOCK_TICKER":156,"LAW":129,"LOC":111,"PERCENT":88,"FAC":75,"QUANTITY":60,"CARDINAL":57,"ORDINAL":42,"TIME":27,"LANGUAGE":25},"2":{"ORG":32008,"GPE":3728,"PERSON":1105,"DATE":850,"WORK_OF_ART":686,"PRODUCT":585,"EVENT":283,"MONEY":214,"NORP":179,"STOCK_TICKER":156,"LAW":129,"LOC":111,"PERCENT":88,"FAC":75,"QUANTITY":60,"CARDINAL":57,"ORDINAL":42,"TIME":27,"LANGUAGE":25},"3":{"ORG":32008,"GPE":3728,"PERSON":1105,"DATE":850,"WORK_OF_ART":686,"PRODUCT":585,"EVENT":283,"MONEY":214,"NORP":179,"STOCK_TICKER":156,"LAW":129,"LOC":111,"PERCENT":88,"FAC":75,"QUANTITY":60,"CARDINAL":57,"ORDINAL":42,"TIME":27,"LANGUAGE":25},"4":{"ORG":32008,"GPE":3728,"PERSON":1105,"DATE":850,"WORK_OF_ART":686,"PRODUCT":585,"EVENT":283,"MONEY":214,"NORP":179,"STOCK_TICKER":156,"LAW":129,"LOC":111,"PERCENT":88,"FAC":75,"QUANTITY":60,"CARDINAL":57,"ORDINAL":42,"TIME":27,"LANGUAGE":25,"":1},"5":{"":1}}�cfg��neg_key�
 
1
+ ��moves�H{"0":{},"1":{"GPE":17437,"ORG":17001,"PERSON":11972,"DATE":11609,"PRODUCT":8691,"QUANTITY":7887,"NORP":6312,"MONEY":4756,"WORK_OF_ART":4307,"EVENT":3116,"PERCENT":2783,"LAW":1974,"CARDINAL":1241,"LOC":1209,"STOCK_TICKER":842,"FAC":793,"TIME":690,"ORDINAL":222,"LANGUAGE":110},"2":{"GPE":17437,"ORG":17001,"PERSON":11972,"DATE":11609,"PRODUCT":8691,"QUANTITY":7887,"NORP":6312,"MONEY":4756,"WORK_OF_ART":4307,"EVENT":3116,"PERCENT":2783,"LAW":1974,"CARDINAL":1241,"LOC":1209,"STOCK_TICKER":842,"FAC":793,"TIME":690,"ORDINAL":222,"LANGUAGE":110},"3":{"GPE":17437,"ORG":17001,"PERSON":11972,"DATE":11609,"PRODUCT":8691,"QUANTITY":7887,"NORP":6312,"MONEY":4756,"WORK_OF_ART":4307,"EVENT":3116,"PERCENT":2783,"LAW":1974,"CARDINAL":1241,"LOC":1209,"STOCK_TICKER":842,"FAC":793,"TIME":690,"ORDINAL":222,"LANGUAGE":110},"4":{"GPE":17437,"ORG":17001,"PERSON":11972,"DATE":11609,"PRODUCT":8691,"QUANTITY":7887,"NORP":6312,"MONEY":4756,"WORK_OF_ART":4307,"EVENT":3116,"PERCENT":2783,"LAW":1974,"CARDINAL":1241,"LOC":1209,"STOCK_TICKER":842,"FAC":793,"TIME":690,"ORDINAL":222,"LANGUAGE":110,"":1},"5":{"":1}}�cfg��neg_key�
textcat_classify/cfg CHANGED
@@ -3,6 +3,6 @@
3
  "ACCEPT",
4
  "REJECT"
5
  ],
6
- "threshold":0.0,
7
  "positive_label":"ACCEPT"
8
  }
 
3
  "ACCEPT",
4
  "REJECT"
5
  ],
6
+ "threshold":0.3,
7
  "positive_label":"ACCEPT"
8
  }
textcat_classify/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c65c611aa01b463b7f99116d0b1a53cd75effb9d0bac5febef70bf3b85f0b075
3
- size 8319359
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43f147a53892c746a8f24180b5870c414902c640b10f9ca2927ed2b91a8bf5fd
3
+ size 36071049
textcat_multilabel/cfg ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "labels":[
3
+ "Finance",
4
+ "Economics",
5
+ "Demographics",
6
+ "Health",
7
+ "Politics",
8
+ "Sports",
9
+ "Climate",
10
+ "Other"
11
+ ],
12
+ "threshold":0.3
13
+ }
textcat_multilabel/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c238f141aa54e070622e6506599093673815490619c560feee160d93c37a502a
3
+ size 15382175
tok2vec/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8db1e5a93c4f955f990b7f6005b11c65ac6b9efa20f2c02291ac2013d06a203
3
- size 34434008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8f59ffdaeb93abafba597c7089b697ab873950f84b9b9bb5fe16022de2c9702
3
+ size 28290008
vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56c5dbd010224ad5111a8f04ef793ae9a7a7c24aa420362353a2e5a1bcd1df31
3
- size 10521360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33316624b8e8ce9be5d4762929948a8c6bef9a9d6a138aae4110f40cd54858a3
3
+ size 10662076