noahjax committed on Mar 2

Commit

16849dc

verified ·

1 Parent(s): 41a8235

Update spaCy pipeline

Browse files

Files changed (20) hide show

.gitattributes +5 -0
README.md +24 -24
attribute_ruler/patterns +0 -0
config.cfg +43 -21
custom_textcat.py +142 -0
en_tako_query_analyzer-any-py3-none-any.whl +2 -2
meta.json +143 -190
ner/model +0 -0
ner/moves +1 -1
parser/cfg +13 -0
parser/model +3 -0
parser/moves +1 -0
tagger/cfg +57 -0
tagger/model +0 -0
textcat_classify/cfg +8 -0
textcat_classify/model +3 -0
tok2vec/model +1 -1
tok2vec_small/cfg +3 -0
tok2vec_small/model +3 -0
vocab/strings.json +0 -0

.gitattributes CHANGED Viewed

@@ -38,3 +38,8 @@ textcat/model filter=lfs diff=lfs merge=lfs -text
 tok2vec/model filter=lfs diff=lfs merge=lfs -text
 vocab/key2row filter=lfs diff=lfs merge=lfs -text
 vocab/vectors filter=lfs diff=lfs merge=lfs -text

 tok2vec/model filter=lfs diff=lfs merge=lfs -text
 vocab/key2row filter=lfs diff=lfs merge=lfs -text
 vocab/vectors filter=lfs diff=lfs merge=lfs -text
+ner/model filter=lfs diff=lfs merge=lfs -text
+parser/model filter=lfs diff=lfs merge=lfs -text
+textcat_classify/model filter=lfs diff=lfs merge=lfs -text
+tok2vec_small/model filter=lfs diff=lfs merge=lfs -text
+vocab/strings.json filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -2,7 +2,6 @@
 tags:
 - spacy
 - token-classification
-- text-classification
 language:
 - en
 model-index:
@@ -14,21 +13,21 @@ model-index:
     metrics:
     - name: NER Precision
       type: precision
-      value: 0.7170177384
     - name: NER Recall
       type: recall
-      value: 0.7172165234
     - name: NER F Score
       type: f_score
-      value: 0.7171171171
 ---
 | Feature | Description |
 | --- | --- |
 | **Name** | `en_tako_query_analyzer` |
-| **Version** | `0.0.1` |
 | **spaCy** | `>=3.7.5,<3.8.0` |
-| **Default Pipeline** | `tok2vec`, `ner`, `textcat` |
-| **Components** | `tok2vec`, `ner`, `textcat` |
 | **Vectors** | 514157 keys, 514157 unique vectors (300 dimensions) |
 | **Sources** | n/a |
 | **License** | n/a |
@@ -38,12 +37,14 @@ model-index:
 <details>
-<summary>View label scheme (33 labels for 2 components)</summary>
 | Component | Labels |
 | --- | --- |
-| **`ner`** | `CARDINAL`, `CUSTOM_ATTRIBUTE`, `CUSTOM_SEMANTIC_FUNCTION`, `CUSTOM_SPORTS_CONFERENCE`, `CUSTOM_SPORTS_LEAGUE`, `CUSTOM_SPORTS_ROLE`, `CUSTOM_STOCK_TICKER`, `CUSTOM_TEAM`, `DATE`, `EVENT`, `FAC`, `GPE`, `LANGUAGE`, `LAW`, `LOC`, `MONEY`, `NORP`, `ORDINAL`, `ORG`, `PERCENT`, `PERSON`, `PRODUCT`, `QUANTITY`, `TIME`, `WORK_OF_ART` |
-| **`textcat`** | `Business and Finance`, `Arts, Culture, and Entertainment`, `Crime`, `Sports`, `Politics`, `Science and Technology`, `Health and Wellness`, `Lifestyle and Fashion` |
 </details>
@@ -51,17 +52,16 @@ model-index:
 | Type | Score |
 | --- | --- |
-| `ENTS_F` | 71.71 |
-| `ENTS_P` | 71.70 |
-| `ENTS_R` | 71.72 |
-| `CATS_SCORE` | 70.53 |
-| `CATS_MICRO_P` | 85.89 |
-| `CATS_MICRO_R` | 85.89 |
-| `CATS_MICRO_F` | 85.89 |
-| `CATS_MACRO_P` | 74.89 |
-| `CATS_MACRO_R` | 67.56 |
-| `CATS_MACRO_F` | 70.53 |
-| `CATS_MACRO_AUC` | 93.04 |
-| `TOK2VEC_LOSS` | 61786.52 |
-| `NER_LOSS` | 46852.50 |
-| `TEXTCAT_LOSS` | 1.09 |

 tags:
 - spacy
 - token-classification
 language:
 - en
 model-index:
     metrics:
     - name: NER Precision
       type: precision
+      value: 0.0
     - name: NER Recall
       type: recall
+      value: 0.0
     - name: NER F Score
       type: f_score
+      value: 0.0
 ---
 | Feature | Description |
 | --- | --- |
 | **Name** | `en_tako_query_analyzer` |
+| **Version** | `0.0.2` |
 | **spaCy** | `>=3.7.5,<3.8.0` |
+| **Default Pipeline** | `tok2vec_small`, `tagger`, `parser`, `attribute_ruler`, `tok2vec`, `ner`, `textcat_classify` |
+| **Components** | `tok2vec_small`, `tagger`, `parser`, `attribute_ruler`, `tok2vec`, `ner`, `textcat_classify` |
 | **Vectors** | 514157 keys, 514157 unique vectors (300 dimensions) |
 | **Sources** | n/a |
 | **License** | n/a |
 <details>
+<summary>View label scheme (116 labels for 4 components)</summary>
 | Component | Labels |
 | --- | --- |
+| **`tagger`** | `$`, `''`, `,`, `-LRB-`, `-RRB-`, `.`, `:`, `ADD`, `AFX`, `CC`, `CD`, `DT`, `EX`, `FW`, `HYPH`, `IN`, `JJ`, `JJR`, `JJS`, `LS`, `MD`, `NFP`, `NN`, `NNP`, `NNPS`, `NNS`, `PDT`, `POS`, `PRP`, `PRP$`, `RB`, `RBR`, `RBS`, `RP`, `SYM`, `TO`, `UH`, `VB`, `VBD`, `VBG`, `VBN`, `VBP`, `VBZ`, `WDT`, `WP`, `WP$`, `WRB`, `XX`, `_SP`, ```` |
+| **`parser`** | `ROOT`, `acl`, `acomp`, `advcl`, `advmod`, `agent`, `amod`, `appos`, `attr`, `aux`, `auxpass`, `case`, `cc`, `ccomp`, `compound`, `conj`, `csubj`, `csubjpass`, `dative`, `dep`, `det`, `dobj`, `expl`, `intj`, `mark`, `meta`, `neg`, `nmod`, `npadvmod`, `nsubj`, `nsubjpass`, `nummod`, `oprd`, `parataxis`, `pcomp`, `pobj`, `poss`, `preconj`, `predet`, `prep`, `prt`, `punct`, `quantmod`, `relcl`, `xcomp` |
+| **`ner`** | `CARDINAL`, `DATE`, `EVENT`, `FAC`, `GPE`, `LANGUAGE`, `LAW`, `LOC`, `MONEY`, `NORP`, `ORDINAL`, `ORG`, `PERCENT`, `PERSON`, `PRODUCT`, `QUANTITY`, `STOCK_TICKER`, `TIME`, `WORK_OF_ART` |
+| **`textcat_classify`** | `ACCEPT`, `REJECT` |
 </details>
 | Type | Score |
 | --- | --- |
+| `ENTS_F` | 0.00 |
+| `ENTS_P` | 0.00 |
+| `ENTS_R` | 0.00 |
+| `ENTS_PER_TYPE` | 0.00 |
+| `CATS_SCORE` | 85.07 |
+| `CATS_MICRO_P` | 85.31 |
+| `CATS_MICRO_R` | 85.31 |
+| `CATS_MICRO_F` | 85.31 |
+| `CATS_MACRO_P` | 85.35 |
+| `CATS_MACRO_R` | 85.31 |
+| `CATS_MACRO_F` | 85.31 |
+| `CATS_MACRO_AUC` | 91.67 |
+| `TEXTCAT_CLASSIFY_LOSS` | 94.04 |

attribute_ruler/patterns ADDED Viewed

Binary file (14.7 kB). View file

config.cfg CHANGED Viewed

@@ -1,12 +1,13 @@
 [paths]
-train = "corpus/ner-train.spacy"
-dev = "corpus/ner-test.spacy"
 vectors = "en_core_web_lg"
 init_tok2vec = null
 [variables]
-wandb_project_name = "tako-entity-extractor"
 wandb_team_name = "tako-team"
 [system]
 gpu_allocator = "pytorch"
@@ -14,7 +15,7 @@ seed = 0
 [nlp]
 lang = "en"
-pipeline = ["tok2vec","ner","textcat"]
 batch_size = 1000
 disabled = []
 before_creation = null
@@ -43,19 +44,20 @@ nO = null
 [components.ner.model.tok2vec]
 @architectures = "spacy.Tok2VecListener.v1"
-width = ${components.tok2vec.model.encode.width}
 upstream = "*"
-[components.textcat]
-factory = "textcat"
 scorer = {"@scorers":"spacy.textcat_scorer.v2"}
 threshold = 0.0
-[components.textcat.model]
 @architectures = "spacy.TextCatEnsemble.v2"
 nO = null
-[components.textcat.model.linear_model]
 @architectures = "spacy.TextCatBOW.v3"
 exclusive_classes = false
 length = 262144
@@ -63,10 +65,22 @@ ngram_size = 1
 no_output_layer = false
 nO = null
-[components.textcat.model.tok2vec]
-@architectures = "spacy.Tok2VecListener.v1"
-width = ${components.tok2vec.model.encode.width}
-upstream = "*"
 [components.tok2vec]
 factory = "tok2vec"
@@ -76,7 +90,7 @@ factory = "tok2vec"
 [components.tok2vec.model.embed]
 @architectures = "spacy.MultiHashEmbed.v2"
-width = ${components.tok2vec.model.encode.width}
 attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
 rows = [5000,1000,2500,2500]
 include_static_vectors = true
@@ -119,8 +133,8 @@ accumulate_gradient = 1
 patience = 1000
 max_epochs = 0
 max_steps = 20000
-eval_frequency = 25
-frozen_components = []
 annotating_components = ["ner"]
 before_to_disk = null
 before_update = null
@@ -132,7 +146,7 @@ get_length = null
 [training.batcher.size]
 @schedules = "compounding.v1"
 start = 100
-stop = 1000
 compound = 1.001
 t = 0.0
@@ -175,14 +189,14 @@ eps = 0.00000001
 learn_rate = 0.001
 [training.score_weights]
-ents_f = 0.2
 ents_p = 0.0
-ents_r = 0.3
 ents_per_type = null
-cats_score = 0.5
 cats_score_desc = null
 cats_micro_p = null
-cats_micro_r = null
 cats_micro_f = null
 cats_macro_p = null
 cats_macro_r = null
@@ -202,4 +216,12 @@ after_init = null
 [initialize.components]
 [initialize.tokenizer]

 [paths]
+train = "corpus/filter-train.spacy"
+dev = "corpus/filter-test.spacy"
 vectors = "en_core_web_lg"
 init_tok2vec = null
 [variables]
+wandb_project_name = "tako-query-filter"
 wandb_team_name = "tako-team"
+base_model = "ner/dashing-wind"
 [system]
 gpu_allocator = "pytorch"
 [nlp]
 lang = "en"
+pipeline = ["tok2vec","ner","textcat_classify"]
 batch_size = 1000
 disabled = []
 before_creation = null
 [components.ner.model.tok2vec]
 @architectures = "spacy.Tok2VecListener.v1"
+width = 256
 upstream = "*"
+[components.textcat_classify]
+factory = "weighted_textcat"
+class_weights = [0.67,0.33]
 scorer = {"@scorers":"spacy.textcat_scorer.v2"}
 threshold = 0.0
+[components.textcat_classify.model]
 @architectures = "spacy.TextCatEnsemble.v2"
 nO = null
+[components.textcat_classify.model.linear_model]
 @architectures = "spacy.TextCatBOW.v3"
 exclusive_classes = false
 length = 262144
 no_output_layer = false
 nO = null
+[components.textcat_classify.model.tok2vec]
+@architectures = "spacy.Tok2Vec.v2"
+[components.textcat_classify.model.tok2vec.embed]
+@architectures = "spacy.MultiHashEmbed.v2"
+width = 128
+attrs = ["NORM","PREFIX","SUFFIX","SHAPE","ENT_TYPE"]
+rows = [2000,500,1000,500,500]
+include_static_vectors = true
+[components.textcat_classify.model.tok2vec.encode]
+@architectures = "spacy.MaxoutWindowEncoder.v2"
+width = 128
+window_size = 1
+maxout_pieces = 3
+depth = 4
 [components.tok2vec]
 factory = "tok2vec"
 [components.tok2vec.model.embed]
 @architectures = "spacy.MultiHashEmbed.v2"
+width = 256
 attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
 rows = [5000,1000,2500,2500]
 include_static_vectors = true
 patience = 1000
 max_epochs = 0
 max_steps = 20000
+eval_frequency = 100
+frozen_components = ["tagger","attribute_ruler","parser","tok2vec","ner"]
 annotating_components = ["ner"]
 before_to_disk = null
 before_update = null
 [training.batcher.size]
 @schedules = "compounding.v1"
 start = 100
+stop = 2000
 compound = 1.001
 t = 0.0
 learn_rate = 0.001
 [training.score_weights]
+ents_f = 0.5
 ents_p = 0.0
+ents_r = 0.0
 ents_per_type = null
+cats_score = 0.25
 cats_score_desc = null
 cats_micro_p = null
+cats_micro_r = 0.25
 cats_micro_f = null
 cats_macro_p = null
 cats_macro_r = null
 [initialize.components]
+[initialize.components.textcat_classify]
+positive_label = "ACCEPT"
+[initialize.components.textcat_classify.labels]
+@readers = "spacy.read_labels.v1"
+path = "corpus/labels/filter-labels/textcat_classify.json"
+require = false
 [initialize.tokenizer]

custom_textcat.py ADDED Viewed

	@@ -0,0 +1,142 @@

+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
+from spacy.util import registry
+from thinc.types import Floats2d
+from spacy.tokens import Doc
+from spacy.pipeline import TextCategorizer
+from spacy.training import Example, validate_examples
+from spacy.pipeline.textcat import textcat_score
+from spacy.vocab import Vocab
+from spacy.scorer import Scorer
+from spacy.language import Language
+from thinc.api import Model
+import numpy
+@Language.factory(
+    "weighted_textcat",
+    assigns=["doc.cats"],
+    default_config={
+        "threshold": 0.0,
+        "scorer": {"@scorers": "spacy.textcat_scorer.v2"},
+    },
+    default_score_weights={
+        "cats_score": 1.0,
+        "cats_score_desc": None,
+        "cats_micro_p": None,
+        "cats_micro_r": None,
+        "cats_micro_f": None,
+        "cats_macro_p": None,
+        "cats_macro_r": None,
+        "cats_macro_f": None,
+        "cats_macro_auc": None,
+        "cats_f_per_type": None,
+    },
+)
+def make_textcat(
+    nlp: Language,
+    name: str,
+    model: Model[List[Doc], List[Floats2d]],
+    threshold: float,
+    scorer: Optional[Callable],
+    class_weights: Optional[List],
+) -> "TextCategorizer":
+    """Create a TextCategorizer component. The text categorizer predicts categories
+    over a whole document. It can learn one or more labels, and the labels are considered
+    to be mutually exclusive (i.e. one true label per doc).
+    model (Model[List[Doc], List[Floats2d]]): A model instance that predicts
+        scores for each category.
+    threshold (float): Cutoff to consider a prediction "positive".
+    scorer (Optional[Callable]): The scoring method.
+    """
+    if class_weights == "null":
+        class_weights = None
+    return CustomTextcat(
+        nlp.vocab,
+        model,
+        name,
+        threshold=threshold,
+        scorer=scorer,
+        weights=class_weights,
+    )
+def textcat_score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
+    """Score text-categorization predictions for a batch of examples.
+    Delegates to Scorer.score_cats on the "cats" attribute with
+    multi_label=False, forwarding any extra keyword arguments unchanged.
+    NOTE(review): this definition rebinds the name `textcat_score` that is
+    also imported from spacy.pipeline.textcat at the top of this file.
+    examples (Iterable[Example]): The examples to score.
+    RETURNS (Dict[str, Any]): Whatever Scorer.score_cats returns.
+    """
+    return Scorer.score_cats(
+        examples,
+        "cats",
+        multi_label=False,
+        **kwargs,
+    )
+@registry.scorers("spacy.textcat_scorer.v2")
+def make_textcat_scorer():
+    """Registry hook: return the `textcat_score` callable defined in this module.
+    NOTE(review): the registered name "spacy.textcat_scorer.v2" looks like a
+    spaCy built-in scorer name - confirm that overriding it here is intended.
+    """
+    return textcat_score
+class CustomTextcat(TextCategorizer):
+    def __init__(
+        self,
+        vocab: Vocab,
+        model: Model,
+        name: str = "textcat",
+        *,
+        threshold: float,
+        scorer: Optional[Callable] = textcat_score,
+        weights: Optional[List[float]] = None,
+    ) -> None:
+        """Initialize a text categorizer for single-label classification.
+        vocab (Vocab): The shared vocabulary.
+        model (thinc.api.Model): The Thinc Model powering the pipeline component.
+        name (str): The component instance name, used to add entries to the
+            losses during training.
+        threshold (float): Unused, not needed for single-label (exclusive
+            classes) classification.
+        scorer (Optional[Callable]): The scoring method. Defaults to
+                Scorer.score_cats for the attribute "cats".
+        DOCS: https://spacy.io/api/textcategorizer#init
+        """
+        self.vocab = vocab
+        self.model = model
+        self.name = name
+        self._rehearsal_model = None
+        cfg: Dict[str, Any] = {
+            "labels": [],
+            "threshold": threshold,
+            "positive_label": None,
+        }
+        self.cfg = dict(cfg)
+        self.scorer = scorer
+        if weights is not None:
+            print(f"Using weights: {weights}")
+            self.weights = numpy.array(weights)
+    def get_loss(self, examples: Iterable[Example], scores) -> Tuple[float, float]:
+        """Find the loss and gradient of loss for the batch of documents and
+        their predicted scores.
+        examples (Iterable[Examples]): The batch of examples.
+        scores: Scores representing the model's predictions.
+        RETURNS (Tuple[float, float]): The loss and the gradient.
+        DOCS: https://spacy.io/api/textcategorizer#get_loss
+        """
+        validate_examples(examples, "TextCategorizer.get_loss")
+        self._validate_categories(examples)
+        truths, not_missing = self._examples_to_truth(examples)
+        not_missing = self.model.ops.asarray(not_missing)  # type: ignore
+        d_scores = scores - truths
+        d_scores *= not_missing
+        weights = self.model.ops.asarray(self.weights)  # type: ignore
+        if weights is not None:
+            squared = d_scores**2
+            mean_square_error = numpy.sum(squared * weights) / (
+                numpy.sum(weights) * len(squared)
+            )
+            d_scores *= weights
+        else:
+            mean_square_error = (d_scores**2).mean()
+        return float(mean_square_error), d_scores

en_tako_query_analyzer-any-py3-none-any.whl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9176c11237026aad73a7bfa404223c1173e986b0cbd3679485c931bbb1399ddd
-size 608751851

 version https://git-lfs.github.com/spec/v1
+oid sha256:758f2f483a1f44bf0ff426f5c5e2abf5867e859261672a51f8759e97ca667a31
+size 619535137

meta.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "lang":"en",
   "name":"tako_query_analyzer",
-  "version":"0.0.1",
   "description":"",
   "author":"",
   "email":"",
@@ -16,18 +16,116 @@
     "name":"en_vectors"
   },
   "labels":{
     "tok2vec":[
     ],
     "ner":[
       "CARDINAL",
-      "CUSTOM_ATTRIBUTE",
-      "CUSTOM_SEMANTIC_FUNCTION",
-      "CUSTOM_SPORTS_CONFERENCE",
-      "CUSTOM_SPORTS_LEAGUE",
-      "CUSTOM_SPORTS_ROLE",
-      "CUSTOM_STOCK_TICKER",
-      "CUSTOM_TEAM",
       "DATE",
       "EVENT",
       "FAC",
@@ -43,208 +141,63 @@
       "PERSON",
       "PRODUCT",
       "QUANTITY",
       "TIME",
       "WORK_OF_ART"
     ],
-    "textcat":[
-      "Business and Finance",
-      "Arts, Culture, and Entertainment",
-      "Crime",
-      "Sports",
-      "Politics",
-      "Science and Technology",
-      "Health and Wellness",
-      "Lifestyle and Fashion"
     ]
   },
   "pipeline":[
     "tok2vec",
     "ner",
-    "textcat"
   ],
   "components":[
     "tok2vec",
     "ner",
-    "textcat"
   ],
   "disabled":[
   ],
   "performance":{
-    "ents_f":0.7171171171,
-    "ents_p":0.7170177384,
-    "ents_r":0.7172165234,
-    "ents_per_type":{
-      "CUSTOM_STOCK_TICKER":{
-        "p":0.5652173913,
-        "r":0.6842105263,
-        "f":0.619047619
-      },
-      "EVENT":{
-        "p":0.5733333333,
-        "r":0.4257425743,
-        "f":0.4886363636
-      },
-      "CUSTOM_TEAM":{
-        "p":0.9032258065,
-        "r":0.8358208955,
-        "f":0.8682170543
-      },
-      "CUSTOM_ATTRIBUTE":{
-        "p":0.6078998073,
-        "r":0.5998098859,
-        "f":0.6038277512
-      },
-      "GPE":{
-        "p":0.8605898123,
-        "r":0.9119318182,
-        "f":0.8855172414
-      },
-      "ORG":{
-        "p":0.649321267,
-        "r":0.7377892031,
-        "f":0.6907340554
-      },
-      "DATE":{
-        "p":0.8639618138,
-        "r":0.8894348894,
-        "f":0.8765133172
-      },
-      "CUSTOM_SEMANTIC_FUNCTION":{
-        "p":0.7647058824,
-        "r":0.6713615023,
-        "f":0.715
-      },
-      "LOC":{
-        "p":0.6785714286,
-        "r":0.4418604651,
-        "f":0.5352112676
-      },
-      "MONEY":{
-        "p":0.7894736842,
-        "r":0.8181818182,
-        "f":0.8035714286
-      },
-      "PRODUCT":{
-        "p":0.5107526882,
-        "r":0.5053191489,
-        "f":0.5080213904
-      },
-      "LANGUAGE":{
-        "p":0.0,
-        "r":0.0,
-        "f":0.0
-      },
-      "PERSON":{
-        "p":0.8941176471,
-        "r":0.8260869565,
-        "f":0.8587570621
-      },
-      "QUANTITY":{
-        "p":0.6111111111,
-        "r":0.6111111111,
-        "f":0.6111111111
-      },
-      "NORP":{
-        "p":0.5588235294,
-        "r":0.5,
-        "f":0.5277777778
-      },
-      "LAW":{
-        "p":0.8571428571,
-        "r":0.3529411765,
-        "f":0.5
-      },
-      "CUSTOM_SPORTS_LEAGUE":{
-        "p":0.9642857143,
-        "r":0.9473684211,
-        "f":0.9557522124
-      },
-      "CUSTOM_SPORTS_ROLE":{
-        "p":0.9444444444,
-        "r":0.7391304348,
-        "f":0.8292682927
-      },
-      "CARDINAL":{
-        "p":0.5,
-        "r":0.1111111111,
-        "f":0.1818181818
-      },
-      "PERCENT":{
-        "p":1.0,
-        "r":0.1428571429,
-        "f":0.25
-      },
-      "FAC":{
-        "p":0.1428571429,
-        "r":0.1,
-        "f":0.1176470588
-      },
-      "ORDINAL":{
-        "p":0.0,
-        "r":0.0,
-        "f":0.0
-      },
-      "WORK_OF_ART":{
-        "p":0.0,
-        "r":0.0,
-        "f":0.0
-      }
-    },
-    "cats_score":0.7053147985,
-    "cats_score_desc":"macro F",
-    "cats_micro_p":0.8588957055,
-    "cats_micro_r":0.8588957055,
-    "cats_micro_f":0.8588957055,
-    "cats_macro_p":0.7489478628,
-    "cats_macro_r":0.6756235592,
-    "cats_macro_f":0.7053147985,
-    "cats_macro_auc":0.9304171084,
     "cats_f_per_type":{
-      "Business and Finance":{
-        "p":0.8983516484,
-        "r":0.9450867052,
-        "f":0.9211267606
-      },
-      "Arts, Culture, and Entertainment":{
-        "p":0.5,
-        "r":0.4772727273,
-        "f":0.488372093
-      },
-      "Crime":{
-        "p":0.9090909091,
-        "r":0.5555555556,
-        "f":0.6896551724
-      },
-      "Sports":{
-        "p":0.965034965,
-        "r":0.8846153846,
-        "f":0.9230769231
-      },
-      "Politics":{
-        "p":0.9125,
-        "r":0.8538011696,
-        "f":0.8821752266
-      },
-      "Science and Technology":{
-        "p":0.6853932584,
-        "r":0.6931818182,
-        "f":0.6892655367
-      },
-      "Health and Wellness":{
-        "p":0.7878787879,
-        "r":0.7647058824,
-        "f":0.776119403
-      },
-      "Lifestyle and Fashion":{
-        "p":0.3333333333,
-        "r":0.2307692308,
-        "f":0.2727272727
       }
     },
-    "tok2vec_loss":617.8652423046,
-    "ner_loss":468.5249541622,
-    "textcat_loss":0.0108695821
   },
   "requirements":[

 {
   "lang":"en",
   "name":"tako_query_analyzer",
+  "version":"0.0.2",
   "description":"",
   "author":"",
   "email":"",
     "name":"en_vectors"
   },
   "labels":{
+    "tok2vec_small":[
+    ],
+    "tagger":[
+      "$",
+      "''",
+      ",",
+      "-LRB-",
+      "-RRB-",
+      ".",
+      ":",
+      "ADD",
+      "AFX",
+      "CC",
+      "CD",
+      "DT",
+      "EX",
+      "FW",
+      "HYPH",
+      "IN",
+      "JJ",
+      "JJR",
+      "JJS",
+      "LS",
+      "MD",
+      "NFP",
+      "NN",
+      "NNP",
+      "NNPS",
+      "NNS",
+      "PDT",
+      "POS",
+      "PRP",
+      "PRP$",
+      "RB",
+      "RBR",
+      "RBS",
+      "RP",
+      "SYM",
+      "TO",
+      "UH",
+      "VB",
+      "VBD",
+      "VBG",
+      "VBN",
+      "VBP",
+      "VBZ",
+      "WDT",
+      "WP",
+      "WP$",
+      "WRB",
+      "XX",
+      "_SP",
+      "``"
+    ],
+    "parser":[
+      "ROOT",
+      "acl",
+      "acomp",
+      "advcl",
+      "advmod",
+      "agent",
+      "amod",
+      "appos",
+      "attr",
+      "aux",
+      "auxpass",
+      "case",
+      "cc",
+      "ccomp",
+      "compound",
+      "conj",
+      "csubj",
+      "csubjpass",
+      "dative",
+      "dep",
+      "det",
+      "dobj",
+      "expl",
+      "intj",
+      "mark",
+      "meta",
+      "neg",
+      "nmod",
+      "npadvmod",
+      "nsubj",
+      "nsubjpass",
+      "nummod",
+      "oprd",
+      "parataxis",
+      "pcomp",
+      "pobj",
+      "poss",
+      "preconj",
+      "predet",
+      "prep",
+      "prt",
+      "punct",
+      "quantmod",
+      "relcl",
+      "xcomp"
+    ],
+    "attribute_ruler":[
+    ],
     "tok2vec":[
     ],
     "ner":[
       "CARDINAL",
       "DATE",
       "EVENT",
       "FAC",
       "PERSON",
       "PRODUCT",
       "QUANTITY",
+      "STOCK_TICKER",
       "TIME",
       "WORK_OF_ART"
     ],
+    "textcat_classify":[
+      "ACCEPT",
+      "REJECT"
     ]
   },
   "pipeline":[
+    "tok2vec_small",
+    "tagger",
+    "parser",
+    "attribute_ruler",
     "tok2vec",
     "ner",
+    "textcat_classify"
   ],
   "components":[
+    "tok2vec_small",
+    "tagger",
+    "parser",
+    "attribute_ruler",
     "tok2vec",
     "ner",
+    "textcat_classify"
   ],
   "disabled":[
   ],
   "performance":{
+    "ents_f":0.0,
+    "ents_p":0.0,
+    "ents_r":0.0,
+    "ents_per_type":0.0,
+    "cats_score":0.8507157464,
+    "cats_score_desc":"F (ACCEPT)",
+    "cats_micro_p":0.8531187123,
+    "cats_micro_r":0.8531187123,
+    "cats_micro_f":0.8531187123,
+    "cats_macro_p":0.853485064,
+    "cats_macro_r":0.8531187123,
+    "cats_macro_f":0.8530806455,
+    "cats_macro_auc":0.9167497439,
     "cats_f_per_type":{
+      "ACCEPT":{
+        "p":0.8648648649,
+        "r":0.8370221328,
+        "f":0.8507157464
+      },
+      "REJECT":{
+        "p":0.8421052632,
+        "r":0.8692152918,
+        "f":0.8554455446
       }
     },
+    "textcat_classify_loss":0.9403656576
   },
   "requirements":[

ner/model CHANGED Viewed

Binary files a/ner/model and b/ner/model differ

ner/moves CHANGED Viewed

@@ -1 +1 @@

- ��moves~~�l~~{"0":{},"1":{"~~CUSTOM_ATTRIBUTE~~":~~8802~~,"GPE":~~3351~~,"~~DATE~~":~~2617~~,"~~ORG~~":~~2570~~,"~~PRODUCT~~":~~1459~~,"~~CUSTOM_SEMANTIC_FUNCTION~~":~~995~~,"~~PERSON":760,"~~EVENT":~~594~~,"~~CUSTOM_TEAM~~":~~518~~,"~~CUSTOM_STOCK_TICKER~~":~~394~~,"~~CUSTOM_SPORTS_LEAGUE~~":~~322~~,"~~NORP~~":~~260~~,"LOC":~~233~~,"~~MONEY~~":~~199~~,"~~CUSTOM_SPORTS_ROLE":125,"~~FAC":~~111~~,"~~LAW":96,"~~QUANTITY":90,"~~WORK_OF_ART":68,"PERCENT":33,"~~CARDINAL":19,"~~LANGUAGE~~":13,"TIME":5,"~~CUSTOM_SPORTS_CONFERENCE~~":~~5,"ORDINAL":2~~},"2":{"~~CUSTOM_ATTRIBUTE~~":~~8802~~,"GPE":~~3351~~,"~~DATE~~":~~2617~~,"~~ORG~~":~~2570~~,"~~PRODUCT~~":~~1459~~,"~~CUSTOM_SEMANTIC_FUNCTION~~":~~995~~,"~~PERSON":760,"~~EVENT":~~594~~,"~~CUSTOM_TEAM~~":~~518~~,"~~CUSTOM_STOCK_TICKER~~":~~394~~,"~~CUSTOM_SPORTS_LEAGUE~~":~~322~~,"~~NORP~~":~~260~~,"LOC":~~233~~,"~~MONEY~~":~~199~~,"~~CUSTOM_SPORTS_ROLE":125,"~~FAC":~~111~~,"~~LAW":96,"~~QUANTITY":90,"~~WORK_OF_ART":68,"PERCENT":33,"~~CARDINAL":19,"~~LANGUAGE~~":13,"TIME":5,"~~CUSTOM_SPORTS_CONFERENCE~~":~~5,"ORDINAL":2~~},"3":{"~~CUSTOM_ATTRIBUTE~~":~~8802~~,"GPE":~~3351~~,"~~DATE~~":~~2617~~,"~~ORG~~":~~2570~~,"~~PRODUCT~~":~~1459~~,"~~CUSTOM_SEMANTIC_FUNCTION~~":~~995~~,"~~PERSON":760,"~~EVENT":~~594~~,"~~CUSTOM_TEAM~~":~~518~~,"~~CUSTOM_STOCK_TICKER~~":~~394~~,"~~CUSTOM_SPORTS_LEAGUE~~":~~322~~,"~~NORP~~":~~260~~,"LOC":~~233~~,"~~MONEY~~":~~199~~,"~~CUSTOM_SPORTS_ROLE":125,"~~FAC":~~111~~,"~~LAW":96,"~~QUANTITY":90,"~~WORK_OF_ART":68,"PERCENT":33,"~~CARDINAL":19,"~~LANGUAGE~~":13,"TIME":5,"~~CUSTOM_SPORTS_CONFERENCE~~":~~5,"ORDINAL":2~~},"4":{"~~CUSTOM_ATTRIBUTE~~":~~8802~~,"GPE":~~3351~~,"~~DATE~~":~~2617~~,"~~ORG~~":~~2570~~,"~~PRODUCT~~":~~1459~~,"~~CUSTOM_SEMANTIC_FUNCTION~~":~~995~~,"~~PERSON":760,"~~EVENT":~~594~~,"~~CUSTOM_TEAM~~":~~518~~,"~~CUSTOM_STOCK_TICKER~~":~~394~~,"~~CUSTOM_SPORTS_LEAGUE~~":~~322~~,"~~NORP~~":~~260~~,"LOC":~~233~~,"~~MONEY~~":~~199~~,"~~CUSTOM_SPORTS_ROLE":125,"~~FAC":~~111~~,"~~LAW":96,"~
~QUANTITY":90,"~~WORK_OF_ART":68,"PERCENT":33,"~~CARDINAL":19,"~~LANGUAGE~~":13,"TIME":5,"~~CUSTOM_SPORTS_CONFERENCE~~":5,"~~ORDINAL~~":~~2,"":~~1},"5":{"":1}}�cfg��neg_key�

+ ��moves��{"0":{},"1":{"ORG":32008,"GPE":3728,"PERSON":1105,"DATE":850,"WORK_OF_ART":686,"PRODUCT":585,"EVENT":283,"MONEY":214,"NORP":179,"STOCK_TICKER":156,"LAW":129,"LOC":111,"PERCENT":88,"FAC":75,"QUANTITY":60,"CARDINAL":57,"ORDINAL":42,"TIME":27,"LANGUAGE":25},"2":{"ORG":32008,"GPE":3728,"PERSON":1105,"DATE":850,"WORK_OF_ART":686,"PRODUCT":585,"EVENT":283,"MONEY":214,"NORP":179,"STOCK_TICKER":156,"LAW":129,"LOC":111,"PERCENT":88,"FAC":75,"QUANTITY":60,"CARDINAL":57,"ORDINAL":42,"TIME":27,"LANGUAGE":25},"3":{"ORG":32008,"GPE":3728,"PERSON":1105,"DATE":850,"WORK_OF_ART":686,"PRODUCT":585,"EVENT":283,"MONEY":214,"NORP":179,"STOCK_TICKER":156,"LAW":129,"LOC":111,"PERCENT":88,"FAC":75,"QUANTITY":60,"CARDINAL":57,"ORDINAL":42,"TIME":27,"LANGUAGE":25},"4":{"ORG":32008,"GPE":3728,"PERSON":1105,"DATE":850,"WORK_OF_ART":686,"PRODUCT":585,"EVENT":283,"MONEY":214,"NORP":179,"STOCK_TICKER":156,"LAW":129,"LOC":111,"PERCENT":88,"FAC":75,"QUANTITY":60,"CARDINAL":57,"ORDINAL":42,"TIME":27,"LANGUAGE":25,"":1},"5":{"":1}}�cfg��neg_key�

parser/cfg ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "moves":null,
+  "update_with_oracle_cut_size":100,
+  "multitasks":[
+  ],
+  "min_action_freq":30,
+  "learn_tokens":false,
+  "beam_width":1,
+  "beam_density":0.0,
+  "beam_update_prob":0.0,
+  "incorrect_spans_key":null
+}

parser/model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a1836fbc02b3924b2fd5f65325c58ae852ff112db1090ca724e5a801e68b85fd
+size 319909

parser/moves ADDED Viewed

	@@ -0,0 +1 @@

+ ��moves�{"0":{"":994332},"1":{"":999432},"2":{"det":172595,"nsubj":165748,"compound":116623,"amod":105184,"aux":86667,"punct":65478,"advmod":62763,"poss":36443,"mark":27941,"nummod":22598,"auxpass":15594,"prep":14001,"nsubjpass":13856,"neg":12357,"cc":10739,"nmod":9562,"advcl":9062,"npadvmod":8168,"quantmod":7101,"intj":6464,"ccomp":5896,"dobj":3427,"expl":3360,"dep":2871,"predet":1944,"parataxis":1837,"csubj":1428,"preconj":621,"pobj||prep":616,"attr":578,"meta":376,"advmod||conj":368,"dobj||xcomp":352,"acomp":284,"nsubj||ccomp":224,"dative":206,"advmod||xcomp":149,"dobj||ccomp":70,"csubjpass":64,"dobj||conj":62,"prep||conj":51,"acl":48,"prep||nsubj":41,"prep||dobj":36,"xcomp":34,"advmod||ccomp":32,"oprd":31},"3":{"punct":183790,"pobj":182191,"prep":174008,"dobj":89615,"conj":59687,"cc":51930,"ccomp":30385,"advmod":22861,"xcomp":21021,"relcl":20969,"advcl":19828,"attr":17741,"acomp":16922,"appos":15265,"case":13388,"acl":12085,"pcomp":10324,"dep":10116,"npadvmod":9796,"prt":8179,"agent":3903,"dative":3866,"nsubj":3470,"neg":2906,"amod":2839,"intj":2819,"nummod":2732,"oprd":2301,"parataxis":1261,"quantmod":319,"nmod":294,"acl||dobj":200,"prep||dobj":190,"prep||nsubj":162,"acl||nsubj":159,"appos||nsubj":145,"relcl||dobj":134,"relcl||nsubj":111,"aux":103,"expl":96,"meta":92,"appos||dobj":86,"preconj":71,"csubj":65,"prep||nsubjpass":55,"prep||advmod":54,"prep||acomp":53,"det":51,"nsubjpass":45,"relcl||pobj":42,"acl||nsubjpass":42,"mark":40,"auxpass":39,"prep||pobj":36,"relcl||nsubjpass":32,"appos||nsubjpass":31},"4":{"ROOT":111664}}�cfg��neg_key�

tagger/cfg ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "label_smoothing":0.0,
+  "labels":[
+    "$",
+    "''",
+    ",",
+    "-LRB-",
+    "-RRB-",
+    ".",
+    ":",
+    "ADD",
+    "AFX",
+    "CC",
+    "CD",
+    "DT",
+    "EX",
+    "FW",
+    "HYPH",
+    "IN",
+    "JJ",
+    "JJR",
+    "JJS",
+    "LS",
+    "MD",
+    "NFP",
+    "NN",
+    "NNP",
+    "NNPS",
+    "NNS",
+    "PDT",
+    "POS",
+    "PRP",
+    "PRP$",
+    "RB",
+    "RBR",
+    "RBS",
+    "RP",
+    "SYM",
+    "TO",
+    "UH",
+    "VB",
+    "VBD",
+    "VBG",
+    "VBN",
+    "VBP",
+    "VBZ",
+    "WDT",
+    "WP",
+    "WP$",
+    "WRB",
+    "XX",
+    "_SP",
+    "``"
+  ],
+  "neg_prefix":"!",
+  "overwrite":false
+}

tagger/model ADDED Viewed

Binary file (19.8 kB). View file

textcat_classify/cfg ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+  "labels":[
+    "ACCEPT",
+    "REJECT"
+  ],
+  "threshold":0.0,
+  "positive_label":"ACCEPT"
+}

textcat_classify/model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c65c611aa01b463b7f99116d0b1a53cd75effb9d0bac5febef70bf3b85f0b075
+size 8319359

tok2vec/model CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b26ab00bd800730dbd5328c6603e549fd33426b18a10ca6c4efd1bf2e68c7e84
 size 34434008

 version https://git-lfs.github.com/spec/v1
+oid sha256:c8db1e5a93c4f955f990b7f6005b11c65ac6b9efa20f2c02291ac2013d06a203
 size 34434008

tok2vec_small/cfg ADDED Viewed

	@@ -0,0 +1,3 @@


+{
+
+}

tok2vec_small/model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:42d8414521eaf75f817bd1b351b26039a22a912bb2617f95ead305420f2ebffd
+size 6269370

vocab/strings.json CHANGED Viewed

The diff for this file is too large to render. See raw diff