PartAI
/

TookaBERT-Base

Model card · Files and versions · Community

mohalisad committed on May 6, 2024

Commit

5b40aa3

·

1 Parent(s): 57cffe3

fix tokenizer

Files changed (1) hide show

tokenizer.json +17 -3

tokenizer.json CHANGED Viewed

@@ -1,7 +1,14 @@
 {
   "version": "1.0",
   "truncation": null,
-  "padding": null,
   "added_tokens": [
     {
       "id": 0,
@@ -45,7 +52,7 @@
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false,
       "special": true
     }
   ],
@@ -194,6 +201,13 @@
           "Regex": "[\u0001‪‫‬‭‎‏‮⁯️⃣]"
         },
         "content": " "
       }
     ]
   },
@@ -96062,4 +96076,4 @@
       "▁ent ire"
     ]
   }
-}

 {
   "version": "1.0",
   "truncation": null,
+  "padding": {
+    "strategy": "BatchLongest",
+    "direction": "Right",
+    "pad_to_multiple_of": null,
+    "pad_id": 0,
+    "pad_type_id": 0,
+    "pad_token": "<pad>"
+  },
   "added_tokens": [
     {
       "id": 0,
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": true,
       "special": true
     }
   ],
           "Regex": "[\u0001‪‫‬‭‎‏‮⁯️⃣]"
         },
         "content": " "
+      },
+      {
+        "type": "Replace",
+        "pattern": {
+          "Regex": " *<mask> *"
+        },
+        "content": "<mask>"
       }
     ]
   },
       "▁ent ire"
     ]
   }
+}