Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

special_tokens_map.json +78 -12
tokenizer.json +0 -0
tokenizer_config.json +13 -12

special_tokens_map.json CHANGED Viewed

@@ -1,15 +1,81 @@
 {
   "additional_special_tokens": [
-    "<|source_id|>",
-    "<|source_analysis_start|>",
-    "<|source_analysis_end|>",
-    "<|source_start|>",
-    "<|source_end|>",
-    "<|answer_start|>",
-    "<|answer_end|>",
-    "<|query_start|>",
-    "<|query_end|>",
-    "<|source_interpretation_start|>",
-    "<|source_interpretation_end|>"
   ]
-}

 {
   "additional_special_tokens": [
+    {
+      "content": "<|source_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|source_analysis_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|source_analysis_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|source_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|source_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|answer_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|answer_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|query_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|query_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|source_interpretation_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|source_interpretation_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
   ]
+}

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -32,7 +32,7 @@
       "single_word": false,
       "special": true
     },
-    "65520": {
       "content": "<|source_id|>",
       "lstrip": false,
       "normalized": false,
@@ -40,7 +40,7 @@
       "single_word": false,
       "special": true
     },
-    "65521": {
       "content": "<|source_analysis_start|>",
       "lstrip": false,
       "normalized": false,
@@ -48,7 +48,7 @@
       "single_word": false,
       "special": true
     },
-    "65522": {
       "content": "<|source_analysis_end|>",
       "lstrip": false,
       "normalized": false,
@@ -56,7 +56,7 @@
       "single_word": false,
       "special": true
     },
-    "65523": {
       "content": "<|source_start|>",
       "lstrip": false,
       "normalized": false,
@@ -64,7 +64,7 @@
       "single_word": false,
       "special": true
     },
-    "65524": {
       "content": "<|source_end|>",
       "lstrip": false,
       "normalized": false,
@@ -72,7 +72,7 @@
       "single_word": false,
       "special": true
     },
-    "65525": {
       "content": "<|answer_start|>",
       "lstrip": false,
       "normalized": false,
@@ -80,7 +80,7 @@
       "single_word": false,
       "special": true
     },
-    "65526": {
       "content": "<|answer_end|>",
       "lstrip": false,
       "normalized": false,
@@ -88,7 +88,7 @@
       "single_word": false,
       "special": true
     },
-    "65527": {
       "content": "<|query_start|>",
       "lstrip": false,
       "normalized": false,
@@ -96,7 +96,7 @@
       "single_word": false,
       "special": true
     },
-    "65528": {
       "content": "<|query_end|>",
       "lstrip": false,
       "normalized": false,
@@ -104,7 +104,7 @@
       "single_word": false,
       "special": true
     },
-    "65529": {
       "content": "<|source_interpretation_start|>",
       "lstrip": false,
       "normalized": false,
@@ -112,7 +112,7 @@
       "single_word": false,
       "special": true
     },
-    "65530": {
       "content": "<|source_interpretation_end|>",
       "lstrip": false,
       "normalized": false,
@@ -135,6 +135,7 @@
     "<|source_interpretation_end|>"
   ],
   "clean_up_tokenization_spaces": true,
   "model_max_length": 1000000000000000019884624838656,
   "tokenizer_class": "PreTrainedTokenizerFast"
-}

       "single_word": false,
       "special": true
     },
+    "65525": {
       "content": "<|source_id|>",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "65526": {
       "content": "<|source_analysis_start|>",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "65527": {
       "content": "<|source_analysis_end|>",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "65528": {
       "content": "<|source_start|>",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "65529": {
       "content": "<|source_end|>",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "65530": {
       "content": "<|answer_start|>",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "65531": {
       "content": "<|answer_end|>",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "65532": {
       "content": "<|query_start|>",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "65533": {
       "content": "<|query_end|>",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "65534": {
       "content": "<|source_interpretation_start|>",
       "lstrip": false,
       "normalized": false,
       "single_word": false,
       "special": true
     },
+    "65535": {
       "content": "<|source_interpretation_end|>",
       "lstrip": false,
       "normalized": false,
     "<|source_interpretation_end|>"
   ],
   "clean_up_tokenization_spaces": true,
+  "extra_special_tokens": {},
   "model_max_length": 1000000000000000019884624838656,
   "tokenizer_class": "PreTrainedTokenizerFast"
+}