Pclanglais committed on
Commit
34892c0
·
verified ·
1 Parent(s): e77ca24

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +78 -12
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +13 -12
special_tokens_map.json CHANGED
@@ -1,15 +1,81 @@
1
  {
2
  "additional_special_tokens": [
3
- "<|source_id|>",
4
- "<|source_analysis_start|>",
5
- "<|source_analysis_end|>",
6
- "<|source_start|>",
7
- "<|source_end|>",
8
- "<|answer_start|>",
9
- "<|answer_end|>",
10
- "<|query_start|>",
11
- "<|query_end|>",
12
- "<|source_interpretation_start|>",
13
- "<|source_interpretation_end|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  ]
15
- }
 
1
  {
2
  "additional_special_tokens": [
3
+ {
4
+ "content": "<|source_id|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|source_analysis_start|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "<|source_analysis_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "<|source_start|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "<|source_end|>",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "<|answer_start|>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "<|answer_end|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "<|query_start|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ },
59
+ {
60
+ "content": "<|query_end|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false
65
+ },
66
+ {
67
+ "content": "<|source_interpretation_start|>",
68
+ "lstrip": false,
69
+ "normalized": false,
70
+ "rstrip": false,
71
+ "single_word": false
72
+ },
73
+ {
74
+ "content": "<|source_interpretation_end|>",
75
+ "lstrip": false,
76
+ "normalized": false,
77
+ "rstrip": false,
78
+ "single_word": false
79
+ }
80
  ]
81
+ }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -32,7 +32,7 @@
32
  "single_word": false,
33
  "special": true
34
  },
35
- "65520": {
36
  "content": "<|source_id|>",
37
  "lstrip": false,
38
  "normalized": false,
@@ -40,7 +40,7 @@
40
  "single_word": false,
41
  "special": true
42
  },
43
- "65521": {
44
  "content": "<|source_analysis_start|>",
45
  "lstrip": false,
46
  "normalized": false,
@@ -48,7 +48,7 @@
48
  "single_word": false,
49
  "special": true
50
  },
51
- "65522": {
52
  "content": "<|source_analysis_end|>",
53
  "lstrip": false,
54
  "normalized": false,
@@ -56,7 +56,7 @@
56
  "single_word": false,
57
  "special": true
58
  },
59
- "65523": {
60
  "content": "<|source_start|>",
61
  "lstrip": false,
62
  "normalized": false,
@@ -64,7 +64,7 @@
64
  "single_word": false,
65
  "special": true
66
  },
67
- "65524": {
68
  "content": "<|source_end|>",
69
  "lstrip": false,
70
  "normalized": false,
@@ -72,7 +72,7 @@
72
  "single_word": false,
73
  "special": true
74
  },
75
- "65525": {
76
  "content": "<|answer_start|>",
77
  "lstrip": false,
78
  "normalized": false,
@@ -80,7 +80,7 @@
80
  "single_word": false,
81
  "special": true
82
  },
83
- "65526": {
84
  "content": "<|answer_end|>",
85
  "lstrip": false,
86
  "normalized": false,
@@ -88,7 +88,7 @@
88
  "single_word": false,
89
  "special": true
90
  },
91
- "65527": {
92
  "content": "<|query_start|>",
93
  "lstrip": false,
94
  "normalized": false,
@@ -96,7 +96,7 @@
96
  "single_word": false,
97
  "special": true
98
  },
99
- "65528": {
100
  "content": "<|query_end|>",
101
  "lstrip": false,
102
  "normalized": false,
@@ -104,7 +104,7 @@
104
  "single_word": false,
105
  "special": true
106
  },
107
- "65529": {
108
  "content": "<|source_interpretation_start|>",
109
  "lstrip": false,
110
  "normalized": false,
@@ -112,7 +112,7 @@
112
  "single_word": false,
113
  "special": true
114
  },
115
- "65530": {
116
  "content": "<|source_interpretation_end|>",
117
  "lstrip": false,
118
  "normalized": false,
@@ -135,6 +135,7 @@
135
  "<|source_interpretation_end|>"
136
  ],
137
  "clean_up_tokenization_spaces": true,
 
138
  "model_max_length": 1000000000000000019884624838656,
139
  "tokenizer_class": "PreTrainedTokenizerFast"
140
- }
 
32
  "single_word": false,
33
  "special": true
34
  },
35
+ "65525": {
36
  "content": "<|source_id|>",
37
  "lstrip": false,
38
  "normalized": false,
 
40
  "single_word": false,
41
  "special": true
42
  },
43
+ "65526": {
44
  "content": "<|source_analysis_start|>",
45
  "lstrip": false,
46
  "normalized": false,
 
48
  "single_word": false,
49
  "special": true
50
  },
51
+ "65527": {
52
  "content": "<|source_analysis_end|>",
53
  "lstrip": false,
54
  "normalized": false,
 
56
  "single_word": false,
57
  "special": true
58
  },
59
+ "65528": {
60
  "content": "<|source_start|>",
61
  "lstrip": false,
62
  "normalized": false,
 
64
  "single_word": false,
65
  "special": true
66
  },
67
+ "65529": {
68
  "content": "<|source_end|>",
69
  "lstrip": false,
70
  "normalized": false,
 
72
  "single_word": false,
73
  "special": true
74
  },
75
+ "65530": {
76
  "content": "<|answer_start|>",
77
  "lstrip": false,
78
  "normalized": false,
 
80
  "single_word": false,
81
  "special": true
82
  },
83
+ "65531": {
84
  "content": "<|answer_end|>",
85
  "lstrip": false,
86
  "normalized": false,
 
88
  "single_word": false,
89
  "special": true
90
  },
91
+ "65532": {
92
  "content": "<|query_start|>",
93
  "lstrip": false,
94
  "normalized": false,
 
96
  "single_word": false,
97
  "special": true
98
  },
99
+ "65533": {
100
  "content": "<|query_end|>",
101
  "lstrip": false,
102
  "normalized": false,
 
104
  "single_word": false,
105
  "special": true
106
  },
107
+ "65534": {
108
  "content": "<|source_interpretation_start|>",
109
  "lstrip": false,
110
  "normalized": false,
 
112
  "single_word": false,
113
  "special": true
114
  },
115
+ "65535": {
116
  "content": "<|source_interpretation_end|>",
117
  "lstrip": false,
118
  "normalized": false,
 
135
  "<|source_interpretation_end|>"
136
  ],
137
  "clean_up_tokenization_spaces": true,
138
+ "extra_special_tokens": {},
139
  "model_max_length": 1000000000000000019884624838656,
140
  "tokenizer_class": "PreTrainedTokenizerFast"
141
+ }