SuiGio committed on
Commit
21c91c8
verified ·
1 Parent(s): 38cd38a

Upload tokenizer

Files changed (4)
  1. merges.txt +0 -0
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +2 -1
  4. vocab.json +0 -0
merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -35,7 +35,7 @@
     "single_word": false
   },
   "model_max_length": 512,
-  "name_or_path": "distilroberta-base",
+  "name_or_path": "SuiGio/roberta_pubmesh",
   "pad_token": {
     "__type": "AddedToken",
     "content": "<pad>",
@@ -54,6 +54,7 @@
   },
   "special_tokens_map_file": null,
   "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
   "unk_token": {
     "__type": "AddedToken",
     "content": "<unk>",
vocab.json CHANGED
The diff for this file is too large to render. See raw diff
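
The tokenizer_config.json change points name_or_path at the uploaded repo and adds trim_offsets for the fast tokenizer's offset mapping. Below is a minimal sketch of loading the uploaded tokenizer, assuming the SuiGio/roberta_pubmesh repo is public on the Hugging Face Hub and that the files committed here (vocab.json, merges.txt, tokenizer.json, tokenizer_config.json) are what AutoTokenizer resolves; the example sentence is illustrative only.

from transformers import AutoTokenizer

# Downloads the tokenizer files from this commit; tokenizer_class
# "RobertaTokenizer" with a tokenizer.json present resolves to the fast
# RoBERTa tokenizer by default.
tokenizer = AutoTokenizer.from_pretrained("SuiGio/roberta_pubmesh")

print(tokenizer.model_max_length)  # 512, from tokenizer_config.json

# With "trim_offsets": true, the fast tokenizer's offset mapping excludes the
# leading space that RoBERTa's byte-level BPE folds into each token.
enc = tokenizer("platelet aggregation", return_offsets_mapping=True)
print(enc["input_ids"])
print(enc["offset_mapping"])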