glenn2 committed on
Commit
7e58dae
·
verified ·
1 Parent(s): 58a8ba9

Training in progress, step 1000

Browse files
added_tokens.json CHANGED
@@ -1537,6 +1537,7 @@
1537
  "<|hu|>": 50286,
1538
  "<|hy|>": 50312,
1539
  "<|id|>": 50275,
 
1540
  "<|is|>": 50311,
1541
  "<|it|>": 50274,
1542
  "<|ja|>": 50266,
 
1537
  "<|hu|>": 50286,
1538
  "<|hy|>": 50312,
1539
  "<|id|>": 50275,
1540
+ "<|inhale|>": 51865,
1541
  "<|is|>": 50311,
1542
  "<|it|>": 50274,
1543
  "<|ja|>": 50266,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25b04549416508df083e608192f5992c61327e6646ab0dc1976da02a3129b893
3
  size 966998152
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c22b80d0187e972c44791c9c003d506323895c3336eeeb50f617d68aefede0f4
3
  size 966998152
runs/Apr14_21-18-11_adce8d5f9429/events.out.tfevents.1713129557.adce8d5f9429.2469.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aca680d20bbbbef93748538e97d80926c7a7f38f75d0ca469f6fd0ad21414432
3
+ size 15107
special_tokens_map.json CHANGED
@@ -1,112 +1,12 @@
1
  {
2
  "additional_special_tokens": [
3
- "<|endoftext|>",
4
- "<|startoftranscript|>",
5
- "<|en|>",
6
- "<|zh|>",
7
- "<|de|>",
8
- "<|es|>",
9
- "<|ru|>",
10
- "<|ko|>",
11
- "<|fr|>",
12
- "<|ja|>",
13
- "<|pt|>",
14
- "<|tr|>",
15
- "<|pl|>",
16
- "<|ca|>",
17
- "<|nl|>",
18
- "<|ar|>",
19
- "<|sv|>",
20
- "<|it|>",
21
- "<|id|>",
22
- "<|hi|>",
23
- "<|fi|>",
24
- "<|vi|>",
25
- "<|he|>",
26
- "<|uk|>",
27
- "<|el|>",
28
- "<|ms|>",
29
- "<|cs|>",
30
- "<|ro|>",
31
- "<|da|>",
32
- "<|hu|>",
33
- "<|ta|>",
34
- "<|no|>",
35
- "<|th|>",
36
- "<|ur|>",
37
- "<|hr|>",
38
- "<|bg|>",
39
- "<|lt|>",
40
- "<|la|>",
41
- "<|mi|>",
42
- "<|ml|>",
43
- "<|cy|>",
44
- "<|sk|>",
45
- "<|te|>",
46
- "<|fa|>",
47
- "<|lv|>",
48
- "<|bn|>",
49
- "<|sr|>",
50
- "<|az|>",
51
- "<|sl|>",
52
- "<|kn|>",
53
- "<|et|>",
54
- "<|mk|>",
55
- "<|br|>",
56
- "<|eu|>",
57
- "<|is|>",
58
- "<|hy|>",
59
- "<|ne|>",
60
- "<|mn|>",
61
- "<|bs|>",
62
- "<|kk|>",
63
- "<|sq|>",
64
- "<|sw|>",
65
- "<|gl|>",
66
- "<|mr|>",
67
- "<|pa|>",
68
- "<|si|>",
69
- "<|km|>",
70
- "<|sn|>",
71
- "<|yo|>",
72
- "<|so|>",
73
- "<|af|>",
74
- "<|oc|>",
75
- "<|ka|>",
76
- "<|be|>",
77
- "<|tg|>",
78
- "<|sd|>",
79
- "<|gu|>",
80
- "<|am|>",
81
- "<|yi|>",
82
- "<|lo|>",
83
- "<|uz|>",
84
- "<|fo|>",
85
- "<|ht|>",
86
- "<|ps|>",
87
- "<|tk|>",
88
- "<|nn|>",
89
- "<|mt|>",
90
- "<|sa|>",
91
- "<|lb|>",
92
- "<|my|>",
93
- "<|bo|>",
94
- "<|tl|>",
95
- "<|mg|>",
96
- "<|as|>",
97
- "<|tt|>",
98
- "<|haw|>",
99
- "<|ln|>",
100
- "<|ha|>",
101
- "<|ba|>",
102
- "<|jw|>",
103
- "<|su|>",
104
- "<|translate|>",
105
- "<|transcribe|>",
106
- "<|startoflm|>",
107
- "<|startofprev|>",
108
- "<|nocaptions|>",
109
- "<|notimestamps|>"
110
  ],
111
  "bos_token": {
112
  "content": "<|endoftext|>",
 
1
  {
2
  "additional_special_tokens": [
3
+ {
4
+ "content": "<|inhale|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  ],
11
  "bos_token": {
12
  "content": "<|endoftext|>",
tokenizer_config.json CHANGED
@@ -12865,116 +12865,18 @@
12865
  "rstrip": false,
12866
  "single_word": false,
12867
  "special": false
 
 
 
 
 
 
 
 
12868
  }
12869
  },
12870
  "additional_special_tokens": [
12871
- "<|endoftext|>",
12872
- "<|startoftranscript|>",
12873
- "<|en|>",
12874
- "<|zh|>",
12875
- "<|de|>",
12876
- "<|es|>",
12877
- "<|ru|>",
12878
- "<|ko|>",
12879
- "<|fr|>",
12880
- "<|ja|>",
12881
- "<|pt|>",
12882
- "<|tr|>",
12883
- "<|pl|>",
12884
- "<|ca|>",
12885
- "<|nl|>",
12886
- "<|ar|>",
12887
- "<|sv|>",
12888
- "<|it|>",
12889
- "<|id|>",
12890
- "<|hi|>",
12891
- "<|fi|>",
12892
- "<|vi|>",
12893
- "<|he|>",
12894
- "<|uk|>",
12895
- "<|el|>",
12896
- "<|ms|>",
12897
- "<|cs|>",
12898
- "<|ro|>",
12899
- "<|da|>",
12900
- "<|hu|>",
12901
- "<|ta|>",
12902
- "<|no|>",
12903
- "<|th|>",
12904
- "<|ur|>",
12905
- "<|hr|>",
12906
- "<|bg|>",
12907
- "<|lt|>",
12908
- "<|la|>",
12909
- "<|mi|>",
12910
- "<|ml|>",
12911
- "<|cy|>",
12912
- "<|sk|>",
12913
- "<|te|>",
12914
- "<|fa|>",
12915
- "<|lv|>",
12916
- "<|bn|>",
12917
- "<|sr|>",
12918
- "<|az|>",
12919
- "<|sl|>",
12920
- "<|kn|>",
12921
- "<|et|>",
12922
- "<|mk|>",
12923
- "<|br|>",
12924
- "<|eu|>",
12925
- "<|is|>",
12926
- "<|hy|>",
12927
- "<|ne|>",
12928
- "<|mn|>",
12929
- "<|bs|>",
12930
- "<|kk|>",
12931
- "<|sq|>",
12932
- "<|sw|>",
12933
- "<|gl|>",
12934
- "<|mr|>",
12935
- "<|pa|>",
12936
- "<|si|>",
12937
- "<|km|>",
12938
- "<|sn|>",
12939
- "<|yo|>",
12940
- "<|so|>",
12941
- "<|af|>",
12942
- "<|oc|>",
12943
- "<|ka|>",
12944
- "<|be|>",
12945
- "<|tg|>",
12946
- "<|sd|>",
12947
- "<|gu|>",
12948
- "<|am|>",
12949
- "<|yi|>",
12950
- "<|lo|>",
12951
- "<|uz|>",
12952
- "<|fo|>",
12953
- "<|ht|>",
12954
- "<|ps|>",
12955
- "<|tk|>",
12956
- "<|nn|>",
12957
- "<|mt|>",
12958
- "<|sa|>",
12959
- "<|lb|>",
12960
- "<|my|>",
12961
- "<|bo|>",
12962
- "<|tl|>",
12963
- "<|mg|>",
12964
- "<|as|>",
12965
- "<|tt|>",
12966
- "<|haw|>",
12967
- "<|ln|>",
12968
- "<|ha|>",
12969
- "<|ba|>",
12970
- "<|jw|>",
12971
- "<|su|>",
12972
- "<|translate|>",
12973
- "<|transcribe|>",
12974
- "<|startoflm|>",
12975
- "<|startofprev|>",
12976
- "<|nocaptions|>",
12977
- "<|notimestamps|>"
12978
  ],
12979
  "bos_token": "<|endoftext|>",
12980
  "clean_up_tokenization_spaces": true,
 
12865
  "rstrip": false,
12866
  "single_word": false,
12867
  "special": false
12868
+ },
12869
+ "51865": {
12870
+ "content": "<|inhale|>",
12871
+ "lstrip": false,
12872
+ "normalized": false,
12873
+ "rstrip": false,
12874
+ "single_word": false,
12875
+ "special": true
12876
  }
12877
  },
12878
  "additional_special_tokens": [
12879
+ "<|inhale|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12880
  ],
12881
  "bos_token": "<|endoftext|>",
12882
  "clean_up_tokenization_spaces": true,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:158aaa70b4d67d7eaddc5c425a07f96e9f4f5b96f38228e27de909ec3972169b
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:081ea84218f228ab8643dd9a7ba1e6c53d512f21baed4ce8288d34a51cd6c4ee
3
  size 5048