echarlaix (HF Staff) committed
Commit 626f152 · 1 Parent(s): ce83703

add tiny random model

__init__.py ADDED
File without changes
config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "_name_or_path": "hf-internal-testing/tiny-random-gpt2",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2CustomLMHeadModel"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "attn_pdrop": 0.1,
+   "auto_map": {
+     "AutoModelForCausalLM": "modeling_gpt2.GPT2CustomLMHeadModel"
+   },
+   "bos_token_id": 98,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 98,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "initializer_range": 0.02,
+   "intermediate_size": 37,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 512,
+   "n_embd": 32,
+   "n_head": 4,
+   "n_inner": null,
+   "n_layer": 5,
+   "n_positions": 512,
+   "pad_token_id": 98,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.25.1",
+   "type_vocab_size": 16,
+   "use_cache": true,
+   "vocab_size": 1000
+ }
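The `auto_map` entry above is what lets the `Auto*` loaders resolve the custom `GPT2CustomLMHeadModel` class from this repository's own `modeling_gpt2.py` when `trust_remote_code=True` is passed. A minimal sketch of inspecting that, assuming the files end up in a repo with the hypothetical id `<user>/tiny-testing-gpt2-remote-code`:

```python
from transformers import AutoConfig

# Hypothetical repo id -- substitute the repository these files are actually pushed to.
repo_id = "<user>/tiny-testing-gpt2-remote-code"

cfg = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)

# The custom-class mapping declared in config.json above.
print(getattr(cfg, "auto_map", None))  # {'AutoModelForCausalLM': 'modeling_gpt2.GPT2CustomLMHeadModel'}

# The tiny dimensions (5 layers, 4 heads, 32-dim embeddings, 1000-token vocab)
# keep the checkpoint small enough for fast tests.
print(cfg.n_layer, cfg.n_head, cfg.n_embd, cfg.vocab_size)  # 5 4 32 1000
```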
create_model.py ADDED
@@ -0,0 +1,10 @@
+ from transformers import AutoConfig
+
+ from modeling.modeling_gpt2 import GPT2CustomLMHeadModel
+
+ cfg = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-gpt2")
+
+ GPT2CustomLMHeadModel.register_for_auto_class("AutoModelForCausalLM")
+
+ model = GPT2CustomLMHeadModel(cfg)
+ model.save_pretrained("/home/fxmarty/hf_internship/tiny-testing-gpt2-remote-code")
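`create_model.py` above registers the custom class for `AutoModelForCausalLM` and saves a randomly initialized checkpoint. A minimal sketch of loading it back, assuming the local path used above (a Hub repo id would work the same way); `trust_remote_code=True` is what allows the class to be imported from the repository's `modeling_gpt2.py`:

```python
import torch
from transformers import AutoModelForCausalLM

# Local directory written by create_model.py above; a Hub repo id works the same way.
path = "/home/fxmarty/hf_internship/tiny-testing-gpt2-remote-code"

# trust_remote_code=True lets transformers import GPT2CustomLMHeadModel from the
# repository's own modeling_gpt2.py, as declared in config.json's auto_map.
model = AutoModelForCausalLM.from_pretrained(path, trust_remote_code=True)

# Random token ids are enough here: the weights are random and only meant for tests.
input_ids = torch.randint(0, model.config.vocab_size, (1, 8))
with torch.no_grad():
    out = model(input_ids)

print(type(model).__name__)  # GPT2CustomLMHeadModel
print(out.logits.shape)      # torch.Size([1, 8, 1000])
```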
merges.txt ADDED
@@ -0,0 +1,807 @@
1
+ #version: 0.2 - Trained by `huggingface/tokenizers`
2
+ Ġ t
3
+ h e
4
+ Ġ a
5
+ i n
6
+ Ġt he
7
+ e r
8
+ o n
9
+ Ġ ,
10
+ r e
11
+ Ġ s
12
+ e d
13
+ Ġ o
14
+ Ġ w
15
+ n d
16
+ a t
17
+ Ġ .
18
+ o r
19
+ i t
20
+ Ġ c
21
+ e n
22
+ Ġ f
23
+ i s
24
+ e s
25
+ a r
26
+ Ġo f
27
+ Ġ b
28
+ a n
29
+ Ġ in
30
+ a l
31
+ in g
32
+ Ġ p
33
+ Ġa nd
34
+ a s
35
+ Ġt o
36
+ r o
37
+ i c
38
+ Ġ m
39
+ Ġ d
40
+ Ġ h
41
+ i on
42
+ l e
43
+ o u
44
+ Ġ T
45
+ Ġ re
46
+ Ġ =
47
+ Ġ "
48
+ Ġ A
49
+ Ġ S
50
+ en t
51
+ i l
52
+ Ġt h
53
+ Ġ 1
54
+ s t
55
+ Ġ C
56
+ e l
57
+ o m
58
+ Ġ l
59
+ a m
60
+ Ġ Ċ
61
+ Ġ e
62
+ Ġ n
63
+ Ġ @
64
+ a d
65
+ a c
66
+ Ġw as
67
+ Ġ M
68
+ u r
69
+ ĠT he
70
+ e c
71
+ Ġ on
72
+ l y
73
+ Ġ B
74
+ Ġ I
75
+ Ġ g
76
+ Ġ '
77
+ e t
78
+ o l
79
+ i d
80
+ i v
81
+ i m
82
+ Ġf or
83
+ i r
84
+ - @
85
+ Ġ@ -@
86
+ i g
87
+ o t
88
+ t er
89
+ Ġa s
90
+ Ġ H
91
+ u s
92
+ o w
93
+ Ġs t
94
+ u t
95
+ it h
96
+ a y
97
+ Ġ 2
98
+ Ġ P
99
+ at ion
100
+ v er
101
+ Ġb e
102
+ he r
103
+ Ġth at
104
+ Ġw ith
105
+ Ġ R
106
+ c e
107
+ t h
108
+ Ġ D
109
+ Ġ is
110
+ u n
111
+ e m
112
+ Ġ F
113
+ Ġw h
114
+ u l
115
+ Ġb y
116
+ Ġa l
117
+ c h
118
+ Ġ )
119
+ Ġ (
120
+ Ġ W
121
+ Ġc on
122
+ r a
123
+ Ġ G
124
+ o s
125
+ Ġ L
126
+ Ġ N
127
+ Ġa t
128
+ er s
129
+ c t
130
+ Ġ it
131
+ Ġ1 9
132
+ ro m
133
+ a nd
134
+ Ġa n
135
+ u m
136
+ es t
137
+ Ġ J
138
+ a g
139
+ Ġ he
140
+ 0 0
141
+ is t
142
+ a in
143
+ o d
144
+ a v
145
+ r i
146
+ Ġ E
147
+ Ġ O
148
+ Ġf rom
149
+ Ġc om
150
+ Ġh is
151
+ o p
152
+ Ġp ro
153
+ re s
154
+ i es
155
+ i f
156
+ Ġ v
157
+ or t
158
+ er e
159
+ il l
160
+ l d
161
+ Ġd e
162
+ p p
163
+ Ġs u
164
+ o re
165
+ ĠI n
166
+ Ġ r
167
+ Ġs e
168
+ Ġw ere
169
+ e w
170
+ on g
171
+ ig h
172
+ ar d
173
+ at e
174
+ al l
175
+ ar t
176
+ a k
177
+ ic h
178
+ Ġc h
179
+ Ġo r
180
+ a b
181
+ an t
182
+ u d
183
+ o c
184
+ b er
185
+ Ġe x
186
+ g h
187
+ it y
188
+ at ed
189
+ p t
190
+ es s
191
+ e ar
192
+ Ġ K
193
+ Ġp l
194
+ am e
195
+ q u
196
+ iv e
197
+ ro u
198
+ Ġa re
199
+ Ġ â
200
+ Ġs h
201
+ Ġ k
202
+ ac k
203
+ ec t
204
+ Ġâ Ģ
205
+ Ġ U
206
+ Ġh ad
207
+ s e
208
+ Ġwh ich
209
+ re d
210
+ o v
211
+ ĠS t
212
+ as t
213
+ Ġs p
214
+ i an
215
+ Ġ y
216
+ m ent
217
+ Ġ le
218
+ Ġn ot
219
+ g e
220
+ or d
221
+ r it
222
+ i p
223
+ in e
224
+ el l
225
+ al ly
226
+ ou r
227
+ o st
228
+ igh t
229
+ t her
230
+ a p
231
+ Ġ u
232
+ is h
233
+ ĠC h
234
+ ou n
235
+ i a
236
+ Ġ 3
237
+ av e
238
+ ar y
239
+ u st
240
+ o g
241
+ Ġ2 00
242
+ Ġ un
243
+ ou s
244
+ ir st
245
+ Ġ V
246
+ c c
247
+ Ġin c
248
+ Ġ ;
249
+ Ġcom p
250
+ r u
251
+ ion s
252
+ Ġthe ir
253
+ Ġb ut
254
+ id e
255
+ u re
256
+ s o
257
+ Ġcon t
258
+ Ġin t
259
+ f ter
260
+ ic al
261
+ i al
262
+ Ġa r
263
+ Ġf irst
264
+ ou ld
265
+ Ġit s
266
+ he d
267
+ ĠâĢ ĵ
268
+ Ġw he
269
+ w o
270
+ ou t
271
+ u b
272
+ Ġ2 0
273
+ f f
274
+ Ġ :
275
+ u e
276
+ Ġ her
277
+ ow n
278
+ o k
279
+ Ġal so
280
+ Ġc l
281
+ p er
282
+ ig n
283
+ at er
284
+ r an
285
+ or m
286
+ i e
287
+ om e
288
+ or k
289
+ as s
290
+ i re
291
+ e nd
292
+ Ġre s
293
+ Ġa b
294
+ Ġa d
295
+ Ġ us
296
+ r y
297
+ Ġre c
298
+ Ġh ave
299
+ ag e
300
+ ĠH e
301
+ Ġ 4
302
+ Ġ ro
303
+ m er
304
+ Ġon e
305
+ on d
306
+ l ow
307
+ Ġh as
308
+ ĠT h
309
+ d u
310
+ Ġ 5
311
+ Ġp er
312
+ Ġbe en
313
+ im e
314
+ Ġt wo
315
+ en ce
316
+ l and
317
+ Ġ1 8
318
+ . @
319
+ Ġ@ .@
320
+ ul t
321
+ re e
322
+ ou gh
323
+ i le
324
+ Ġwh o
325
+ ĠA l
326
+ Ġs c
327
+ ur ing
328
+ p l
329
+ or y
330
+ it ion
331
+ r ic
332
+ ation s
333
+ Ġd is
334
+ Ġth is
335
+ Ġb ec
336
+ Ġa pp
337
+ i z
338
+ ĠI t
339
+ a re
340
+ ac h
341
+ l ud
342
+ ad e
343
+ Ġpl ay
344
+ Ġ j
345
+ Ġm an
346
+ ac t
347
+ el y
348
+ Ġp art
349
+ Ġd es
350
+ Ġa g
351
+ Ġthe y
352
+ Ġy ear
353
+ oun t
354
+ Ġ20 1
355
+ Ġo ver
356
+ Ġo ther
357
+ ou nd
358
+ Ġa fter
359
+ i b
360
+ o ver
361
+ Ġs er
362
+ Ġ en
363
+ Ġof f
364
+ Ġ im
365
+ ct ion
366
+ Ġ Y
367
+ k e
368
+ it e
369
+ , @
370
+ Ġ@ ,@
371
+ t e
372
+ ur n
373
+ Ġinc lud
374
+ res s
375
+ an ce
376
+ an g
377
+ Ġat t
378
+ ic e
379
+ ac e
380
+ ar k
381
+ Ġo ut
382
+ w n
383
+ p h
384
+ em ber
385
+ Ġp re
386
+ Ġu p
387
+ en s
388
+ m an
389
+ Ġe v
390
+ Ġt ime
391
+ nd er
392
+ rou gh
393
+ c ed
394
+ Ġf in
395
+ Ġint o
396
+ on e
397
+ p ort
398
+ rou nd
399
+ w e
400
+ re n
401
+ l es
402
+ in t
403
+ ĠO n
404
+ v el
405
+ Ġcom m
406
+ Ġs he
407
+ as on
408
+ am p
409
+ Ġt e
410
+ Ġw ould
411
+ w ard
412
+ Ġm ore
413
+ Ġ 6
414
+ i ed
415
+ os e
416
+ ri b
417
+ ĠU n
418
+ Ġal l
419
+ ing s
420
+ ter n
421
+ c es
422
+ ab le
423
+ Ġw e
424
+ it ed
425
+ e ver
426
+ ent s
427
+ Ġh im
428
+ as ed
429
+ or s
430
+ o y
431
+ o od
432
+ Ġc ent
433
+ i x
434
+ as e
435
+ il d
436
+ ĠA n
437
+ Ġ 7
438
+ Ġw ork
439
+ at es
440
+ i ous
441
+ at h
442
+ Ġp o
443
+ ro p
444
+ ol d
445
+ al s
446
+ is s
447
+ e y
448
+ ic t
449
+ Ġf e
450
+ Ġthe m
451
+ g an
452
+ Ġs ec
453
+ Ġb et
454
+ Ġwhe n
455
+ Ġs ong
456
+ Ġre m
457
+ e p
458
+ f orm
459
+ a il
460
+ f er
461
+ Ġe ar
462
+ ub l
463
+ a w
464
+ Ġk n
465
+ ak e
466
+ a us
467
+ Ġm ost
468
+ Ġcon s
469
+ Ġd uring
470
+ ĠA s
471
+ or th
472
+ Ġn ew
473
+ er ed
474
+ il m
475
+ v ed
476
+ at t
477
+ Ġon ly
478
+ Ġ 9
479
+ Ġd ec
480
+ Ġ 8
481
+ ic k
482
+ Ġg ame
483
+ on s
484
+ u g
485
+ Ġt r
486
+ f t
487
+ ot h
488
+ o ok
489
+ ĠM ar
490
+ re at
491
+ w ay
492
+ Ġc an
493
+ ol low
494
+ ou th
495
+ we en
496
+ ĠE n
497
+ Ġ19 9
498
+ ter s
499
+ Ġre l
500
+ in d
501
+ Ġab out
502
+ Ġse ason
503
+ Ġag ain
504
+ r al
505
+ Ġth ree
506
+ ation al
507
+ Ġu nder
508
+ ul ar
509
+ Ġm e
510
+ Ġth an
511
+ ĠC om
512
+ ĠA r
513
+ h ip
514
+ o b
515
+ Ġn e
516
+ Ġbet ween
517
+ Ġf l
518
+ h n
519
+ v e
520
+ Ġch ar
521
+ Ġc ol
522
+ Ġrec ord
523
+ i ew
524
+ r on
525
+ f ore
526
+ Ġth rough
527
+ is ion
528
+ or n
529
+ Ġ 00
530
+ oc k
531
+ Ġ ver
532
+ Ġl ater
533
+ Ġn um
534
+ Ġe nd
535
+ ol og
536
+ am es
537
+ Ġp os
538
+ Ġw rit
539
+ Ġpro du
540
+ Ġwh ile
541
+ Ġa ct
542
+ Ġre le
543
+ Ġf ilm
544
+ is hed
545
+ Ġp r
546
+ an s
547
+ Ġre g
548
+ Ġfor m
549
+ Ġas s
550
+ ĠS e
551
+ ur y
552
+ t ed
553
+ t s
554
+ Ġm ade
555
+ Ġsu b
556
+ Ġp e
557
+ Ġs o
558
+ or ld
559
+ Ġre t
560
+ ĠN ew
561
+ Ġsp ec
562
+ Ġa cc
563
+ Ġ qu
564
+ Ġwhe re
565
+ en er
566
+ Ġm ov
567
+ he s
568
+ mer ic
569
+ at ing
570
+ Ġin ter
571
+ ĠL e
572
+ ĠA meric
573
+ Ġ ra
574
+ Ġs ome
575
+ Ġc o
576
+ Ġl ar
577
+ Ġb u
578
+ Ġde f
579
+ b um
580
+ Ġa c
581
+ Ġm us
582
+ Ġf ollow
583
+ ĠA t
584
+ in s
585
+ iv ed
586
+ if ic
587
+ u al
588
+ Ġa m
589
+ Ġsu ch
590
+ Ġsec ond
591
+ i ke
592
+ Ġf our
593
+ Ġin d
594
+ an n
595
+ he n
596
+ Ġus ed
597
+ ĠR e
598
+ ic s
599
+ le ct
600
+ Ġd ay
601
+ i el
602
+ il y
603
+ ĠTh is
604
+ Ġ 0
605
+ Ġp ubl
606
+ Ġc all
607
+ ĠJ o
608
+ l l
609
+ Ġal bum
610
+ Ġ00 0
611
+ ran s
612
+ Ġd o
613
+ an y
614
+ Ġbe fore
615
+ ro s
616
+ ĠS h
617
+ Ġs y
618
+ a id
619
+ ĠEn g
620
+ Ġbe ing
621
+ Ġ1 0
622
+ u c
623
+ Ġe p
624
+ Ġsu pp
625
+ Ġthe re
626
+ Ġyear s
627
+ ar s
628
+ ow ever
629
+ Ġ ent
630
+ if e
631
+ Ġh igh
632
+ Ġf ound
633
+ ir d
634
+ Ġn o
635
+ Ġs et
636
+ in es
637
+ iv er
638
+ i o
639
+ ot her
640
+ j ect
641
+ Ġs ur
642
+ a j
643
+ t en
644
+ Ġt ra
645
+ Ġ1 2
646
+ is ed
647
+ it ies
648
+ vel op
649
+ Ġb l
650
+ al e
651
+ Ġser ies
652
+ Ġl oc
653
+ Ġnum ber
654
+ Ġp res
655
+ an e
656
+ aus e
657
+ od e
658
+ e k
659
+ t on
660
+ ĠS c
661
+ i er
662
+ is e
663
+ Ġse ver
664
+ in ce
665
+ Ġb oth
666
+ an k
667
+ ro w
668
+ ire ct
669
+ s on
670
+ Ġthe n
671
+ ĠB rit
672
+ i et
673
+ Ġ1 6
674
+ Ġep is
675
+ Ġinclud ing
676
+ it s
677
+ ig in
678
+ p r
679
+ Ġ /
680
+ Ġagain st
681
+ Ġw ell
682
+ Ġbec ame
683
+ Ġex p
684
+ Ġkn own
685
+ Ġt rans
686
+ Ġchar ac
687
+ ĠâĢ Ķ
688
+ r am
689
+ Ġb ack
690
+ Ġad d
691
+ Ġp op
692
+ Ġg o
693
+ ur ch
694
+ Ġdes c
695
+ Ġs ing
696
+ iel d
697
+ Ġper form
698
+ ain ed
699
+ Ġre ce
700
+ id ent
701
+ Ġe m
702
+ er t
703
+ u res
704
+ Ġin v
705
+ Ġde p
706
+ Ġ19 8
707
+ a ir
708
+ er n
709
+ at her
710
+ f ul
711
+ Ġ Z
712
+ Ġm on
713
+ Ġman y
714
+ Ġm ain
715
+ Ġst ud
716
+ Ġl ong
717
+ in n
718
+ th ough
719
+ u p
720
+ o ol
721
+ ĠUn ited
722
+ l ed
723
+ em ent
724
+ Ġ1 5
725
+ ow er
726
+ ĠJo hn
727
+ Ġo p
728
+ Ġ1 1
729
+ in ed
730
+ Ġm et
731
+ o ber
732
+ le y
733
+ Ġ1 7
734
+ Ġcent ury
735
+ Ġte am
736
+ Ġ est
737
+ ĠA fter
738
+ y l
739
+ Ġm in
740
+ u ch
741
+ ut e
742
+ Ġde velop
743
+ ĠS he
744
+ i am
745
+ Ġsh ow
746
+ el f
747
+ Ġre p
748
+ Ġcon c
749
+ at ive
750
+ Ġc re
751
+ over n
752
+ a red
753
+ Ġ19 4
754
+ Ġor igin
755
+ Ġs m
756
+ iv ers
757
+ a z
758
+ Ġle ad
759
+ Ġsever al
760
+ a h
761
+ Ġo b
762
+ Ġre v
763
+ Ġm ill
764
+ er m
765
+ u ally
766
+ o ot
767
+ Ġbe gan
768
+ Ġ19 6
769
+ i red
770
+ Ġd if
771
+ Ġcont in
772
+ Ġs ign
773
+ i k
774
+ ĠI nd
775
+ ment s
776
+ iz ed
777
+ Ġ19 7
778
+ Ġd irect
779
+ a u
780
+ Ġex t
781
+ ros s
782
+ em b
783
+ d er
784
+ Ġp ol
785
+ Ġm ay
786
+ a pt
787
+ el s
788
+ ĠW h
789
+ Ġcomp le
790
+ Ġar t
791
+ ĠB r
792
+ ĠI s
793
+ un e
794
+ t il
795
+ Ġc rit
796
+ Ġh ist
797
+ Ġear ly
798
+ Ġc ould
799
+ ĠC on
800
+ Ġd id
801
+ Ġb el
802
+ Ġcall ed
803
+ u ed
804
+ Ġn ear
805
+ Ġepis ode
806
+ y p
807
+ Ġdesc rib
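Each line after the version header in `merges.txt` is one byte-pair-encoding merge rule; the line order defines priority, and the tokenizer repeatedly applies the highest-priority rule that matches adjacent symbols. A minimal illustration of that loop, using a hand-copied subset of the rules above rather than the real `tokenizers` machinery:

```python
# Toy BPE merge loop over a pre-split word, for illustration only.
# Ranks follow the line order in merges.txt (lower rank = applied earlier);
# the pairs below are copied from the first few rules shown above.
ranks = {("Ġ", "t"): 0, ("h", "e"): 1, ("i", "n"): 3, ("Ġt", "he"): 4}

def bpe(symbols):
    symbols = list(symbols)
    while True:
        candidates = [(ranks.get(pair, float("inf")), i)
                      for i, pair in enumerate(zip(symbols, symbols[1:]))]
        best_rank, i = min(candidates, default=(float("inf"), -1))
        if best_rank == float("inf"):
            break  # no applicable merge left
        symbols[i:i + 2] = [symbols[i] + symbols[i + 1]]
    return symbols

# "Ġ" marks a leading space in GPT-2's byte-level alphabet, so " the" becomes one token.
print(bpe(["Ġ", "t", "h", "e"]))  # ['Ġthe']
```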
modeling_gpt2.py ADDED
@@ -0,0 +1,1548 @@
1
+ # coding=utf-8
2
+ # Copyright 2018 The OpenAI Team Authors and HuggingFace Inc. team.
3
+ # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ """PyTorch OpenAI GPT-2 model."""
17
+
18
+ import math
19
+ import os
20
+ from dataclasses import dataclass
21
+ from typing import Optional, Tuple, Union
22
+ from transformers import GenerationMixin
23
+
24
+ import torch
25
+ import torch.utils.checkpoint
26
+ from torch import nn
27
+ from torch.cuda.amp import autocast
28
+ from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
29
+
30
+ from transformers.activations import ACT2FN
31
+ from transformers.modeling_outputs import (
32
+ BaseModelOutputWithPastAndCrossAttentions,
33
+ CausalLMOutputWithCrossAttentions,
34
+ SequenceClassifierOutputWithPast,
35
+ TokenClassifierOutput,
36
+ )
37
+ from transformers.modeling_utils import PreTrainedModel, SequenceSummary
38
+ from transformers.pytorch_utils import Conv1D, find_pruneable_heads_and_indices, prune_conv1d_layer
39
+ from transformers.utils import (
40
+ ModelOutput,
41
+ add_code_sample_docstrings,
42
+ add_start_docstrings,
43
+ add_start_docstrings_to_model_forward,
44
+ logging,
45
+ replace_return_docstrings,
46
+ )
47
+ from transformers.utils.model_parallel_utils import assert_device_map, get_device_map
48
+ from transformers import GPT2Config
49
+
50
+
51
+ logger = logging.get_logger(__name__)
52
+
53
+ _CHECKPOINT_FOR_DOC = "gpt2"
54
+ _CONFIG_FOR_DOC = "GPT2Config"
55
+ _TOKENIZER_FOR_DOC = "GPT2Tokenizer"
56
+
57
+ GPT2_PRETRAINED_MODEL_ARCHIVE_LIST = [
58
+ "gpt2",
59
+ "gpt2-medium",
60
+ "gpt2-large",
61
+ "gpt2-xl",
62
+ "distilgpt2",
63
+ # See all GPT-2 models at https://huggingface.co/models?filter=gpt2
64
+ ]
65
+
66
+
67
+ def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
68
+ """Load tf checkpoints in a pytorch model"""
69
+ try:
70
+ import re
71
+
72
+ import tensorflow as tf
73
+ except ImportError:
74
+ logger.error(
75
+ "Loading a TensorFlow model in PyTorch requires TensorFlow to be installed. Please see "
76
+ "https://www.tensorflow.org/install/ for installation instructions."
77
+ )
78
+ raise
79
+ tf_path = os.path.abspath(gpt2_checkpoint_path)
80
+ logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
81
+ # Load weights from TF model
82
+ init_vars = tf.train.list_variables(tf_path)
83
+ names = []
84
+ arrays = []
85
+ for name, shape in init_vars:
86
+ logger.info(f"Loading TF weight {name} with shape {shape}")
87
+ array = tf.train.load_variable(tf_path, name)
88
+ names.append(name)
89
+ arrays.append(array.squeeze())
90
+
91
+ for name, array in zip(names, arrays):
92
+ name = name[6:] # skip "model/"
93
+ name = name.split("/")
94
+ pointer = model
95
+ for m_name in name:
96
+ if re.fullmatch(r"[A-Za-z]+\d+", m_name):
97
+ scope_names = re.split(r"(\d+)", m_name)
98
+ else:
99
+ scope_names = [m_name]
100
+ if scope_names[0] == "w" or scope_names[0] == "g":
101
+ pointer = getattr(pointer, "weight")
102
+ elif scope_names[0] == "b":
103
+ pointer = getattr(pointer, "bias")
104
+ elif scope_names[0] == "wpe" or scope_names[0] == "wte":
105
+ pointer = getattr(pointer, scope_names[0])
106
+ pointer = getattr(pointer, "weight")
107
+ else:
108
+ pointer = getattr(pointer, scope_names[0])
109
+ if len(scope_names) >= 2:
110
+ num = int(scope_names[1])
111
+ pointer = pointer[num]
112
+ try:
113
+ assert (
114
+ pointer.shape == array.shape
115
+ ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
116
+ except AssertionError as e:
117
+ e.args += (pointer.shape, array.shape)
118
+ raise
119
+ logger.info(f"Initialize PyTorch weight {name}")
120
+ pointer.data = torch.from_numpy(array)
121
+ return model
122
+
123
+
124
+ class GPT2Attention(nn.Module):
125
+ def __init__(self, config, is_cross_attention=False, layer_idx=None):
126
+ super().__init__()
127
+
128
+ max_positions = config.max_position_embeddings
129
+ self.register_buffer(
130
+ "bias",
131
+ torch.tril(torch.ones((max_positions, max_positions), dtype=torch.uint8)).view(
132
+ 1, 1, max_positions, max_positions
133
+ ),
134
+ )
135
+ self.register_buffer("masked_bias", torch.tensor(-1e4))
136
+
137
+ self.embed_dim = config.hidden_size
138
+ self.num_heads = config.num_attention_heads
139
+ self.head_dim = self.embed_dim // self.num_heads
140
+ self.split_size = self.embed_dim
141
+ if self.head_dim * self.num_heads != self.embed_dim:
142
+ raise ValueError(
143
+ f"`embed_dim` must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:"
144
+ f" {self.num_heads})."
145
+ )
146
+
147
+ self.scale_attn_weights = config.scale_attn_weights
148
+ self.is_cross_attention = is_cross_attention
149
+
150
+ # Layer-wise attention scaling, reordering, and upcasting
151
+ self.scale_attn_by_inverse_layer_idx = config.scale_attn_by_inverse_layer_idx
152
+ self.layer_idx = layer_idx
153
+ self.reorder_and_upcast_attn = config.reorder_and_upcast_attn
154
+
155
+ if self.is_cross_attention:
156
+ self.c_attn = Conv1D(2 * self.embed_dim, self.embed_dim)
157
+ self.q_attn = Conv1D(self.embed_dim, self.embed_dim)
158
+ else:
159
+ self.c_attn = Conv1D(3 * self.embed_dim, self.embed_dim)
160
+ self.c_proj = Conv1D(self.embed_dim, self.embed_dim)
161
+
162
+ self.attn_dropout = nn.Dropout(config.attn_pdrop)
163
+ self.resid_dropout = nn.Dropout(config.resid_pdrop)
164
+
165
+ self.pruned_heads = set()
166
+
167
+ def prune_heads(self, heads):
168
+ if len(heads) == 0:
169
+ return
170
+ heads, index = find_pruneable_heads_and_indices(heads, self.num_heads, self.head_dim, self.pruned_heads)
171
+ index_attn = torch.cat([index, index + self.split_size, index + (2 * self.split_size)])
172
+
173
+ # Prune conv1d layers
174
+ self.c_attn = prune_conv1d_layer(self.c_attn, index_attn, dim=1)
175
+ self.c_proj = prune_conv1d_layer(self.c_proj, index, dim=0)
176
+
177
+ # Update hyper params
178
+ self.split_size = (self.split_size // self.num_heads) * (self.num_heads - len(heads))
179
+ self.num_heads = self.num_heads - len(heads)
180
+ self.pruned_heads = self.pruned_heads.union(heads)
181
+
182
+ def _attn(self, query, key, value, attention_mask=None, head_mask=None):
183
+ attn_weights = torch.matmul(query, key.transpose(-1, -2))
184
+
185
+ if self.scale_attn_weights:
186
+ attn_weights = attn_weights / torch.full(
187
+ [], value.size(-1) ** 0.5, dtype=attn_weights.dtype, device=attn_weights.device
188
+ )
189
+
190
+ # Layer-wise attention scaling
191
+ if self.scale_attn_by_inverse_layer_idx:
192
+ attn_weights = attn_weights / float(self.layer_idx + 1)
193
+
194
+ if not self.is_cross_attention:
195
+ # Only the "normal" (non-cross) attention layer implements the causal mask.
196
+ query_length, key_length = query.size(-2), key.size(-2)
197
+ causal_mask = self.bias[:, :, key_length - query_length : key_length, :key_length].to(torch.bool)
198
+ mask_value = torch.finfo(attn_weights.dtype).min
199
+ # Need to be a tensor, otherwise we get error: `RuntimeError: expected scalar type float but found double`.
200
+ # Need to be on the same device, otherwise `RuntimeError: ..., x and y to be on the same device`
201
+ mask_value = torch.full([], mask_value, dtype=attn_weights.dtype).to(attn_weights.device)
202
+ attn_weights = torch.where(causal_mask, attn_weights, mask_value)
203
+
204
+ if attention_mask is not None:
205
+ # Apply the attention mask
206
+ attn_weights = attn_weights + attention_mask
207
+
208
+ attn_weights = nn.functional.softmax(attn_weights, dim=-1)
209
+
210
+ # Downcast (if necessary) back to V's dtype (if in mixed-precision) -- No-Op otherwise
211
+ attn_weights = attn_weights.type(value.dtype)
212
+ attn_weights = self.attn_dropout(attn_weights)
213
+
214
+ # Mask heads if we want to
215
+ if head_mask is not None:
216
+ attn_weights = attn_weights * head_mask
217
+
218
+ attn_output = torch.matmul(attn_weights, value)
219
+
220
+ return attn_output, attn_weights
221
+
222
+ def _upcast_and_reordered_attn(self, query, key, value, attention_mask=None, head_mask=None):
223
+ # Use `torch.baddbmm` (a bit more efficient w/ alpha param for scaling -- from Megatron-LM)
224
+ bsz, num_heads, q_seq_len, dk = query.size()
225
+ _, _, k_seq_len, _ = key.size()
226
+
227
+ # Preallocate attn_weights for `baddbmm`
228
+ attn_weights = torch.empty(bsz * num_heads, q_seq_len, k_seq_len, dtype=torch.float32, device=query.device)
229
+
230
+ # Compute Scale Factor
231
+ scale_factor = 1.0
232
+ if self.scale_attn_weights:
233
+ scale_factor /= float(value.size(-1)) ** 0.5
234
+
235
+ if self.scale_attn_by_inverse_layer_idx:
236
+ scale_factor /= float(self.layer_idx + 1)
237
+
238
+ # Upcast (turn off autocast) and reorder (Scale K by 1 / root(dk))
239
+ with autocast(enabled=False):
240
+ q, k = query.reshape(-1, q_seq_len, dk), key.transpose(-1, -2).reshape(-1, dk, k_seq_len)
241
+ attn_weights = torch.baddbmm(attn_weights, q.float(), k.float(), beta=0, alpha=scale_factor)
242
+ attn_weights = attn_weights.reshape(bsz, num_heads, q_seq_len, k_seq_len)
243
+
244
+ if not self.is_cross_attention:
245
+ # Only the "normal" (non-cross) attention layer implements the causal mask.
246
+ query_length, key_length = query.size(-2), key.size(-2)
247
+ causal_mask = self.bias[:, :, key_length - query_length : key_length, :key_length].bool()
248
+ mask_value = torch.finfo(attn_weights.dtype).min
249
+ # Need to be a tensor, otherwise we get error: `RuntimeError: expected scalar type float but found double`.
250
+ # Need to be on the same device, otherwise `RuntimeError: ..., x and y to be on the same device`
251
+ mask_value = torch.tensor(mask_value, dtype=attn_weights.dtype).to(attn_weights.device)
252
+ attn_weights = torch.where(causal_mask, attn_weights, mask_value)
253
+
254
+ if attention_mask is not None:
255
+ # Apply the attention mask
256
+ attn_weights = attn_weights + attention_mask
257
+
258
+ attn_weights = nn.functional.softmax(attn_weights, dim=-1)
259
+
260
+ # Downcast (if necessary) back to V's dtype (if in mixed-precision) -- No-Op otherwise
261
+ if attn_weights.dtype != torch.float32:
262
+ raise RuntimeError("Error with upcasting, attn_weights does not have dtype torch.float32")
263
+ attn_weights = attn_weights.type(value.dtype)
264
+ attn_weights = self.attn_dropout(attn_weights)
265
+
266
+ # Mask heads if we want to
267
+ if head_mask is not None:
268
+ attn_weights = attn_weights * head_mask
269
+
270
+ attn_output = torch.matmul(attn_weights, value)
271
+
272
+ return attn_output, attn_weights
273
+
274
+ def _split_heads(self, tensor, num_heads, attn_head_size):
275
+ """
276
+ Splits hidden_size dim into attn_head_size and num_heads
277
+ """
278
+ new_shape = tensor.size()[:-1] + (num_heads, attn_head_size)
279
+ tensor = tensor.view(new_shape)
280
+ return tensor.permute(0, 2, 1, 3) # (batch, head, seq_length, head_features)
281
+
282
+ def _merge_heads(self, tensor, num_heads, attn_head_size):
283
+ """
284
+ Merges attn_head_size dim and num_attn_heads dim into hidden_size
285
+ """
286
+ tensor = tensor.permute(0, 2, 1, 3).contiguous()
287
+ new_shape = tensor.size()[:-2] + (num_heads * attn_head_size,)
288
+ return tensor.view(new_shape)
289
+
290
+ def forward(
291
+ self,
292
+ hidden_states: Optional[Tuple[torch.FloatTensor]],
293
+ layer_past: Optional[Tuple[torch.Tensor]] = None,
294
+ attention_mask: Optional[torch.FloatTensor] = None,
295
+ head_mask: Optional[torch.FloatTensor] = None,
296
+ encoder_hidden_states: Optional[torch.Tensor] = None,
297
+ encoder_attention_mask: Optional[torch.FloatTensor] = None,
298
+ use_cache: Optional[bool] = False,
299
+ output_attentions: Optional[bool] = False,
300
+ ) -> Tuple[Union[torch.Tensor, Tuple[torch.Tensor]], ...]:
301
+ if encoder_hidden_states is not None:
302
+ if not hasattr(self, "q_attn"):
303
+ raise ValueError(
304
+ "If class is used as cross attention, the weights `q_attn` have to be defined. "
305
+ "Please make sure to instantiate class with `GPT2Attention(..., is_cross_attention=True)`."
306
+ )
307
+
308
+ query = self.q_attn(hidden_states)
309
+ key, value = self.c_attn(encoder_hidden_states).split(self.split_size, dim=2)
310
+ attention_mask = encoder_attention_mask
311
+ else:
312
+ query, key, value = self.c_attn(hidden_states).split(self.split_size, dim=2)
313
+
314
+ query = self._split_heads(query, self.num_heads, self.head_dim)
315
+ key = self._split_heads(key, self.num_heads, self.head_dim)
316
+ value = self._split_heads(value, self.num_heads, self.head_dim)
317
+
318
+ value = value + 10
319
+ print("increased value")
320
+
321
+ if layer_past is not None:
322
+ past_key, past_value = layer_past
323
+ key = torch.cat((past_key, key), dim=-2)
324
+ value = torch.cat((past_value, value), dim=-2)
325
+
326
+ if use_cache is True:
327
+ present = (key, value)
328
+ else:
329
+ present = None
330
+
331
+ if self.reorder_and_upcast_attn:
332
+ attn_output, attn_weights = self._upcast_and_reordered_attn(query, key, value, attention_mask, head_mask)
333
+ else:
334
+ attn_output, attn_weights = self._attn(query, key, value, attention_mask, head_mask)
335
+
336
+ attn_output = self._merge_heads(attn_output, self.num_heads, self.head_dim)
337
+ attn_output = self.c_proj(attn_output)
338
+ attn_output = self.resid_dropout(attn_output)
339
+
340
+ outputs = (attn_output, present)
341
+ if output_attentions:
342
+ outputs += (attn_weights,)
343
+
344
+ return outputs # a, present, (attentions)
345
+
346
+
347
+ class GPT2MLP(nn.Module):
348
+ def __init__(self, intermediate_size, config):
349
+ super().__init__()
350
+ embed_dim = config.hidden_size
351
+ self.c_fc = Conv1D(intermediate_size, embed_dim)
352
+ self.c_proj = Conv1D(embed_dim, intermediate_size)
353
+ self.act = ACT2FN[config.activation_function]
354
+ self.dropout = nn.Dropout(config.resid_pdrop)
355
+
356
+ def forward(self, hidden_states: Optional[Tuple[torch.FloatTensor]]) -> torch.FloatTensor:
357
+ hidden_states = self.c_fc(hidden_states)
358
+ hidden_states = self.act(hidden_states)
359
+ hidden_states = self.c_proj(hidden_states)
360
+ hidden_states = self.dropout(hidden_states)
361
+ return hidden_states
362
+
363
+
364
+ class GPT2Block(nn.Module):
365
+ def __init__(self, config, layer_idx=None):
366
+ super().__init__()
367
+ hidden_size = config.hidden_size
368
+ inner_dim = config.n_inner if config.n_inner is not None else 4 * hidden_size
369
+
370
+ self.ln_1 = nn.LayerNorm(hidden_size, eps=config.layer_norm_epsilon)
371
+ self.attn = GPT2Attention(config, layer_idx=layer_idx)
372
+ self.ln_2 = nn.LayerNorm(hidden_size, eps=config.layer_norm_epsilon)
373
+
374
+ if config.add_cross_attention:
375
+ self.crossattention = GPT2Attention(config, is_cross_attention=True, layer_idx=layer_idx)
376
+ self.ln_cross_attn = nn.LayerNorm(hidden_size, eps=config.layer_norm_epsilon)
377
+
378
+ self.mlp = GPT2MLP(inner_dim, config)
379
+
380
+ def forward(
381
+ self,
382
+ hidden_states: Optional[Tuple[torch.FloatTensor]],
383
+ layer_past: Optional[Tuple[torch.Tensor]] = None,
384
+ attention_mask: Optional[torch.FloatTensor] = None,
385
+ head_mask: Optional[torch.FloatTensor] = None,
386
+ encoder_hidden_states: Optional[torch.Tensor] = None,
387
+ encoder_attention_mask: Optional[torch.FloatTensor] = None,
388
+ use_cache: Optional[bool] = False,
389
+ output_attentions: Optional[bool] = False,
390
+ ) -> Union[Tuple[torch.Tensor], Optional[Tuple[torch.Tensor, Tuple[torch.FloatTensor, ...]]]]:
391
+ residual = hidden_states
392
+ hidden_states = self.ln_1(hidden_states)
393
+ attn_outputs = self.attn(
394
+ hidden_states,
395
+ layer_past=layer_past,
396
+ attention_mask=attention_mask,
397
+ head_mask=head_mask,
398
+ use_cache=use_cache,
399
+ output_attentions=output_attentions,
400
+ )
401
+ attn_output = attn_outputs[0] # output_attn: a, present, (attentions)
402
+ outputs = attn_outputs[1:]
403
+ # residual connection
404
+ hidden_states = attn_output + residual
405
+
406
+ if encoder_hidden_states is not None:
407
+ # add one self-attention block for cross-attention
408
+ if not hasattr(self, "crossattention"):
409
+ raise ValueError(
410
+ f"If `encoder_hidden_states` are passed, {self} has to be instantiated with "
411
+ "cross-attention layers by setting `config.add_cross_attention=True`"
412
+ )
413
+ residual = hidden_states
414
+ hidden_states = self.ln_cross_attn(hidden_states)
415
+ cross_attn_outputs = self.crossattention(
416
+ hidden_states,
417
+ attention_mask=attention_mask,
418
+ head_mask=head_mask,
419
+ encoder_hidden_states=encoder_hidden_states,
420
+ encoder_attention_mask=encoder_attention_mask,
421
+ output_attentions=output_attentions,
422
+ )
423
+ attn_output = cross_attn_outputs[0]
424
+ # residual connection
425
+ hidden_states = residual + attn_output
426
+ outputs = outputs + cross_attn_outputs[2:] # add cross attentions if we output attention weights
427
+
428
+ residual = hidden_states
429
+ hidden_states = self.ln_2(hidden_states)
430
+ feed_forward_hidden_states = self.mlp(hidden_states)
431
+ # residual connection
432
+ hidden_states = residual + feed_forward_hidden_states
433
+
434
+ if use_cache:
435
+ outputs = (hidden_states,) + outputs
436
+ else:
437
+ outputs = (hidden_states,) + outputs[1:]
438
+
439
+ return outputs # hidden_states, present, (attentions, cross_attentions)
440
+
441
+
442
+ class GPT2PreTrainedModel(PreTrainedModel):
443
+ """
444
+ An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
445
+ models.
446
+ """
447
+
448
+ config_class = GPT2Config
449
+ load_tf_weights = load_tf_weights_in_gpt2
450
+ base_model_prefix = "transformer"
451
+ is_parallelizable = True
452
+ supports_gradient_checkpointing = True
453
+ _no_split_modules = ["GPT2Block"]
454
+
455
+ def __init__(self, *inputs, **kwargs):
456
+ super().__init__(*inputs, **kwargs)
457
+
458
+ def _init_weights(self, module):
459
+ """Initialize the weights."""
460
+ if isinstance(module, (nn.Linear, Conv1D)):
461
+ # Slightly different from the TF version which uses truncated_normal for initialization
462
+ # cf https://github.com/pytorch/pytorch/pull/5617
463
+ module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
464
+ if module.bias is not None:
465
+ module.bias.data.zero_()
466
+ elif isinstance(module, nn.Embedding):
467
+ module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
468
+ if module.padding_idx is not None:
469
+ module.weight.data[module.padding_idx].zero_()
470
+ elif isinstance(module, nn.LayerNorm):
471
+ module.bias.data.zero_()
472
+ module.weight.data.fill_(1.0)
473
+
474
+ # Reinitialize selected weights subject to the OpenAI GPT-2 Paper Scheme:
475
+ # > A modified initialization which accounts for the accumulation on the residual path with model depth. Scale
476
+ # > the weights of residual layers at initialization by a factor of 1/√N where N is the # of residual layers.
477
+ # > -- GPT-2 :: https://openai.com/blog/better-language-models/
478
+ #
479
+ # Reference (Megatron-LM): https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/model/gpt_model.py
480
+ for name, p in module.named_parameters():
481
+ if name == "c_proj.weight":
482
+ # Special Scaled Initialization --> There are 2 Layer Norms per Transformer Block
483
+ p.data.normal_(mean=0.0, std=(self.config.initializer_range / math.sqrt(2 * self.config.n_layer)))
484
+
485
+ def _set_gradient_checkpointing(self, module, value=False):
486
+ if isinstance(module, GPT2Model):
487
+ module.gradient_checkpointing = value
488
+
489
+
490
+ @dataclass
491
+ class GPT2DoubleHeadsModelOutput(ModelOutput):
492
+ """
493
+ Base class for outputs of models predicting if two sentences are consecutive or not.
494
+
495
+ Args:
496
+ loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
497
+ Language modeling loss.
498
+ mc_loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `mc_labels` is provided):
499
+ Multiple choice classification loss.
500
+ logits (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, config.vocab_size)`):
501
+ Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
502
+ mc_logits (`torch.FloatTensor` of shape `(batch_size, num_choices)`):
503
+ Prediction scores of the multiple choice classification head (scores for each choice before SoftMax).
504
+ past_key_values (`Tuple[Tuple[torch.Tensor]]`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
505
+ Tuple of length `config.n_layers`, containing tuples of tensors of shape `(batch_size, num_heads,
506
+ sequence_length, embed_size_per_head)`).
507
+
508
+ Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see
509
+ `past_key_values` input) to speed up sequential decoding.
510
+ hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
511
+ Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
512
+ shape `(batch_size, sequence_length, hidden_size)`.
513
+
514
+ Hidden-states of the model at the output of each layer plus the initial embedding outputs.
515
+ attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
516
+ Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
517
+ sequence_length)`.
518
+
519
+ GPT2Attentions weights after the attention softmax, used to compute the weighted average in the
520
+ self-attention heads.
521
+ """
522
+
523
+ loss: Optional[torch.FloatTensor] = None
524
+ mc_loss: Optional[torch.FloatTensor] = None
525
+ logits: torch.FloatTensor = None
526
+ mc_logits: torch.FloatTensor = None
527
+ past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
528
+ hidden_states: Optional[Tuple[torch.FloatTensor]] = None
529
+ attentions: Optional[Tuple[torch.FloatTensor]] = None
530
+
531
+
532
+ GPT2_START_DOCSTRING = r"""
533
+
534
+ This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
535
+ library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads
536
+ etc.)
537
+
538
+ This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
539
+ Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
540
+ and behavior.
541
+
542
+ Parameters:
543
+ config ([`GPT2Config`]): Model configuration class with all the parameters of the model.
544
+ Initializing with a config file does not load the weights associated with the model, only the
545
+ configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
546
+ """
547
+
548
+ GPT2_INPUTS_DOCSTRING = r"""
549
+ Args:
550
+ input_ids (`torch.LongTensor` of shape `(batch_size, input_ids_length)`):
551
+ `input_ids_length` = `sequence_length` if `past_key_values` is `None` else
552
+ `past_key_values[0][0].shape[-2]` (`sequence_length` of input past key value states). Indices of input
553
+ sequence tokens in the vocabulary.
554
+
555
+ If `past_key_values` is used, only `input_ids` that do not have their past calculated should be passed as
556
+ `input_ids`.
557
+
558
+ Indices can be obtained using [`GPT2Tokenizer`]. See [`PreTrainedTokenizer.encode`] and
559
+ [`PreTrainedTokenizer.__call__`] for details.
560
+
561
+ [What are input IDs?](../glossary#input-ids)
562
+ past_key_values (`Tuple[Tuple[torch.Tensor]]` of length `config.n_layers`):
563
+ Contains precomputed hidden-states (key and values in the attention blocks) as computed by the model (see
564
+ `past_key_values` output below). Can be used to speed up sequential decoding. The `input_ids` which have
565
+ their past given to this model should not be passed as `input_ids` as they have already been computed.
566
+ attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
567
+ Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
568
+
569
+ - 1 for tokens that are **not masked**,
570
+ - 0 for tokens that are **masked**.
571
+
572
+ If `past_key_values` is used, `attention_mask` needs to contain the masking strategy that was used for
573
+ `past_key_values`. In other words, the `attention_mask` always has to have the length:
574
+ `len(past_key_values) + len(input_ids)`
575
+
576
+ [What are attention masks?](../glossary#attention-mask)
577
+ token_type_ids (`torch.LongTensor` of shape `(batch_size, input_ids_length)`, *optional*):
578
+ Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
579
+ 1]`:
580
+
581
+ - 0 corresponds to a *sentence A* token,
582
+ - 1 corresponds to a *sentence B* token.
583
+
584
+ [What are token type IDs?](../glossary#token-type-ids)
585
+ position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
586
+ Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
587
+ config.max_position_embeddings - 1]`.
588
+
589
+ [What are position IDs?](../glossary#position-ids)
590
+ head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
591
+ Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:
592
+
593
+ - 1 indicates the head is **not masked**,
594
+ - 0 indicates the head is **masked**.
595
+
596
+ inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
597
+ Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
598
+ is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
599
+ model's internal embedding lookup matrix.
600
+
601
+ If `past_key_values` is used, optionally only the last `inputs_embeds` have to be input (see
602
+ `past_key_values`).
603
+ use_cache (`bool`, *optional*):
604
+ If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
605
+ `past_key_values`).
606
+ output_attentions (`bool`, *optional*):
607
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
608
+ tensors for more detail.
609
+ output_hidden_states (`bool`, *optional*):
610
+ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
611
+ more detail.
612
+ return_dict (`bool`, *optional*):
613
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
614
+ """
615
+ PARALLELIZE_DOCSTRING = r"""
616
+ This is an experimental feature and is subject to change at a moment's notice.
617
+
618
+ Uses a device map to distribute attention modules of the model across several devices. If no device map is given,
619
+ it will evenly distribute blocks across all devices.
620
+
621
+ Args:
622
+ device_map (`Dict[int, list]`, optional, defaults to None):
623
+ A dictionary that maps attention modules to devices. Note that the embedding module and LMHead are always
624
+ automatically mapped to the first device (for esoteric reasons). That means that the first device should
625
+ have fewer attention modules mapped to it than other devices. For reference, the gpt2 models have the
626
+ following number of attention modules:
627
+
628
+ - gpt2: 12
629
+ - gpt2-medium: 24
630
+ - gpt2-large: 36
631
+ - gpt2-xl: 48
632
+
633
+ Example:
634
+
635
+ ```python
636
+ # Here is an example of a device map on a machine with 4 GPUs using gpt2-xl, which has a total of 48 attention modules:
637
+ model = GPT2LMHeadModel.from_pretrained("gpt2-xl")
638
+ device_map = {
639
+ 0: [0, 1, 2, 3, 4, 5, 6, 7, 8],
640
+ 1: [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21],
641
+ 2: [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34],
642
+ 3: [35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
643
+ }
644
+ model.parallelize(device_map)
645
+ ```
646
+ """
647
+ DEPARALLELIZE_DOCSTRING = r"""
648
+ Moves the model to cpu from a model parallel state.
649
+
650
+ Example:
651
+
652
+ ```python
653
+ # On a 4 GPU machine with gpt2-large:
654
+ model = GPT2LMHeadModel.from_pretrained("gpt2-large")
655
+ device_map = {
656
+ 0: [0, 1, 2, 3, 4, 5, 6, 7],
657
+ 1: [8, 9, 10, 11, 12, 13, 14, 15],
658
+ 2: [16, 17, 18, 19, 20, 21, 22, 23],
659
+ 3: [24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35],
660
+ }
661
+ model.parallelize(device_map) # Splits the model across several devices
662
+ model.deparallelize() # Puts the model back on cpu and cleans memory by calling torch.cuda.empty_cache()
663
+ ```
664
+ """
665
+
666
+
667
+ @add_start_docstrings(
668
+ "The bare GPT2 Model transformer outputting raw hidden-states without any specific head on top.",
669
+ GPT2_START_DOCSTRING,
670
+ )
671
+ class GPT2Model(GPT2PreTrainedModel):
672
+ _keys_to_ignore_on_load_missing = ["attn.masked_bias"]
673
+
674
+ def __init__(self, config):
675
+ super().__init__(config)
676
+
677
+ self.embed_dim = config.hidden_size
678
+
679
+ self.wte = nn.Embedding(config.vocab_size, self.embed_dim)
680
+ self.wpe = nn.Embedding(config.max_position_embeddings, self.embed_dim)
681
+
682
+ self.drop = nn.Dropout(config.embd_pdrop)
683
+ self.h = nn.ModuleList([GPT2Block(config, layer_idx=i) for i in range(config.num_hidden_layers)])
684
+ self.ln_f = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_epsilon)
685
+
686
+ # Model parallel
687
+ self.model_parallel = False
688
+ self.device_map = None
689
+ self.gradient_checkpointing = False
690
+
691
+ # Initialize weights and apply final processing
692
+ self.post_init()
693
+
694
+ @add_start_docstrings(PARALLELIZE_DOCSTRING)
695
+ def parallelize(self, device_map=None):
696
+ # Check validity of device_map
697
+ self.device_map = (
698
+ get_device_map(len(self.h), range(torch.cuda.device_count())) if device_map is None else device_map
699
+ )
700
+ assert_device_map(self.device_map, len(self.h))
701
+ self.model_parallel = True
702
+ self.first_device = "cpu" if "cpu" in self.device_map.keys() else "cuda:" + str(min(self.device_map.keys()))
703
+ self.last_device = "cuda:" + str(max(self.device_map.keys()))
704
+ self.wte = self.wte.to(self.first_device)
705
+ self.wpe = self.wpe.to(self.first_device)
706
+ # Load onto devices
707
+ for k, v in self.device_map.items():
708
+ for block in v:
709
+ cuda_device = "cuda:" + str(k)
710
+ self.h[block] = self.h[block].to(cuda_device)
711
+ # ln_f to last
712
+ self.ln_f = self.ln_f.to(self.last_device)
713
+
714
+ @add_start_docstrings(DEPARALLELIZE_DOCSTRING)
715
+ def deparallelize(self):
716
+ self.model_parallel = False
717
+ self.device_map = None
718
+ self.first_device = "cpu"
719
+ self.last_device = "cpu"
720
+ self.wte = self.wte.to("cpu")
721
+ self.wpe = self.wpe.to("cpu")
722
+ for index in range(len(self.h)):
723
+ self.h[index] = self.h[index].to("cpu")
724
+ self.ln_f = self.ln_f.to("cpu")
725
+ torch.cuda.empty_cache()
726
+
727
+ def get_input_embeddings(self):
728
+ return self.wte
729
+
730
+ def set_input_embeddings(self, new_embeddings):
731
+ self.wte = new_embeddings
732
+
733
+ def _prune_heads(self, heads_to_prune):
734
+ """
735
+ Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer}
736
+ """
737
+ for layer, heads in heads_to_prune.items():
738
+ self.h[layer].attn.prune_heads(heads)
739
+
740
+ @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
741
+ @add_code_sample_docstrings(
742
+ processor_class=_TOKENIZER_FOR_DOC,
743
+ checkpoint=_CHECKPOINT_FOR_DOC,
744
+ output_type=BaseModelOutputWithPastAndCrossAttentions,
745
+ config_class=_CONFIG_FOR_DOC,
746
+ )
747
+ def forward(
748
+ self,
749
+ input_ids: Optional[torch.LongTensor] = None,
750
+ past_key_values: Optional[Tuple[Tuple[torch.Tensor]]] = None,
751
+ attention_mask: Optional[torch.FloatTensor] = None,
752
+ token_type_ids: Optional[torch.LongTensor] = None,
753
+ position_ids: Optional[torch.LongTensor] = None,
754
+ head_mask: Optional[torch.FloatTensor] = None,
755
+ inputs_embeds: Optional[torch.FloatTensor] = None,
756
+ encoder_hidden_states: Optional[torch.Tensor] = None,
757
+ encoder_attention_mask: Optional[torch.FloatTensor] = None,
758
+ use_cache: Optional[bool] = None,
759
+ output_attentions: Optional[bool] = None,
760
+ output_hidden_states: Optional[bool] = None,
761
+ return_dict: Optional[bool] = None,
762
+ ) -> Union[Tuple, BaseModelOutputWithPastAndCrossAttentions]:
763
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
764
+ output_hidden_states = (
765
+ output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
766
+ )
767
+ use_cache = use_cache if use_cache is not None else self.config.use_cache
768
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
769
+
770
+ if input_ids is not None and inputs_embeds is not None:
771
+ raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
772
+ elif input_ids is not None:
773
+ input_shape = input_ids.size()
774
+ input_ids = input_ids.view(-1, input_shape[-1])
775
+ batch_size = input_ids.shape[0]
776
+ elif inputs_embeds is not None:
777
+ input_shape = inputs_embeds.size()[:-1]
778
+ batch_size = inputs_embeds.shape[0]
779
+ else:
780
+ raise ValueError("You have to specify either input_ids or inputs_embeds")
781
+
782
+ device = input_ids.device if input_ids is not None else inputs_embeds.device
783
+
784
+ if token_type_ids is not None:
785
+ token_type_ids = token_type_ids.view(-1, input_shape[-1])
786
+ if position_ids is not None:
787
+ position_ids = position_ids.view(-1, input_shape[-1])
788
+
789
+ if past_key_values is None:
790
+ past_length = 0
791
+ past_key_values = tuple([None] * len(self.h))
792
+ else:
793
+ past_length = past_key_values[0][0].size(-2)
794
+ if position_ids is None:
795
+ position_ids = torch.arange(past_length, input_shape[-1] + past_length, dtype=torch.long, device=device)
796
+ position_ids = position_ids.unsqueeze(0).view(-1, input_shape[-1])
797
+
798
+ # GPT2Attention mask.
799
+ if attention_mask is not None:
800
+ if batch_size <= 0:
801
+ raise ValueError("batch_size has to be defined and > 0")
802
+ attention_mask = attention_mask.view(batch_size, -1)
803
+ # We create a 3D attention mask from a 2D tensor mask.
804
+ # Sizes are [batch_size, 1, 1, to_seq_length]
805
+ # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
806
+ # this attention mask is more simple than the triangular masking of causal attention
807
+ # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
808
+ attention_mask = attention_mask[:, None, None, :]
809
+
810
+ # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
811
+ # masked positions, this operation will create a tensor which is 0.0 for
812
+ # positions we want to attend and the dtype's smallest value for masked positions.
813
+ # Since we are adding it to the raw scores before the softmax, this is
814
+ # effectively the same as removing these entirely.
815
+ attention_mask = attention_mask.to(dtype=self.dtype) # fp16 compatibility
816
+ attention_mask = (1.0 - attention_mask) * torch.finfo(self.dtype).min
817
+
818
+ # If a 2D or 3D attention mask is provided for the cross-attention
819
+ # we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length]
820
+ if self.config.add_cross_attention and encoder_hidden_states is not None:
821
+ encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states.size()
822
+ encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length)
823
+ if encoder_attention_mask is None:
824
+ encoder_attention_mask = torch.ones(encoder_hidden_shape, device=device)
825
+ encoder_attention_mask = self.invert_attention_mask(encoder_attention_mask)
826
+ else:
827
+ encoder_attention_mask = None
828
+
829
+ # Prepare head mask if needed
830
+ # 1.0 in head_mask indicate we keep the head
831
+ # attention_probs has shape bsz x n_heads x N x N
832
+ # head_mask has shape n_layer x batch x n_heads x N x N
833
+ head_mask = self.get_head_mask(head_mask, self.config.n_layer)
834
+
835
+ if inputs_embeds is None:
836
+ inputs_embeds = self.wte(input_ids)
837
+ position_embeds = self.wpe(position_ids)
838
+ hidden_states = inputs_embeds + position_embeds
839
+
840
+ if token_type_ids is not None:
841
+ token_type_embeds = self.wte(token_type_ids)
842
+ hidden_states = hidden_states + token_type_embeds
843
+
844
+ hidden_states = self.drop(hidden_states)
845
+
846
+ output_shape = input_shape + (hidden_states.size(-1),)
847
+
848
+ presents = () if use_cache else None
849
+ all_self_attentions = () if output_attentions else None
850
+ all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
851
+ all_hidden_states = () if output_hidden_states else None
852
+ for i, (block, layer_past) in enumerate(zip(self.h, past_key_values)):
853
+
854
+ # Model parallel
855
+ if self.model_parallel:
856
+ torch.cuda.set_device(hidden_states.device)
857
+ # Ensure layer_past is on same device as hidden_states (might not be correct)
858
+ if layer_past is not None:
859
+ layer_past = tuple(past_state.to(hidden_states.device) for past_state in layer_past)
860
+ # Ensure that attention_mask is always on the same device as hidden_states
861
+ if attention_mask is not None:
862
+ attention_mask = attention_mask.to(hidden_states.device)
863
+ if isinstance(head_mask, torch.Tensor):
864
+ head_mask = head_mask.to(hidden_states.device)
865
+ if output_hidden_states:
866
+ all_hidden_states = all_hidden_states + (hidden_states,)
867
+
868
+ if self.gradient_checkpointing and self.training:
869
+
870
+ if use_cache:
871
+ logger.warning(
872
+ "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
873
+ )
874
+ use_cache = False
875
+
876
+ def create_custom_forward(module):
877
+ def custom_forward(*inputs):
878
+ # None for past_key_value
879
+ return module(*inputs, use_cache, output_attentions)
880
+
881
+ return custom_forward
882
+
883
+ outputs = torch.utils.checkpoint.checkpoint(
884
+ create_custom_forward(block),
885
+ hidden_states,
886
+ None,
887
+ attention_mask,
888
+ head_mask[i],
889
+ encoder_hidden_states,
890
+ encoder_attention_mask,
891
+ )
892
+ else:
893
+ outputs = block(
894
+ hidden_states,
895
+ layer_past=layer_past,
896
+ attention_mask=attention_mask,
897
+ head_mask=head_mask[i],
898
+ encoder_hidden_states=encoder_hidden_states,
899
+ encoder_attention_mask=encoder_attention_mask,
900
+ use_cache=use_cache,
901
+ output_attentions=output_attentions,
902
+ )
903
+
904
+ hidden_states = outputs[0]
905
+ if use_cache is True:
906
+ presents = presents + (outputs[1],)
907
+
908
+ if output_attentions:
909
+ all_self_attentions = all_self_attentions + (outputs[2 if use_cache else 1],)
910
+ if self.config.add_cross_attention:
911
+ all_cross_attentions = all_cross_attentions + (outputs[3 if use_cache else 2],)
912
+
913
+ # Model Parallel: If it's the last layer for that device, put things on the next device
914
+ if self.model_parallel:
915
+ for k, v in self.device_map.items():
916
+ if i == v[-1] and "cuda:" + str(k) != self.last_device:
917
+ hidden_states = hidden_states.to("cuda:" + str(k + 1))
918
+
919
+ hidden_states = self.ln_f(hidden_states)
920
+
921
+ hidden_states = hidden_states.view(output_shape)
922
+ # Add last hidden state
923
+ if output_hidden_states:
924
+ all_hidden_states = all_hidden_states + (hidden_states,)
925
+
926
+ if not return_dict:
927
+ return tuple(
928
+ v
929
+ for v in [hidden_states, presents, all_hidden_states, all_self_attentions, all_cross_attentions]
930
+ if v is not None
931
+ )
932
+
933
+ return BaseModelOutputWithPastAndCrossAttentions(
934
+ last_hidden_state=hidden_states,
935
+ past_key_values=presents,
936
+ hidden_states=all_hidden_states,
937
+ attentions=all_self_attentions,
938
+ cross_attentions=all_cross_attentions,
939
+ )
940
+
941
+
942
+ @add_start_docstrings(
943
+ """
944
+ The GPT2 Model transformer with a language modeling head on top (linear layer with weights tied to the input
945
+ embeddings).
946
+ """,
947
+ GPT2_START_DOCSTRING,
948
+ )
949
+ class GPT2CustomLMHeadModel(GPT2PreTrainedModel, GenerationMixin):
950
+ _keys_to_ignore_on_load_missing = [r"attn.masked_bias", r"attn.bias", r"lm_head.weight"]
951
+
952
+ def __init__(self, config):
953
+ super().__init__(config)
954
+ self.transformer = GPT2Model(config)
955
+ self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
956
+
957
+ # Model parallel
958
+ self.model_parallel = False
959
+ self.device_map = None
960
+
961
+ # Initialize weights and apply final processing
962
+ self.post_init()
963
+
964
+ @add_start_docstrings(PARALLELIZE_DOCSTRING)
965
+ def parallelize(self, device_map=None):
966
+ self.device_map = (
967
+ get_device_map(len(self.transformer.h), range(torch.cuda.device_count()))
968
+ if device_map is None
969
+ else device_map
970
+ )
971
+ assert_device_map(self.device_map, len(self.transformer.h))
972
+ self.transformer.parallelize(self.device_map)
973
+ self.lm_head = self.lm_head.to(self.transformer.first_device)
974
+ self.model_parallel = True
975
+
976
+ @add_start_docstrings(DEPARALLELIZE_DOCSTRING)
977
+ def deparallelize(self):
978
+ self.transformer.deparallelize()
979
+ self.transformer = self.transformer.to("cpu")
980
+ self.lm_head = self.lm_head.to("cpu")
981
+ self.model_parallel = False
982
+ torch.cuda.empty_cache()
983
+
984
+ def get_output_embeddings(self):
985
+ return self.lm_head
986
+
987
+ def set_output_embeddings(self, new_embeddings):
988
+ self.lm_head = new_embeddings
989
+
990
+ def prepare_inputs_for_generation(self, input_ids, past=None, **kwargs):
991
+ token_type_ids = kwargs.get("token_type_ids", None)
992
+ # only last token for input_ids if past is defined in kwargs
993
+ if past:
994
+ input_ids = input_ids[:, -1].unsqueeze(-1)
995
+ if token_type_ids is not None:
996
+ token_type_ids = token_type_ids[:, -1].unsqueeze(-1)
997
+
998
+ attention_mask = kwargs.get("attention_mask", None)
999
+ position_ids = kwargs.get("position_ids", None)
1000
+
1001
+ if attention_mask is not None and position_ids is None:
1002
+ # create position_ids on the fly for batch generation
1003
+ position_ids = attention_mask.long().cumsum(-1) - 1
1004
+ position_ids.masked_fill_(attention_mask == 0, 1)
1005
+ if past:
1006
+ position_ids = position_ids[:, -1].unsqueeze(-1)
1007
+ else:
1008
+ position_ids = None
1009
+ return {
1010
+ "input_ids": input_ids,
1011
+ "past_key_values": past,
1012
+ "use_cache": kwargs.get("use_cache"),
1013
+ "position_ids": position_ids,
1014
+ "attention_mask": attention_mask,
1015
+ "token_type_ids": token_type_ids,
1016
+ }
1017
+
1018
+ @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
1019
+ @add_code_sample_docstrings(
1020
+ processor_class=_TOKENIZER_FOR_DOC,
1021
+ checkpoint=_CHECKPOINT_FOR_DOC,
1022
+ output_type=CausalLMOutputWithCrossAttentions,
1023
+ config_class=_CONFIG_FOR_DOC,
1024
+ )
1025
+ def forward(
1026
+ self,
1027
+ input_ids: Optional[torch.LongTensor] = None,
1028
+ past_key_values: Optional[Tuple[Tuple[torch.Tensor]]] = None,
1029
+ attention_mask: Optional[torch.FloatTensor] = None,
1030
+ token_type_ids: Optional[torch.LongTensor] = None,
1031
+ position_ids: Optional[torch.LongTensor] = None,
1032
+ head_mask: Optional[torch.FloatTensor] = None,
1033
+ inputs_embeds: Optional[torch.FloatTensor] = None,
1034
+ encoder_hidden_states: Optional[torch.Tensor] = None,
1035
+ encoder_attention_mask: Optional[torch.FloatTensor] = None,
1036
+ labels: Optional[torch.LongTensor] = None,
1037
+ use_cache: Optional[bool] = None,
1038
+ output_attentions: Optional[bool] = None,
1039
+ output_hidden_states: Optional[bool] = None,
1040
+ return_dict: Optional[bool] = None,
1041
+ ) -> Union[Tuple, CausalLMOutputWithCrossAttentions]:
1042
+ r"""
1043
+ labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
1044
+ Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set
1045
+ `labels = input_ids`. Indices are selected in `[-100, 0, ..., config.vocab_size]`. All labels set to `-100`
1046
+ are ignored (masked); the loss is only computed for labels in `[0, ..., config.vocab_size]`
1047
+ """
1048
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
1049
+
1050
+ transformer_outputs = self.transformer(
1051
+ input_ids,
1052
+ past_key_values=past_key_values,
1053
+ attention_mask=attention_mask,
1054
+ token_type_ids=token_type_ids,
1055
+ position_ids=position_ids,
1056
+ head_mask=head_mask,
1057
+ inputs_embeds=inputs_embeds,
1058
+ encoder_hidden_states=encoder_hidden_states,
1059
+ encoder_attention_mask=encoder_attention_mask,
1060
+ use_cache=use_cache,
1061
+ output_attentions=output_attentions,
1062
+ output_hidden_states=output_hidden_states,
1063
+ return_dict=return_dict,
1064
+ )
1065
+ hidden_states = transformer_outputs[0]
1066
+
1067
+ # Set device for model parallelism
1068
+ if self.model_parallel:
1069
+ torch.cuda.set_device(self.transformer.first_device)
1070
+ hidden_states = hidden_states.to(self.lm_head.weight.device)
1071
+
1072
+ lm_logits = self.lm_head(hidden_states)
1073
+
1074
+ loss = None
1075
+ if labels is not None:
1076
+ # Shift so that tokens < n predict n
1077
+ shift_logits = lm_logits[..., :-1, :].contiguous()
1078
+ shift_labels = labels[..., 1:].contiguous()
1079
+ # Flatten the tokens
1080
+ loss_fct = CrossEntropyLoss()
1081
+ loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
1082
+
1083
+ if not return_dict:
1084
+ output = (lm_logits,) + transformer_outputs[1:]
1085
+ return ((loss,) + output) if loss is not None else output
1086
+
1087
+ return CausalLMOutputWithCrossAttentions(
1088
+ loss=loss,
1089
+ logits=lm_logits,
1090
+ past_key_values=transformer_outputs.past_key_values,
1091
+ hidden_states=transformer_outputs.hidden_states,
1092
+ attentions=transformer_outputs.attentions,
1093
+ cross_attentions=transformer_outputs.cross_attentions,
1094
+ )
1095
+
1096
+ @staticmethod
1097
+ def _reorder_cache(past: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor) -> Tuple[Tuple[torch.Tensor]]:
1098
+ """
1099
+ This function is used to re-order the `past_key_values` cache if [`~PreTrainedModel.beam_search`] or
1100
+ [`~PreTrainedModel.beam_sample`] is called. This is required to match `past_key_values` with the correct
1101
+ beam_idx at every generation step.
1102
+ """
1103
+ return tuple(
1104
+ tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past)
1105
+ for layer_past in past
1106
+ )
1107
+
1108
+
1109
+ @add_start_docstrings(
1110
+ """
1111
+ The GPT2 Model transformer with a language modeling and a multiple-choice classification head on top e.g. for
1112
+ RocStories/SWAG tasks. The two heads are two linear layers. The language modeling head has its weights tied to the
1113
+ input embeddings; the classification head takes as input the hidden state at a specified classification token index in the
1114
+ input sequence.
1115
+ """,
1116
+ GPT2_START_DOCSTRING,
1117
+ )
1118
+ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
1119
+ _keys_to_ignore_on_load_missing = [r"attn.masked_bias", r"attn.bias", r"lm_head.weight"]
1120
+
1121
+ def __init__(self, config):
1122
+ super().__init__(config)
1123
+ config.num_labels = 1
1124
+ self.transformer = GPT2Model(config)
1125
+ self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
1126
+ self.multiple_choice_head = SequenceSummary(config)
1127
+
1128
+ # Model parallel
1129
+ self.model_parallel = False
1130
+ self.device_map = None
1131
+
1132
+ # Initialize weights and apply final processing
1133
+ self.post_init()
1134
+
1135
+ @add_start_docstrings(PARALLELIZE_DOCSTRING)
1136
+ def parallelize(self, device_map=None):
1137
+ self.device_map = (
1138
+ get_device_map(len(self.transformer.h), range(torch.cuda.device_count()))
1139
+ if device_map is None
1140
+ else device_map
1141
+ )
1142
+ assert_device_map(self.device_map, len(self.transformer.h))
1143
+ self.transformer.parallelize(self.device_map)
1144
+ self.lm_head = self.lm_head.to(self.transformer.first_device)
1145
+ self.multiple_choice_head = self.multiple_choice_head.to(self.transformer.first_device)
1146
+ self.model_parallel = True
1147
+
1148
+ @add_start_docstrings(DEPARALLELIZE_DOCSTRING)
1149
+ def deparallelize(self):
1150
+ self.transformer.deparallelize()
1151
+ self.transformer = self.transformer.to("cpu")
1152
+ self.lm_head = self.lm_head.to("cpu")
1153
+ self.multiple_choice_head = self.multiple_choice_head.to("cpu")
1154
+ self.model_parallel = False
1155
+ torch.cuda.empty_cache()
1156
+
1157
+ def get_output_embeddings(self):
1158
+ return self.lm_head
1159
+
1160
+ def set_output_embeddings(self, new_embeddings):
1161
+ self.lm_head = new_embeddings
1162
+
1163
+ def prepare_inputs_for_generation(self, input_ids, past=None, **kwargs):
1164
+ token_type_ids = kwargs.get("token_type_ids", None)
1165
+ # only last token for input_ids if past is defined in kwargs
1166
+ if past:
1167
+ input_ids = input_ids[:, -1].unsqueeze(-1)
1168
+ if token_type_ids is not None:
1169
+ token_type_ids = token_type_ids[:, -1].unsqueeze(-1)
1170
+
1171
+ attention_mask = kwargs.get("attention_mask", None)
1172
+ position_ids = kwargs.get("position_ids", None)
1173
+
1174
+ if attention_mask is not None and position_ids is None:
1175
+ # create position_ids on the fly for batch generation
1176
+ position_ids = attention_mask.long().cumsum(-1) - 1
1177
+ position_ids.masked_fill_(attention_mask == 0, 1)
1178
+ if past:
1179
+ position_ids = position_ids[:, -1].unsqueeze(-1)
1180
+ else:
1181
+ position_ids = None
1182
+
1183
+ return {
1184
+ "input_ids": input_ids,
1185
+ "past_key_values": past,
1186
+ "use_cache": kwargs.get("use_cache"),
1187
+ "position_ids": position_ids,
1188
+ "attention_mask": attention_mask,
1189
+ "token_type_ids": token_type_ids,
1190
+ }
1191
+
1192
+ @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
1193
+ @replace_return_docstrings(output_type=GPT2DoubleHeadsModelOutput, config_class=_CONFIG_FOR_DOC)
1194
+ def forward(
1195
+ self,
1196
+ input_ids: Optional[torch.LongTensor] = None,
1197
+ past_key_values: Optional[Tuple[Tuple[torch.Tensor]]] = None,
1198
+ attention_mask: Optional[torch.FloatTensor] = None,
1199
+ token_type_ids: Optional[torch.LongTensor] = None,
1200
+ position_ids: Optional[torch.LongTensor] = None,
1201
+ head_mask: Optional[torch.FloatTensor] = None,
1202
+ inputs_embeds: Optional[torch.FloatTensor] = None,
1203
+ mc_token_ids: Optional[torch.LongTensor] = None,
1204
+ labels: Optional[torch.LongTensor] = None,
1205
+ mc_labels: Optional[torch.LongTensor] = None,
1206
+ use_cache: Optional[bool] = None,
1207
+ output_attentions: Optional[bool] = None,
1208
+ output_hidden_states: Optional[bool] = None,
1209
+ return_dict: Optional[bool] = None,
1210
+ **kwargs,
1211
+ ) -> Union[Tuple, GPT2DoubleHeadsModelOutput]:
1212
+ r"""
1213
+ mc_token_ids (`torch.LongTensor` of shape `(batch_size, num_choices)`, *optional*, defaults to the index of the last token of the input):
1214
+ Index of the classification token in each input sequence. Selected in the range `[0, input_ids.size(-1) -
1215
+ 1]`.
1216
+ labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
1217
+ Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set
1218
+ `labels = input_ids`. Indices are selected in `[-100, 0, ..., config.vocab_size - 1]`. All labels set to
1219
+ `-100` are ignored (masked), the loss is only computed for labels in `[0, ..., config.vocab_size - 1]`
1220
+ mc_labels (`torch.LongTensor` of shape `(batch_size)`, *optional*):
1221
+ Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
1222
+ where *num_choices* is the size of the second dimension of the input tensors. (see *input_ids* above)
1223
+
1224
+ Return:
1225
+
1226
+ Example:
1227
+
1228
+ ```python
1229
+ >>> import torch
1230
+ >>> from transformers import GPT2Tokenizer, GPT2DoubleHeadsModel
1231
+
1232
+ >>> tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
1233
+ >>> model = GPT2DoubleHeadsModel.from_pretrained("gpt2")
1234
+
1235
+ >>> # Add a [CLS] to the vocabulary (we should train it also!)
1236
+ >>> num_added_tokens = tokenizer.add_special_tokens({"cls_token": "[CLS]"})
1237
+ >>> # Update the model embeddings with the new vocabulary size
1238
+ >>> embedding_layer = model.resize_token_embeddings(len(tokenizer))
1239
+
1240
+ >>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
1241
+ >>> encoded_choices = [tokenizer.encode(s) for s in choices]
1242
+ >>> cls_token_location = [tokens.index(tokenizer.cls_token_id) for tokens in encoded_choices]
1243
+
1244
+ >>> input_ids = torch.tensor(encoded_choices).unsqueeze(0) # Batch size: 1, number of choices: 2
1245
+ >>> mc_token_ids = torch.tensor([cls_token_location]) # Batch size: 1
1246
+
1247
+ >>> outputs = model(input_ids, mc_token_ids=mc_token_ids)
1248
+ >>> lm_logits = outputs.logits
1249
+ >>> mc_logits = outputs.mc_logits
1250
+ ```"""
1251
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
1252
+
1253
+ transformer_outputs = self.transformer(
1254
+ input_ids,
1255
+ past_key_values=past_key_values,
1256
+ attention_mask=attention_mask,
1257
+ token_type_ids=token_type_ids,
1258
+ position_ids=position_ids,
1259
+ head_mask=head_mask,
1260
+ inputs_embeds=inputs_embeds,
1261
+ use_cache=use_cache,
1262
+ output_attentions=output_attentions,
1263
+ output_hidden_states=output_hidden_states,
1264
+ return_dict=return_dict,
1265
+ )
1266
+
1267
+ hidden_states = transformer_outputs[0]
1268
+
1269
+ # Set device for model parallelism
1270
+ if self.model_parallel:
1271
+ torch.cuda.set_device(self.transformer.first_device)
1272
+ hidden_states = hidden_states.to(self.lm_head.weight.device)
1273
+
1274
+ lm_logits = self.lm_head(hidden_states)
1275
+ mc_logits = self.multiple_choice_head(hidden_states, mc_token_ids).squeeze(-1)
1276
+
1277
+ mc_loss = None
1278
+ if mc_labels is not None:
1279
+ loss_fct = CrossEntropyLoss()
1280
+ mc_loss = loss_fct(mc_logits.view(-1, mc_logits.size(-1)), mc_labels.view(-1))
1281
+ lm_loss = None
1282
+ if labels is not None:
1283
+ shift_logits = lm_logits[..., :-1, :].contiguous()
1284
+ shift_labels = labels[..., 1:].contiguous()
1285
+ loss_fct = CrossEntropyLoss()
1286
+ lm_loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
1287
+
1288
+ if not return_dict:
1289
+ output = (lm_logits, mc_logits) + transformer_outputs[1:]
1290
+ if mc_loss is not None:
1291
+ output = (mc_loss,) + output
1292
+ return ((lm_loss,) + output) if lm_loss is not None else output
1293
+
1294
+ return GPT2DoubleHeadsModelOutput(
1295
+ loss=lm_loss,
1296
+ mc_loss=mc_loss,
1297
+ logits=lm_logits,
1298
+ mc_logits=mc_logits,
1299
+ past_key_values=transformer_outputs.past_key_values,
1300
+ hidden_states=transformer_outputs.hidden_states,
1301
+ attentions=transformer_outputs.attentions,
1302
+ )
1303
+
1304
+ @staticmethod
1305
+ def _reorder_cache(past: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor) -> Tuple[Tuple[torch.Tensor]]:
1306
+ """
1307
+ This function is used to re-order the `past_key_values` cache if [`~PreTrainedModel.beam_search`] or
1308
+ [`~PreTrainedModel.beam_sample`] is called. This is required to match `past_key_values` with the correct
1309
+ beam_idx at every generation step.
1310
+ """
1311
+ return tuple(
1312
+ tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past)
1313
+ for layer_past in past
1314
+ )
1315
+
1316
+
1317
+ @add_start_docstrings(
1318
+ """
1319
+ The GPT2 Model transformer with a sequence classification head on top (linear layer).
1320
+
1321
+ [`GPT2ForSequenceClassification`] uses the last token in order to do the classification, as other causal models
1322
+ (e.g. GPT-1) do.
1323
+
1324
+ Since it does classification on the last token, it needs to know the position of the last token. If a
1325
+ `pad_token_id` is defined in the configuration, it finds the last token that is not a padding token in each row. If
1326
+ no `pad_token_id` is defined, it simply takes the last value in each row of the batch. Since it cannot guess the
1327
+ padding tokens when `inputs_embeds` are passed instead of `input_ids`, it does the same (take the last value in
1328
+ each row of the batch).
1329
+ """,
1330
+ GPT2_START_DOCSTRING,
1331
+ )
1332
+ class GPT2ForSequenceClassification(GPT2PreTrainedModel):
1333
+ _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"lm_head.weight"]
1334
+
1335
+ def __init__(self, config):
1336
+ super().__init__(config)
1337
+ self.num_labels = config.num_labels
1338
+ self.transformer = GPT2Model(config)
1339
+ self.score = nn.Linear(config.n_embd, self.num_labels, bias=False)
1340
+
1341
+ # Model parallel
1342
+ self.model_parallel = False
1343
+ self.device_map = None
1344
+
1345
+ # Initialize weights and apply final processing
1346
+ self.post_init()
1347
+
1348
+ @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
1349
+ @add_code_sample_docstrings(
1350
+ processor_class=_TOKENIZER_FOR_DOC,
1351
+ checkpoint="microsoft/DialogRPT-updown",
1352
+ output_type=SequenceClassifierOutputWithPast,
1353
+ config_class=_CONFIG_FOR_DOC,
1354
+ expected_output="'LABEL_0'",
1355
+ expected_loss=5.28,
1356
+ )
1357
+ def forward(
1358
+ self,
1359
+ input_ids: Optional[torch.LongTensor] = None,
1360
+ past_key_values: Optional[Tuple[Tuple[torch.Tensor]]] = None,
1361
+ attention_mask: Optional[torch.FloatTensor] = None,
1362
+ token_type_ids: Optional[torch.LongTensor] = None,
1363
+ position_ids: Optional[torch.LongTensor] = None,
1364
+ head_mask: Optional[torch.FloatTensor] = None,
1365
+ inputs_embeds: Optional[torch.FloatTensor] = None,
1366
+ labels: Optional[torch.LongTensor] = None,
1367
+ use_cache: Optional[bool] = None,
1368
+ output_attentions: Optional[bool] = None,
1369
+ output_hidden_states: Optional[bool] = None,
1370
+ return_dict: Optional[bool] = None,
1371
+ ) -> Union[Tuple, SequenceClassifierOutputWithPast]:
1372
+ r"""
1373
+ labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
1374
+ Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
1375
+ config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss). If
1376
+ `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
1377
+ """
1378
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
1379
+
1380
+ transformer_outputs = self.transformer(
1381
+ input_ids,
1382
+ past_key_values=past_key_values,
1383
+ attention_mask=attention_mask,
1384
+ token_type_ids=token_type_ids,
1385
+ position_ids=position_ids,
1386
+ head_mask=head_mask,
1387
+ inputs_embeds=inputs_embeds,
1388
+ use_cache=use_cache,
1389
+ output_attentions=output_attentions,
1390
+ output_hidden_states=output_hidden_states,
1391
+ return_dict=return_dict,
1392
+ )
1393
+ hidden_states = transformer_outputs[0]
1394
+ logits = self.score(hidden_states)
1395
+
1396
+ if input_ids is not None:
1397
+ batch_size, sequence_length = input_ids.shape[:2]
1398
+ else:
1399
+ batch_size, sequence_length = inputs_embeds.shape[:2]
1400
+
1401
+ assert (
1402
+ self.config.pad_token_id is not None or batch_size == 1
1403
+ ), "Cannot handle batch sizes > 1 if no padding token is defined."
1404
+ if self.config.pad_token_id is None:
1405
+ sequence_lengths = -1
1406
+ else:
1407
+ if input_ids is not None:
1408
+ sequence_lengths = torch.ne(input_ids, self.config.pad_token_id).sum(-1) - 1
1409
+ else:
1410
+ sequence_lengths = -1
1411
+ logger.warning(
1412
+ f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
1413
+ "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
1414
+ )
1415
+
1416
+ pooled_logits = logits[torch.arange(batch_size, device=logits.device), sequence_lengths]
1417
+
1418
+ loss = None
1419
+ if labels is not None:
1420
+ if self.config.problem_type is None:
1421
+ if self.num_labels == 1:
1422
+ self.config.problem_type = "regression"
1423
+ elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
1424
+ self.config.problem_type = "single_label_classification"
1425
+ else:
1426
+ self.config.problem_type = "multi_label_classification"
1427
+
1428
+ if self.config.problem_type == "regression":
1429
+ loss_fct = MSELoss()
1430
+ if self.num_labels == 1:
1431
+ loss = loss_fct(pooled_logits.squeeze(), labels.squeeze())
1432
+ else:
1433
+ loss = loss_fct(pooled_logits, labels)
1434
+ elif self.config.problem_type == "single_label_classification":
1435
+ loss_fct = CrossEntropyLoss()
1436
+ loss = loss_fct(pooled_logits.view(-1, self.num_labels), labels.view(-1))
1437
+ elif self.config.problem_type == "multi_label_classification":
1438
+ loss_fct = BCEWithLogitsLoss()
1439
+ loss = loss_fct(pooled_logits, labels)
1440
+ if not return_dict:
1441
+ output = (pooled_logits,) + transformer_outputs[1:]
1442
+ return ((loss,) + output) if loss is not None else output
1443
+
1444
+ return SequenceClassifierOutputWithPast(
1445
+ loss=loss,
1446
+ logits=pooled_logits,
1447
+ past_key_values=transformer_outputs.past_key_values,
1448
+ hidden_states=transformer_outputs.hidden_states,
1449
+ attentions=transformer_outputs.attentions,
1450
+ )
1451
+
1452
+
1453
+ @add_start_docstrings(
1454
+ """
1455
+ GPT2 Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
1456
+ Named-Entity-Recognition (NER) tasks.
1457
+ """,
1458
+ GPT2_START_DOCSTRING,
1459
+ )
1460
+ class GPT2ForTokenClassification(GPT2PreTrainedModel):
1461
+ def __init__(self, config):
1462
+ super().__init__(config)
1463
+ self.num_labels = config.num_labels
1464
+
1465
+ self.transformer = GPT2Model(config)
1466
+ if hasattr(config, "classifier_dropout") and config.classifier_dropout is not None:
1467
+ classifier_dropout = config.classifier_dropout
1468
+ elif hasattr(config, "hidden_dropout") and config.hidden_dropout is not None:
1469
+ classifier_dropout = config.hidden_dropout
1470
+ else:
1471
+ classifier_dropout = 0.1
1472
+ self.dropout = nn.Dropout(classifier_dropout)
1473
+ self.classifier = nn.Linear(config.hidden_size, config.num_labels)
1474
+
1475
+ # Model parallel
1476
+ self.model_parallel = False
1477
+ self.device_map = None
1478
+
1479
+ # Initialize weights and apply final processing
1480
+ self.post_init()
1481
+
1482
+ @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
1483
+ # fmt: off
1484
+ @add_code_sample_docstrings(
1485
+ processor_class=_TOKENIZER_FOR_DOC,
1486
+ checkpoint="brad1141/gpt2-finetuned-comp2",
1487
+ output_type=TokenClassifierOutput,
1488
+ config_class=_CONFIG_FOR_DOC,
1489
+ expected_loss=0.25,
1490
+ expected_output=["Lead", "Lead", "Lead", "Position", "Lead", "Lead", "Lead", "Lead", "Lead", "Lead", "Lead", "Lead"],
1491
+ )
1492
+ # fmt: on
1493
+ def forward(
1494
+ self,
1495
+ input_ids: Optional[torch.LongTensor] = None,
1496
+ past_key_values: Optional[Tuple[Tuple[torch.Tensor]]] = None,
1497
+ attention_mask: Optional[torch.FloatTensor] = None,
1498
+ token_type_ids: Optional[torch.LongTensor] = None,
1499
+ position_ids: Optional[torch.LongTensor] = None,
1500
+ head_mask: Optional[torch.FloatTensor] = None,
1501
+ inputs_embeds: Optional[torch.FloatTensor] = None,
1502
+ labels: Optional[torch.LongTensor] = None,
1503
+ use_cache: Optional[bool] = None,
1504
+ output_attentions: Optional[bool] = None,
1505
+ output_hidden_states: Optional[bool] = None,
1506
+ return_dict: Optional[bool] = None,
1507
+ ) -> Union[Tuple, TokenClassifierOutput]:
1508
+ r"""
1509
+ labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
1510
+ Labels for computing the token classification loss. Indices should be in `[0, ...,
1511
+ config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss). If
1512
+ `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
1513
+ """
1514
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
1515
+
1516
+ transformer_outputs = self.transformer(
1517
+ input_ids,
1518
+ past_key_values=past_key_values,
1519
+ attention_mask=attention_mask,
1520
+ token_type_ids=token_type_ids,
1521
+ position_ids=position_ids,
1522
+ head_mask=head_mask,
1523
+ inputs_embeds=inputs_embeds,
1524
+ use_cache=use_cache,
1525
+ output_attentions=output_attentions,
1526
+ output_hidden_states=output_hidden_states,
1527
+ return_dict=return_dict,
1528
+ )
1529
+
1530
+ hidden_states = transformer_outputs[0]
1531
+ hidden_states = self.dropout(hidden_states)
1532
+ logits = self.classifier(hidden_states)
1533
+
1534
+ loss = None
1535
+ if labels is not None:
1536
+ loss_fct = CrossEntropyLoss()
1537
+ loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
1538
+
1539
+ if not return_dict:
1540
+ output = (logits,) + transformer_outputs[2:]
1541
+ return ((loss,) + output) if loss is not None else output
1542
+
1543
+ return TokenClassifierOutput(
1544
+ loss=loss,
1545
+ logits=logits,
1546
+ hidden_states=transformer_outputs.hidden_states,
1547
+ attentions=transformer_outputs.attentions,
1548
+ )
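
For context, a minimal usage sketch (not part of the commit): a Hub checkpoint that ships this `GPT2CustomLMHeadModel` as custom modeling code can be loaded through the Auto classes, with `trust_remote_code=True` allowing transformers to execute the repository's own modeling file. The repository id below is a hypothetical placeholder.

```python
# Minimal sketch, assuming the files from this commit live in a Hub repository.
# The repo id below is hypothetical; replace it with the actual repository name.
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "your-namespace/tiny-testing-gpt2-remote-code"  # hypothetical

tokenizer = AutoTokenizer.from_pretrained(repo_id)
# trust_remote_code=True lets transformers import GPT2CustomLMHeadModel from the
# repository's own modeling_gpt2.py instead of the built-in GPT2 classes.
model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)

inputs = tokenizer("hello", return_tensors="pt")
outputs = model(**inputs)
print(outputs.logits.shape)  # (batch_size, sequence_length, vocab_size)
```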
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:677f711dfade47fe3bb712a5af6dc3b8bfeb1e22967a4f78777df319d2e15909
3
+ size 1781199
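
The weight file itself is committed as a Git LFS pointer, so only the sha256 object id and the byte size are stored in the diff. A small sketch of checking a downloaded `pytorch_model.bin` against the pointer above; the local path is an assumption.

```python
# Sketch: verify a downloaded pytorch_model.bin against the LFS pointer fields.
# The local path is an assumption; point it at the file you actually downloaded.
import hashlib
import os

expected_oid = "677f711dfade47fe3bb712a5af6dc3b8bfeb1e22967a4f78777df319d2e15909"
expected_size = 1781199  # bytes, from the pointer's "size" field

path = "pytorch_model.bin"

assert os.path.getsize(path) == expected_size, "size mismatch"
with open(path, "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()
assert digest == expected_oid, "sha256 mismatch"
print("weights match the LFS pointer")
```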
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "unk_token": "<|endoftext|>"
5
+ }
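
As a quick check, a sketch (hypothetical repo id; a local directory containing these files also works) confirming that the map above reuses `<|endoftext|>` for the bos, eos, and unk roles once the tokenizer is loaded:

```python
# Sketch: the special tokens map reuses <|endoftext|> for bos/eos/unk.
# The repo id is hypothetical; substitute the real repository or a local path.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("your-namespace/tiny-testing-gpt2-remote-code")
assert tok.bos_token == tok.eos_token == tok.unk_token == "<|endoftext|>"
print(tok.convert_tokens_to_ids("<|endoftext|>"))
```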
tokenizer.json ADDED
@@ -0,0 +1,1853 @@
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "<|endoftext|>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ }
15
+ ],
16
+ "normalizer": null,
17
+ "pre_tokenizer": {
18
+ "type": "ByteLevel",
19
+ "add_prefix_space": false,
20
+ "trim_offsets": true,
21
+ "use_regex": true
22
+ },
23
+ "post_processor": {
24
+ "type": "ByteLevel",
25
+ "add_prefix_space": true,
26
+ "trim_offsets": false,
27
+ "use_regex": true
28
+ },
29
+ "decoder": {
30
+ "type": "ByteLevel",
31
+ "add_prefix_space": true,
32
+ "trim_offsets": true,
33
+ "use_regex": true
34
+ },
35
+ "model": {
36
+ "type": "BPE",
37
+ "dropout": null,
38
+ "unk_token": null,
39
+ "continuing_subword_prefix": "",
40
+ "end_of_word_suffix": "",
41
+ "fuse_unk": false,
42
+ "vocab": {
43
+ "<|endoftext|>": 0,
44
+ "!": 1,
45
+ "\"": 2,
46
+ "#": 3,
47
+ "$": 4,
48
+ "%": 5,
49
+ "&": 6,
50
+ "'": 7,
51
+ "(": 8,
52
+ ")": 9,
53
+ "*": 10,
54
+ "+": 11,
55
+ ",": 12,
56
+ "-": 13,
57
+ ".": 14,
58
+ "/": 15,
59
+ "0": 16,
60
+ "1": 17,
61
+ "2": 18,
62
+ "3": 19,
63
+ "4": 20,
64
+ "5": 21,
65
+ "6": 22,
66
+ "7": 23,
67
+ "8": 24,
68
+ "9": 25,
69
+ ":": 26,
70
+ ";": 27,
71
+ "<": 28,
72
+ "=": 29,
73
+ ">": 30,
74
+ "?": 31,
75
+ "@": 32,
76
+ "A": 33,
77
+ "B": 34,
78
+ "C": 35,
79
+ "D": 36,
80
+ "E": 37,
81
+ "F": 38,
82
+ "G": 39,
83
+ "H": 40,
84
+ "I": 41,
85
+ "J": 42,
86
+ "K": 43,
87
+ "L": 44,
88
+ "M": 45,
89
+ "N": 46,
90
+ "O": 47,
91
+ "P": 48,
92
+ "Q": 49,
93
+ "R": 50,
94
+ "S": 51,
95
+ "T": 52,
96
+ "U": 53,
97
+ "V": 54,
98
+ "W": 55,
99
+ "X": 56,
100
+ "Y": 57,
101
+ "Z": 58,
102
+ "[": 59,
103
+ "\\": 60,
104
+ "]": 61,
105
+ "^": 62,
106
+ "_": 63,
107
+ "`": 64,
108
+ "a": 65,
109
+ "b": 66,
110
+ "c": 67,
111
+ "d": 68,
112
+ "e": 69,
113
+ "f": 70,
114
+ "g": 71,
115
+ "h": 72,
116
+ "i": 73,
117
+ "j": 74,
118
+ "k": 75,
119
+ "l": 76,
120
+ "m": 77,
121
+ "n": 78,
122
+ "o": 79,
123
+ "p": 80,
124
+ "q": 81,
125
+ "r": 82,
126
+ "s": 83,
127
+ "t": 84,
128
+ "u": 85,
129
+ "v": 86,
130
+ "w": 87,
131
+ "x": 88,
132
+ "y": 89,
133
+ "z": 90,
134
+ "|": 91,
135
+ "}": 92,
136
+ "~": 93,
137
+ "¡": 94,
138
+ "¢": 95,
139
+ "£": 96,
140
+ "¤": 97,
141
+ "¥": 98,
142
+ "¦": 99,
143
+ "§": 100,
144
+ "¨": 101,
145
+ "©": 102,
146
+ "ª": 103,
147
+ "«": 104,
148
+ "¬": 105,
149
+ "®": 106,
150
+ "¯": 107,
151
+ "°": 108,
152
+ "±": 109,
153
+ "²": 110,
154
+ "³": 111,
155
+ "´": 112,
156
+ "µ": 113,
157
+ "¶": 114,
158
+ "·": 115,
159
+ "¸": 116,
160
+ "¹": 117,
161
+ "º": 118,
162
+ "»": 119,
163
+ "¼": 120,
164
+ "½": 121,
165
+ "¾": 122,
166
+ "¿": 123,
167
+ "Â": 124,
168
+ "Ã": 125,
169
+ "Ä": 126,
170
+ "Å": 127,
171
+ "Æ": 128,
172
+ "Ç": 129,
173
+ "È": 130,
174
+ "É": 131,
175
+ "Ê": 132,
176
+ "Ë": 133,
177
+ "Ì": 134,
178
+ "Í": 135,
179
+ "Î": 136,
180
+ "Ï": 137,
181
+ "Ð": 138,
182
+ "Ñ": 139,
183
+ "Ö": 140,
184
+ "×": 141,
185
+ "Ø": 142,
186
+ "Ù": 143,
187
+ "Ü": 144,
188
+ "à": 145,
189
+ "á": 146,
190
+ "â": 147,
191
+ "ã": 148,
192
+ "ä": 149,
193
+ "å": 150,
194
+ "æ": 151,
195
+ "ç": 152,
196
+ "è": 153,
197
+ "é": 154,
198
+ "ë": 155,
199
+ "ì": 156,
200
+ "ï": 157,
201
+ "Ċ": 158,
202
+ "Ġ": 159,
203
+ "Ģ": 160,
204
+ "ģ": 161,
205
+ "Ĥ": 162,
206
+ "ĥ": 163,
207
+ "Ħ": 164,
208
+ "ħ": 165,
209
+ "Ĩ": 166,
210
+ "ĩ": 167,
211
+ "Ī": 168,
212
+ "ī": 169,
213
+ "Ĭ": 170,
214
+ "ĭ": 171,
215
+ "Į": 172,
216
+ "į": 173,
217
+ "İ": 174,
218
+ "ı": 175,
219
+ "IJ": 176,
220
+ "ij": 177,
221
+ "Ĵ": 178,
222
+ "ĵ": 179,
223
+ "Ķ": 180,
224
+ "ķ": 181,
225
+ "ĸ": 182,
226
+ "Ĺ": 183,
227
+ "ĺ": 184,
228
+ "Ļ": 185,
229
+ "ļ": 186,
230
+ "Ľ": 187,
231
+ "ľ": 188,
232
+ "Ŀ": 189,
233
+ "ŀ": 190,
234
+ "Ł": 191,
235
+ "ł": 192,
236
+ "Ń": 193,
237
+ "Ġt": 194,
238
+ "he": 195,
239
+ "Ġa": 196,
240
+ "in": 197,
241
+ "Ġthe": 198,
242
+ "er": 199,
243
+ "on": 200,
244
+ "Ġ,": 201,
245
+ "re": 202,
246
+ "Ġs": 203,
247
+ "ed": 204,
248
+ "Ġo": 205,
249
+ "Ġw": 206,
250
+ "nd": 207,
251
+ "at": 208,
252
+ "Ġ.": 209,
253
+ "or": 210,
254
+ "it": 211,
255
+ "Ġc": 212,
256
+ "en": 213,
257
+ "Ġf": 214,
258
+ "is": 215,
259
+ "es": 216,
260
+ "ar": 217,
261
+ "Ġof": 218,
262
+ "Ġb": 219,
263
+ "an": 220,
264
+ "Ġin": 221,
265
+ "al": 222,
266
+ "ing": 223,
267
+ "Ġp": 224,
268
+ "Ġand": 225,
269
+ "as": 226,
270
+ "Ġto": 227,
271
+ "ro": 228,
272
+ "ic": 229,
273
+ "Ġm": 230,
274
+ "Ġd": 231,
275
+ "Ġh": 232,
276
+ "ion": 233,
277
+ "le": 234,
278
+ "ou": 235,
279
+ "ĠT": 236,
280
+ "Ġre": 237,
281
+ "Ġ=": 238,
282
+ "Ġ\"": 239,
283
+ "ĠA": 240,
284
+ "ĠS": 241,
285
+ "ent": 242,
286
+ "il": 243,
287
+ "Ġth": 244,
288
+ "Ġ1": 245,
289
+ "st": 246,
290
+ "ĠC": 247,
291
+ "el": 248,
292
+ "om": 249,
293
+ "Ġl": 250,
294
+ "am": 251,
295
+ "ĠĊ": 252,
296
+ "Ġe": 253,
297
+ "Ġn": 254,
298
+ "Ġ@": 255,
299
+ "ad": 256,
300
+ "ac": 257,
301
+ "Ġwas": 258,
302
+ "ĠM": 259,
303
+ "ur": 260,
304
+ "ĠThe": 261,
305
+ "ec": 262,
306
+ "Ġon": 263,
307
+ "ly": 264,
308
+ "ĠB": 265,
309
+ "ĠI": 266,
310
+ "Ġg": 267,
311
+ "Ġ'": 268,
312
+ "et": 269,
313
+ "ol": 270,
314
+ "id": 271,
315
+ "iv": 272,
316
+ "im": 273,
317
+ "Ġfor": 274,
318
+ "ir": 275,
319
+ "-@": 276,
320
+ "Ġ@-@": 277,
321
+ "ig": 278,
322
+ "ot": 279,
323
+ "ter": 280,
324
+ "Ġas": 281,
325
+ "ĠH": 282,
326
+ "us": 283,
327
+ "ow": 284,
328
+ "Ġst": 285,
329
+ "ut": 286,
330
+ "ith": 287,
331
+ "ay": 288,
332
+ "Ġ2": 289,
333
+ "ĠP": 290,
334
+ "ation": 291,
335
+ "ver": 292,
336
+ "Ġbe": 293,
337
+ "her": 294,
338
+ "Ġthat": 295,
339
+ "Ġwith": 296,
340
+ "ĠR": 297,
341
+ "ce": 298,
342
+ "th": 299,
343
+ "ĠD": 300,
344
+ "Ġis": 301,
345
+ "un": 302,
346
+ "em": 303,
347
+ "ĠF": 304,
348
+ "Ġwh": 305,
349
+ "ul": 306,
350
+ "Ġby": 307,
351
+ "Ġal": 308,
352
+ "ch": 309,
353
+ "Ġ)": 310,
354
+ "Ġ(": 311,
355
+ "ĠW": 312,
356
+ "Ġcon": 313,
357
+ "ra": 314,
358
+ "ĠG": 315,
359
+ "os": 316,
360
+ "ĠL": 317,
361
+ "ĠN": 318,
362
+ "Ġat": 319,
363
+ "ers": 320,
364
+ "ct": 321,
365
+ "Ġit": 322,
366
+ "Ġ19": 323,
367
+ "rom": 324,
368
+ "and": 325,
369
+ "Ġan": 326,
370
+ "um": 327,
371
+ "est": 328,
372
+ "ĠJ": 329,
373
+ "ag": 330,
374
+ "Ġhe": 331,
375
+ "00": 332,
376
+ "ist": 333,
377
+ "ain": 334,
378
+ "od": 335,
379
+ "av": 336,
380
+ "ri": 337,
381
+ "ĠE": 338,
382
+ "ĠO": 339,
383
+ "Ġfrom": 340,
384
+ "Ġcom": 341,
385
+ "Ġhis": 342,
386
+ "op": 343,
387
+ "Ġpro": 344,
388
+ "res": 345,
389
+ "ies": 346,
390
+ "if": 347,
391
+ "Ġv": 348,
392
+ "ort": 349,
393
+ "ere": 350,
394
+ "ill": 351,
395
+ "ld": 352,
396
+ "Ġde": 353,
397
+ "pp": 354,
398
+ "Ġsu": 355,
399
+ "ore": 356,
400
+ "ĠIn": 357,
401
+ "Ġr": 358,
402
+ "Ġse": 359,
403
+ "Ġwere": 360,
404
+ "ew": 361,
405
+ "ong": 362,
406
+ "igh": 363,
407
+ "ard": 364,
408
+ "ate": 365,
409
+ "all": 366,
410
+ "art": 367,
411
+ "ak": 368,
412
+ "ich": 369,
413
+ "Ġch": 370,
414
+ "Ġor": 371,
415
+ "ab": 372,
416
+ "ant": 373,
417
+ "ud": 374,
418
+ "oc": 375,
419
+ "ber": 376,
420
+ "Ġex": 377,
421
+ "gh": 378,
422
+ "ity": 379,
423
+ "ated": 380,
424
+ "pt": 381,
425
+ "ess": 382,
426
+ "ear": 383,
427
+ "ĠK": 384,
428
+ "Ġpl": 385,
429
+ "ame": 386,
430
+ "qu": 387,
431
+ "ive": 388,
432
+ "rou": 389,
433
+ "Ġare": 390,
434
+ "Ġâ": 391,
435
+ "Ġsh": 392,
436
+ "Ġk": 393,
437
+ "ack": 394,
438
+ "ect": 395,
439
+ "ĠâĢ": 396,
440
+ "ĠU": 397,
441
+ "Ġhad": 398,
442
+ "se": 399,
443
+ "Ġwhich": 400,
444
+ "red": 401,
445
+ "ov": 402,
446
+ "ĠSt": 403,
447
+ "ast": 404,
448
+ "Ġsp": 405,
449
+ "ian": 406,
450
+ "Ġy": 407,
451
+ "ment": 408,
452
+ "Ġle": 409,
453
+ "Ġnot": 410,
454
+ "ge": 411,
455
+ "ord": 412,
456
+ "rit": 413,
457
+ "ip": 414,
458
+ "ine": 415,
459
+ "ell": 416,
460
+ "ally": 417,
461
+ "our": 418,
462
+ "ost": 419,
463
+ "ight": 420,
464
+ "ther": 421,
465
+ "ap": 422,
466
+ "Ġu": 423,
467
+ "ish": 424,
468
+ "ĠCh": 425,
469
+ "oun": 426,
470
+ "ia": 427,
471
+ "Ġ3": 428,
472
+ "ave": 429,
473
+ "ary": 430,
474
+ "ust": 431,
475
+ "og": 432,
476
+ "Ġ200": 433,
477
+ "Ġun": 434,
478
+ "ous": 435,
479
+ "irst": 436,
480
+ "ĠV": 437,
481
+ "cc": 438,
482
+ "Ġinc": 439,
483
+ "Ġ;": 440,
484
+ "Ġcomp": 441,
485
+ "ru": 442,
486
+ "ions": 443,
487
+ "Ġtheir": 444,
488
+ "Ġbut": 445,
489
+ "ide": 446,
490
+ "ure": 447,
491
+ "so": 448,
492
+ "Ġcont": 449,
493
+ "Ġint": 450,
494
+ "fter": 451,
495
+ "ical": 452,
496
+ "ial": 453,
497
+ "Ġar": 454,
498
+ "Ġfirst": 455,
499
+ "ould": 456,
500
+ "Ġits": 457,
501
+ "hed": 458,
502
+ "ĠâĢĵ": 459,
503
+ "Ġwhe": 460,
504
+ "wo": 461,
505
+ "out": 462,
506
+ "ub": 463,
507
+ "Ġ20": 464,
508
+ "ff": 465,
509
+ "Ġ:": 466,
510
+ "ue": 467,
511
+ "Ġher": 468,
512
+ "own": 469,
513
+ "ok": 470,
514
+ "Ġalso": 471,
515
+ "Ġcl": 472,
516
+ "per": 473,
517
+ "ign": 474,
518
+ "ater": 475,
519
+ "ran": 476,
520
+ "orm": 477,
521
+ "ie": 478,
522
+ "ome": 479,
523
+ "ork": 480,
524
+ "ass": 481,
525
+ "ire": 482,
526
+ "end": 483,
527
+ "Ġres": 484,
528
+ "Ġab": 485,
529
+ "Ġad": 486,
530
+ "Ġus": 487,
531
+ "ry": 488,
532
+ "Ġrec": 489,
533
+ "Ġhave": 490,
534
+ "age": 491,
535
+ "ĠHe": 492,
536
+ "Ġ4": 493,
537
+ "Ġro": 494,
538
+ "mer": 495,
539
+ "Ġone": 496,
540
+ "ond": 497,
541
+ "low": 498,
542
+ "Ġhas": 499,
543
+ "ĠTh": 500,
544
+ "du": 501,
545
+ "Ġ5": 502,
546
+ "Ġper": 503,
547
+ "Ġbeen": 504,
548
+ "ime": 505,
549
+ "Ġtwo": 506,
550
+ "ence": 507,
551
+ "land": 508,
552
+ "Ġ18": 509,
553
+ ".@": 510,
554
+ "Ġ@.@": 511,
555
+ "ult": 512,
556
+ "ree": 513,
557
+ "ough": 514,
558
+ "ile": 515,
559
+ "Ġwho": 516,
560
+ "ĠAl": 517,
561
+ "Ġsc": 518,
562
+ "uring": 519,
563
+ "pl": 520,
564
+ "ory": 521,
565
+ "ition": 522,
566
+ "ric": 523,
567
+ "ations": 524,
568
+ "Ġdis": 525,
569
+ "Ġthis": 526,
570
+ "Ġbec": 527,
571
+ "Ġapp": 528,
572
+ "iz": 529,
573
+ "ĠIt": 530,
574
+ "are": 531,
575
+ "ach": 532,
576
+ "lud": 533,
577
+ "ade": 534,
578
+ "Ġplay": 535,
579
+ "Ġj": 536,
580
+ "Ġman": 537,
581
+ "act": 538,
582
+ "ely": 539,
583
+ "Ġpart": 540,
584
+ "Ġdes": 541,
585
+ "Ġag": 542,
586
+ "Ġthey": 543,
587
+ "Ġyear": 544,
588
+ "ount": 545,
589
+ "Ġ201": 546,
590
+ "Ġover": 547,
591
+ "Ġother": 548,
592
+ "ound": 549,
593
+ "Ġafter": 550,
594
+ "ib": 551,
595
+ "over": 552,
596
+ "Ġser": 553,
597
+ "Ġen": 554,
598
+ "Ġoff": 555,
599
+ "Ġim": 556,
600
+ "ction": 557,
601
+ "ĠY": 558,
602
+ "ke": 559,
603
+ "ite": 560,
604
+ ",@": 561,
605
+ "Ġ@,@": 562,
606
+ "te": 563,
607
+ "urn": 564,
608
+ "Ġinclud": 565,
609
+ "ress": 566,
610
+ "ance": 567,
611
+ "ang": 568,
612
+ "Ġatt": 569,
613
+ "ice": 570,
614
+ "ace": 571,
615
+ "ark": 572,
616
+ "Ġout": 573,
617
+ "wn": 574,
618
+ "ph": 575,
619
+ "ember": 576,
620
+ "Ġpre": 577,
621
+ "Ġup": 578,
622
+ "ens": 579,
623
+ "man": 580,
624
+ "Ġev": 581,
625
+ "Ġtime": 582,
626
+ "nder": 583,
627
+ "rough": 584,
628
+ "ced": 585,
629
+ "Ġfin": 586,
630
+ "Ġinto": 587,
631
+ "one": 588,
632
+ "port": 589,
633
+ "round": 590,
634
+ "we": 591,
635
+ "ren": 592,
636
+ "les": 593,
637
+ "int": 594,
638
+ "ĠOn": 595,
639
+ "vel": 596,
640
+ "Ġcomm": 597,
641
+ "Ġshe": 598,
642
+ "ason": 599,
643
+ "amp": 600,
644
+ "Ġte": 601,
645
+ "Ġwould": 602,
646
+ "ward": 603,
647
+ "Ġmore": 604,
648
+ "Ġ6": 605,
649
+ "ied": 606,
650
+ "ose": 607,
651
+ "rib": 608,
652
+ "ĠUn": 609,
653
+ "Ġall": 610,
654
+ "ings": 611,
655
+ "tern": 612,
656
+ "ces": 613,
657
+ "able": 614,
658
+ "Ġwe": 615,
659
+ "ited": 616,
660
+ "ever": 617,
661
+ "ents": 618,
662
+ "Ġhim": 619,
663
+ "ased": 620,
664
+ "ors": 621,
665
+ "oy": 622,
666
+ "ood": 623,
667
+ "Ġcent": 624,
668
+ "ix": 625,
669
+ "ase": 626,
670
+ "ild": 627,
671
+ "ĠAn": 628,
672
+ "Ġ7": 629,
673
+ "Ġwork": 630,
674
+ "ates": 631,
675
+ "ious": 632,
676
+ "ath": 633,
677
+ "Ġpo": 634,
678
+ "rop": 635,
679
+ "old": 636,
680
+ "als": 637,
681
+ "iss": 638,
682
+ "ey": 639,
683
+ "ict": 640,
684
+ "Ġfe": 641,
685
+ "Ġthem": 642,
686
+ "gan": 643,
687
+ "Ġsec": 644,
688
+ "Ġbet": 645,
689
+ "Ġwhen": 646,
690
+ "Ġsong": 647,
691
+ "Ġrem": 648,
692
+ "ep": 649,
693
+ "form": 650,
694
+ "ail": 651,
695
+ "fer": 652,
696
+ "Ġear": 653,
697
+ "ubl": 654,
698
+ "aw": 655,
699
+ "Ġkn": 656,
700
+ "ake": 657,
701
+ "aus": 658,
702
+ "Ġmost": 659,
703
+ "Ġcons": 660,
704
+ "Ġduring": 661,
705
+ "ĠAs": 662,
706
+ "orth": 663,
707
+ "Ġnew": 664,
708
+ "ered": 665,
709
+ "ilm": 666,
710
+ "ved": 667,
711
+ "att": 668,
712
+ "Ġonly": 669,
713
+ "Ġ9": 670,
714
+ "Ġdec": 671,
715
+ "Ġ8": 672,
716
+ "ick": 673,
717
+ "Ġgame": 674,
718
+ "ons": 675,
719
+ "ug": 676,
720
+ "Ġtr": 677,
721
+ "ft": 678,
722
+ "oth": 679,
723
+ "ook": 680,
724
+ "ĠMar": 681,
725
+ "reat": 682,
726
+ "way": 683,
727
+ "Ġcan": 684,
728
+ "ollow": 685,
729
+ "outh": 686,
730
+ "ween": 687,
731
+ "ĠEn": 688,
732
+ "Ġ199": 689,
733
+ "ters": 690,
734
+ "Ġrel": 691,
735
+ "ind": 692,
736
+ "Ġabout": 693,
737
+ "Ġseason": 694,
738
+ "Ġagain": 695,
739
+ "ral": 696,
740
+ "Ġthree": 697,
741
+ "ational": 698,
742
+ "Ġunder": 699,
743
+ "ular": 700,
744
+ "Ġme": 701,
745
+ "Ġthan": 702,
746
+ "ĠCom": 703,
747
+ "ĠAr": 704,
748
+ "hip": 705,
749
+ "ob": 706,
750
+ "Ġne": 707,
751
+ "Ġbetween": 708,
752
+ "Ġfl": 709,
753
+ "hn": 710,
754
+ "ve": 711,
755
+ "Ġchar": 712,
756
+ "Ġcol": 713,
757
+ "Ġrecord": 714,
758
+ "iew": 715,
759
+ "ron": 716,
760
+ "fore": 717,
761
+ "Ġthrough": 718,
762
+ "ision": 719,
763
+ "orn": 720,
764
+ "Ġ00": 721,
765
+ "ock": 722,
766
+ "Ġver": 723,
767
+ "Ġlater": 724,
768
+ "Ġnum": 725,
769
+ "Ġend": 726,
770
+ "olog": 727,
771
+ "ames": 728,
772
+ "Ġpos": 729,
773
+ "Ġwrit": 730,
774
+ "Ġprodu": 731,
775
+ "Ġwhile": 732,
776
+ "Ġact": 733,
777
+ "Ġrele": 734,
778
+ "Ġfilm": 735,
779
+ "ished": 736,
780
+ "Ġpr": 737,
781
+ "ans": 738,
782
+ "Ġreg": 739,
783
+ "Ġform": 740,
784
+ "Ġass": 741,
785
+ "ĠSe": 742,
786
+ "ury": 743,
787
+ "ted": 744,
788
+ "ts": 745,
789
+ "Ġmade": 746,
790
+ "Ġsub": 747,
791
+ "Ġpe": 748,
792
+ "Ġso": 749,
793
+ "orld": 750,
794
+ "Ġret": 751,
795
+ "ĠNew": 752,
796
+ "Ġspec": 753,
797
+ "Ġacc": 754,
798
+ "Ġqu": 755,
799
+ "Ġwhere": 756,
800
+ "ener": 757,
801
+ "Ġmov": 758,
802
+ "hes": 759,
803
+ "meric": 760,
804
+ "ating": 761,
805
+ "Ġinter": 762,
806
+ "ĠLe": 763,
807
+ "ĠAmeric": 764,
808
+ "Ġra": 765,
809
+ "Ġsome": 766,
810
+ "Ġco": 767,
811
+ "Ġlar": 768,
812
+ "Ġbu": 769,
813
+ "Ġdef": 770,
814
+ "bum": 771,
815
+ "Ġac": 772,
816
+ "Ġmus": 773,
817
+ "Ġfollow": 774,
818
+ "ĠAt": 775,
819
+ "ins": 776,
820
+ "ived": 777,
821
+ "ific": 778,
822
+ "ual": 779,
823
+ "Ġam": 780,
824
+ "Ġsuch": 781,
825
+ "Ġsecond": 782,
826
+ "ike": 783,
827
+ "Ġfour": 784,
828
+ "Ġind": 785,
829
+ "ann": 786,
830
+ "hen": 787,
831
+ "Ġused": 788,
832
+ "ĠRe": 789,
833
+ "ics": 790,
834
+ "lect": 791,
835
+ "Ġday": 792,
836
+ "iel": 793,
837
+ "ily": 794,
838
+ "ĠThis": 795,
839
+ "Ġ0": 796,
840
+ "Ġpubl": 797,
841
+ "Ġcall": 798,
842
+ "ĠJo": 799,
843
+ "ll": 800,
844
+ "Ġalbum": 801,
845
+ "Ġ000": 802,
846
+ "rans": 803,
847
+ "Ġdo": 804,
848
+ "any": 805,
849
+ "Ġbefore": 806,
850
+ "ros": 807,
851
+ "ĠSh": 808,
852
+ "Ġsy": 809,
853
+ "aid": 810,
854
+ "ĠEng": 811,
855
+ "Ġbeing": 812,
856
+ "Ġ10": 813,
857
+ "uc": 814,
858
+ "Ġep": 815,
859
+ "Ġsupp": 816,
860
+ "Ġthere": 817,
861
+ "Ġyears": 818,
862
+ "ars": 819,
863
+ "owever": 820,
864
+ "Ġent": 821,
865
+ "ife": 822,
866
+ "Ġhigh": 823,
867
+ "Ġfound": 824,
868
+ "ird": 825,
869
+ "Ġno": 826,
870
+ "Ġset": 827,
871
+ "ines": 828,
872
+ "iver": 829,
873
+ "io": 830,
874
+ "other": 831,
875
+ "ject": 832,
876
+ "Ġsur": 833,
877
+ "aj": 834,
878
+ "ten": 835,
879
+ "Ġtra": 836,
880
+ "Ġ12": 837,
881
+ "ised": 838,
882
+ "ities": 839,
883
+ "velop": 840,
884
+ "Ġbl": 841,
885
+ "ale": 842,
886
+ "Ġseries": 843,
887
+ "Ġloc": 844,
888
+ "Ġnumber": 845,
889
+ "Ġpres": 846,
890
+ "ane": 847,
891
+ "ause": 848,
892
+ "ode": 849,
893
+ "ek": 850,
894
+ "ton": 851,
895
+ "ĠSc": 852,
896
+ "ier": 853,
897
+ "ise": 854,
898
+ "Ġsever": 855,
899
+ "ince": 856,
900
+ "Ġboth": 857,
901
+ "ank": 858,
902
+ "row": 859,
903
+ "irect": 860,
904
+ "son": 861,
905
+ "Ġthen": 862,
906
+ "ĠBrit": 863,
907
+ "iet": 864,
908
+ "Ġ16": 865,
909
+ "Ġepis": 866,
910
+ "Ġincluding": 867,
911
+ "its": 868,
912
+ "igin": 869,
913
+ "pr": 870,
914
+ "Ġ/": 871,
915
+ "Ġagainst": 872,
916
+ "Ġwell": 873,
917
+ "Ġbecame": 874,
918
+ "Ġexp": 875,
919
+ "Ġknown": 876,
920
+ "Ġtrans": 877,
921
+ "Ġcharac": 878,
922
+ "ĠâĢĶ": 879,
923
+ "ram": 880,
924
+ "Ġback": 881,
925
+ "Ġadd": 882,
926
+ "Ġpop": 883,
927
+ "Ġgo": 884,
928
+ "urch": 885,
929
+ "Ġdesc": 886,
930
+ "Ġsing": 887,
931
+ "ield": 888,
932
+ "Ġperform": 889,
933
+ "ained": 890,
934
+ "Ġrece": 891,
935
+ "ident": 892,
936
+ "Ġem": 893,
937
+ "ert": 894,
938
+ "ures": 895,
939
+ "Ġinv": 896,
940
+ "Ġdep": 897,
941
+ "Ġ198": 898,
942
+ "air": 899,
943
+ "ern": 900,
944
+ "ather": 901,
945
+ "ful": 902,
946
+ "ĠZ": 903,
947
+ "Ġmon": 904,
948
+ "Ġmany": 905,
949
+ "Ġmain": 906,
950
+ "Ġstud": 907,
951
+ "Ġlong": 908,
952
+ "inn": 909,
953
+ "though": 910,
954
+ "up": 911,
955
+ "ool": 912,
956
+ "ĠUnited": 913,
957
+ "led": 914,
958
+ "ement": 915,
959
+ "Ġ15": 916,
960
+ "ower": 917,
961
+ "ĠJohn": 918,
962
+ "Ġop": 919,
963
+ "Ġ11": 920,
964
+ "ined": 921,
965
+ "Ġmet": 922,
966
+ "ober": 923,
967
+ "ley": 924,
968
+ "Ġ17": 925,
969
+ "Ġcentury": 926,
970
+ "Ġteam": 927,
971
+ "Ġest": 928,
972
+ "ĠAfter": 929,
973
+ "yl": 930,
974
+ "Ġmin": 931,
975
+ "uch": 932,
976
+ "ute": 933,
977
+ "Ġdevelop": 934,
978
+ "ĠShe": 935,
979
+ "iam": 936,
980
+ "Ġshow": 937,
981
+ "elf": 938,
982
+ "Ġrep": 939,
983
+ "Ġconc": 940,
984
+ "ative": 941,
985
+ "Ġcre": 942,
986
+ "overn": 943,
987
+ "ared": 944,
988
+ "Ġ194": 945,
989
+ "Ġorigin": 946,
990
+ "Ġsm": 947,
991
+ "ivers": 948,
992
+ "az": 949,
993
+ "Ġlead": 950,
994
+ "Ġseveral": 951,
995
+ "ah": 952,
996
+ "Ġob": 953,
997
+ "Ġrev": 954,
998
+ "Ġmill": 955,
999
+ "erm": 956,
1000
+ "ually": 957,
1001
+ "oot": 958,
1002
+ "Ġbegan": 959,
1003
+ "Ġ196": 960,
1004
+ "ired": 961,
1005
+ "Ġdif": 962,
1006
+ "Ġcontin": 963,
1007
+ "Ġsign": 964,
1008
+ "ik": 965,
1009
+ "ĠInd": 966,
1010
+ "ments": 967,
1011
+ "ized": 968,
1012
+ "Ġ197": 969,
1013
+ "Ġdirect": 970,
1014
+ "au": 971,
1015
+ "Ġext": 972,
1016
+ "ross": 973,
1017
+ "emb": 974,
1018
+ "der": 975,
1019
+ "Ġpol": 976,
1020
+ "Ġmay": 977,
1021
+ "apt": 978,
1022
+ "els": 979,
1023
+ "ĠWh": 980,
1024
+ "Ġcomple": 981,
1025
+ "Ġart": 982,
1026
+ "ĠBr": 983,
1027
+ "ĠIs": 984,
1028
+ "une": 985,
1029
+ "til": 986,
1030
+ "Ġcrit": 987,
1031
+ "Ġhist": 988,
1032
+ "Ġearly": 989,
1033
+ "Ġcould": 990,
1034
+ "ĠCon": 991,
1035
+ "Ġdid": 992,
1036
+ "Ġbel": 993,
1037
+ "Ġcalled": 994,
1038
+ "ued": 995,
1039
+ "Ġnear": 996,
1040
+ "Ġepisode": 997,
1041
+ "yp": 998,
1042
+ "Ġdescrib": 999
1043
+ },
1044
+ "merges": [
1045
+ "Ġ t",
1046
+ "h e",
1047
+ "Ġ a",
1048
+ "i n",
1049
+ "Ġt he",
1050
+ "e r",
1051
+ "o n",
1052
+ "Ġ ,",
1053
+ "r e",
1054
+ "Ġ s",
1055
+ "e d",
1056
+ "Ġ o",
1057
+ "Ġ w",
1058
+ "n d",
1059
+ "a t",
1060
+ "Ġ .",
1061
+ "o r",
1062
+ "i t",
1063
+ "Ġ c",
1064
+ "e n",
1065
+ "Ġ f",
1066
+ "i s",
1067
+ "e s",
1068
+ "a r",
1069
+ "Ġo f",
1070
+ "Ġ b",
1071
+ "a n",
1072
+ "Ġ in",
1073
+ "a l",
1074
+ "in g",
1075
+ "Ġ p",
1076
+ "Ġa nd",
1077
+ "a s",
1078
+ "Ġt o",
1079
+ "r o",
1080
+ "i c",
1081
+ "Ġ m",
1082
+ "Ġ d",
1083
+ "Ġ h",
1084
+ "i on",
1085
+ "l e",
1086
+ "o u",
1087
+ "Ġ T",
1088
+ "Ġ re",
1089
+ "Ġ =",
1090
+ "Ġ \"",
1091
+ "Ġ A",
1092
+ "Ġ S",
1093
+ "en t",
1094
+ "i l",
1095
+ "Ġt h",
1096
+ "Ġ 1",
1097
+ "s t",
1098
+ "Ġ C",
1099
+ "e l",
1100
+ "o m",
1101
+ "Ġ l",
1102
+ "a m",
1103
+ "Ġ Ċ",
1104
+ "Ġ e",
1105
+ "Ġ n",
1106
+ "Ġ @",
1107
+ "a d",
1108
+ "a c",
1109
+ "Ġw as",
1110
+ "Ġ M",
1111
+ "u r",
1112
+ "ĠT he",
1113
+ "e c",
1114
+ "Ġ on",
1115
+ "l y",
1116
+ "Ġ B",
1117
+ "Ġ I",
1118
+ "Ġ g",
1119
+ "Ġ '",
1120
+ "e t",
1121
+ "o l",
1122
+ "i d",
1123
+ "i v",
1124
+ "i m",
1125
+ "Ġf or",
1126
+ "i r",
1127
+ "- @",
1128
+ "Ġ@ -@",
1129
+ "i g",
1130
+ "o t",
1131
+ "t er",
1132
+ "Ġa s",
1133
+ "Ġ H",
1134
+ "u s",
1135
+ "o w",
1136
+ "Ġs t",
1137
+ "u t",
1138
+ "it h",
1139
+ "a y",
1140
+ "Ġ 2",
1141
+ "Ġ P",
1142
+ "at ion",
1143
+ "v er",
1144
+ "Ġb e",
1145
+ "he r",
1146
+ "Ġth at",
1147
+ "Ġw ith",
1148
+ "Ġ R",
1149
+ "c e",
1150
+ "t h",
1151
+ "Ġ D",
1152
+ "Ġ is",
1153
+ "u n",
1154
+ "e m",
1155
+ "Ġ F",
1156
+ "Ġw h",
1157
+ "u l",
1158
+ "Ġb y",
1159
+ "Ġa l",
1160
+ "c h",
1161
+ "Ġ )",
1162
+ "Ġ (",
1163
+ "Ġ W",
1164
+ "Ġc on",
1165
+ "r a",
1166
+ "Ġ G",
1167
+ "o s",
1168
+ "Ġ L",
1169
+ "Ġ N",
1170
+ "Ġa t",
1171
+ "er s",
1172
+ "c t",
1173
+ "Ġ it",
1174
+ "Ġ1 9",
1175
+ "ro m",
1176
+ "a nd",
1177
+ "Ġa n",
1178
+ "u m",
1179
+ "es t",
1180
+ "Ġ J",
1181
+ "a g",
1182
+ "Ġ he",
1183
+ "0 0",
1184
+ "is t",
1185
+ "a in",
1186
+ "o d",
1187
+ "a v",
1188
+ "r i",
1189
+ "Ġ E",
1190
+ "Ġ O",
1191
+ "Ġf rom",
1192
+ "Ġc om",
1193
+ "Ġh is",
1194
+ "o p",
1195
+ "Ġp ro",
1196
+ "re s",
1197
+ "i es",
1198
+ "i f",
1199
+ "Ġ v",
1200
+ "or t",
1201
+ "er e",
1202
+ "il l",
1203
+ "l d",
1204
+ "Ġd e",
1205
+ "p p",
1206
+ "Ġs u",
1207
+ "o re",
1208
+ "ĠI n",
1209
+ "Ġ r",
1210
+ "Ġs e",
1211
+ "Ġw ere",
1212
+ "e w",
1213
+ "on g",
1214
+ "ig h",
1215
+ "ar d",
1216
+ "at e",
1217
+ "al l",
1218
+ "ar t",
1219
+ "a k",
1220
+ "ic h",
1221
+ "Ġc h",
1222
+ "Ġo r",
1223
+ "a b",
1224
+ "an t",
1225
+ "u d",
1226
+ "o c",
1227
+ "b er",
1228
+ "Ġe x",
1229
+ "g h",
1230
+ "it y",
1231
+ "at ed",
1232
+ "p t",
1233
+ "es s",
1234
+ "e ar",
1235
+ "Ġ K",
1236
+ "Ġp l",
1237
+ "am e",
1238
+ "q u",
1239
+ "iv e",
1240
+ "ro u",
1241
+ "Ġa re",
1242
+ "Ġ â",
1243
+ "Ġs h",
1244
+ "Ġ k",
1245
+ "ac k",
1246
+ "ec t",
1247
+ "Ġâ Ģ",
1248
+ "Ġ U",
1249
+ "Ġh ad",
1250
+ "s e",
1251
+ "Ġwh ich",
1252
+ "re d",
1253
+ "o v",
1254
+ "ĠS t",
1255
+ "as t",
1256
+ "Ġs p",
1257
+ "i an",
1258
+ "Ġ y",
1259
+ "m ent",
1260
+ "Ġ le",
1261
+ "Ġn ot",
1262
+ "g e",
1263
+ "or d",
1264
+ "r it",
1265
+ "i p",
1266
+ "in e",
1267
+ "el l",
1268
+ "al ly",
1269
+ "ou r",
1270
+ "o st",
1271
+ "igh t",
1272
+ "t her",
1273
+ "a p",
1274
+ "Ġ u",
1275
+ "is h",
1276
+ "ĠC h",
1277
+ "ou n",
1278
+ "i a",
1279
+ "Ġ 3",
1280
+ "av e",
1281
+ "ar y",
1282
+ "u st",
1283
+ "o g",
1284
+ "Ġ2 00",
1285
+ "Ġ un",
1286
+ "ou s",
1287
+ "ir st",
1288
+ "Ġ V",
1289
+ "c c",
1290
+ "Ġin c",
1291
+ "Ġ ;",
1292
+ "Ġcom p",
1293
+ "r u",
1294
+ "ion s",
1295
+ "Ġthe ir",
1296
+ "Ġb ut",
1297
+ "id e",
1298
+ "u re",
1299
+ "s o",
1300
+ "Ġcon t",
1301
+ "Ġin t",
1302
+ "f ter",
1303
+ "ic al",
1304
+ "i al",
1305
+ "Ġa r",
1306
+ "Ġf irst",
1307
+ "ou ld",
1308
+ "Ġit s",
1309
+ "he d",
1310
+ "ĠâĢ ĵ",
1311
+ "Ġw he",
1312
+ "w o",
1313
+ "ou t",
1314
+ "u b",
1315
+ "Ġ2 0",
1316
+ "f f",
1317
+ "Ġ :",
1318
+ "u e",
1319
+ "Ġ her",
1320
+ "ow n",
1321
+ "o k",
1322
+ "Ġal so",
1323
+ "Ġc l",
1324
+ "p er",
1325
+ "ig n",
1326
+ "at er",
1327
+ "r an",
1328
+ "or m",
1329
+ "i e",
1330
+ "om e",
1331
+ "or k",
1332
+ "as s",
1333
+ "i re",
1334
+ "e nd",
1335
+ "Ġre s",
1336
+ "Ġa b",
1337
+ "Ġa d",
1338
+ "Ġ us",
1339
+ "r y",
1340
+ "Ġre c",
1341
+ "Ġh ave",
1342
+ "ag e",
1343
+ "ĠH e",
1344
+ "Ġ 4",
1345
+ "Ġ ro",
1346
+ "m er",
1347
+ "Ġon e",
1348
+ "on d",
1349
+ "l ow",
1350
+ "Ġh as",
1351
+ "ĠT h",
1352
+ "d u",
1353
+ "Ġ 5",
1354
+ "Ġp er",
1355
+ "Ġbe en",
1356
+ "im e",
1357
+ "Ġt wo",
1358
+ "en ce",
1359
+ "l and",
1360
+ "Ġ1 8",
1361
+ ". @",
1362
+ "Ġ@ .@",
1363
+ "ul t",
1364
+ "re e",
1365
+ "ou gh",
1366
+ "i le",
1367
+ "Ġwh o",
1368
+ "ĠA l",
1369
+ "Ġs c",
1370
+ "ur ing",
1371
+ "p l",
1372
+ "or y",
1373
+ "it ion",
1374
+ "r ic",
1375
+ "ation s",
1376
+ "Ġd is",
1377
+ "Ġth is",
1378
+ "Ġb ec",
1379
+ "Ġa pp",
1380
+ "i z",
1381
+ "ĠI t",
1382
+ "a re",
1383
+ "ac h",
1384
+ "l ud",
1385
+ "ad e",
1386
+ "Ġpl ay",
1387
+ "Ġ j",
1388
+ "Ġm an",
1389
+ "ac t",
1390
+ "el y",
1391
+ "Ġp art",
1392
+ "Ġd es",
1393
+ "Ġa g",
1394
+ "Ġthe y",
1395
+ "Ġy ear",
1396
+ "oun t",
1397
+ "Ġ20 1",
1398
+ "Ġo ver",
1399
+ "Ġo ther",
1400
+ "ou nd",
1401
+ "Ġa fter",
1402
+ "i b",
1403
+ "o ver",
1404
+ "Ġs er",
1405
+ "Ġ en",
1406
+ "Ġof f",
1407
+ "Ġ im",
1408
+ "ct ion",
1409
+ "Ġ Y",
1410
+ "k e",
1411
+ "it e",
1412
+ ", @",
1413
+ "Ġ@ ,@",
1414
+ "t e",
1415
+ "ur n",
1416
+ "Ġinc lud",
1417
+ "res s",
1418
+ "an ce",
1419
+ "an g",
1420
+ "Ġat t",
1421
+ "ic e",
1422
+ "ac e",
1423
+ "ar k",
1424
+ "Ġo ut",
1425
+ "w n",
1426
+ "p h",
1427
+ "em ber",
1428
+ "Ġp re",
1429
+ "Ġu p",
1430
+ "en s",
1431
+ "m an",
1432
+ "Ġe v",
1433
+ "Ġt ime",
1434
+ "nd er",
1435
+ "rou gh",
1436
+ "c ed",
1437
+ "Ġf in",
1438
+ "Ġint o",
1439
+ "on e",
1440
+ "p ort",
1441
+ "rou nd",
1442
+ "w e",
1443
+ "re n",
1444
+ "l es",
1445
+ "in t",
1446
+ "ĠO n",
1447
+ "v el",
1448
+ "Ġcom m",
1449
+ "Ġs he",
1450
+ "as on",
1451
+ "am p",
1452
+ "Ġt e",
1453
+ "Ġw ould",
1454
+ "w ard",
1455
+ "Ġm ore",
1456
+ "Ġ 6",
1457
+ "i ed",
1458
+ "os e",
1459
+ "ri b",
1460
+ "ĠU n",
1461
+ "Ġal l",
1462
+ "ing s",
1463
+ "ter n",
1464
+ "c es",
1465
+ "ab le",
1466
+ "Ġw e",
1467
+ "it ed",
1468
+ "e ver",
1469
+ "ent s",
1470
+ "Ġh im",
1471
+ "as ed",
1472
+ "or s",
1473
+ "o y",
1474
+ "o od",
1475
+ "Ġc ent",
1476
+ "i x",
1477
+ "as e",
1478
+ "il d",
1479
+ "ĠA n",
1480
+ "Ġ 7",
1481
+ "Ġw ork",
1482
+ "at es",
1483
+ "i ous",
1484
+ "at h",
1485
+ "Ġp o",
1486
+ "ro p",
1487
+ "ol d",
1488
+ "al s",
1489
+ "is s",
1490
+ "e y",
1491
+ "ic t",
1492
+ "Ġf e",
1493
+ "Ġthe m",
1494
+ "g an",
1495
+ "Ġs ec",
1496
+ "Ġb et",
1497
+ "Ġwhe n",
1498
+ "Ġs ong",
1499
+ "Ġre m",
1500
+ "e p",
1501
+ "f orm",
1502
+ "a il",
1503
+ "f er",
1504
+ "Ġe ar",
1505
+ "ub l",
1506
+ "a w",
1507
+ "Ġk n",
1508
+ "ak e",
1509
+ "a us",
1510
+ "Ġm ost",
1511
+ "Ġcon s",
1512
+ "Ġd uring",
1513
+ "ĠA s",
1514
+ "or th",
1515
+ "Ġn ew",
1516
+ "er ed",
1517
+ "il m",
1518
+ "v ed",
1519
+ "at t",
1520
+ "Ġon ly",
1521
+ "Ġ 9",
1522
+ "Ġd ec",
1523
+ "Ġ 8",
1524
+ "ic k",
1525
+ "Ġg ame",
1526
+ "on s",
1527
+ "u g",
1528
+ "Ġt r",
1529
+ "f t",
1530
+ "ot h",
1531
+ "o ok",
1532
+ "ĠM ar",
1533
+ "re at",
1534
+ "w ay",
1535
+ "Ġc an",
1536
+ "ol low",
1537
+ "ou th",
1538
+ "we en",
1539
+ "ĠE n",
1540
+ "Ġ19 9",
1541
+ "ter s",
1542
+ "Ġre l",
1543
+ "in d",
1544
+ "Ġab out",
1545
+ "Ġse ason",
1546
+ "Ġag ain",
1547
+ "r al",
1548
+ "Ġth ree",
1549
+ "ation al",
1550
+ "Ġu nder",
1551
+ "ul ar",
1552
+ "Ġm e",
1553
+ "Ġth an",
1554
+ "ĠC om",
1555
+ "ĠA r",
1556
+ "h ip",
1557
+ "o b",
1558
+ "Ġn e",
1559
+ "Ġbet ween",
1560
+ "Ġf l",
1561
+ "h n",
1562
+ "v e",
1563
+ "Ġch ar",
1564
+ "Ġc ol",
1565
+ "Ġrec ord",
1566
+ "i ew",
1567
+ "r on",
1568
+ "f ore",
1569
+ "Ġth rough",
1570
+ "is ion",
1571
+ "or n",
1572
+ "Ġ 00",
1573
+ "oc k",
1574
+ "Ġ ver",
1575
+ "Ġl ater",
1576
+ "Ġn um",
1577
+ "Ġe nd",
1578
+ "ol og",
1579
+ "am es",
1580
+ "Ġp os",
1581
+ "Ġw rit",
1582
+ "Ġpro du",
1583
+ "Ġwh ile",
1584
+ "Ġa ct",
1585
+ "Ġre le",
1586
+ "Ġf ilm",
1587
+ "is hed",
1588
+ "Ġp r",
1589
+ "an s",
1590
+ "Ġre g",
1591
+ "Ġfor m",
1592
+ "Ġas s",
1593
+ "ĠS e",
1594
+ "ur y",
1595
+ "t ed",
1596
+ "t s",
1597
+ "Ġm ade",
1598
+ "Ġsu b",
1599
+ "Ġp e",
1600
+ "Ġs o",
1601
+ "or ld",
1602
+ "Ġre t",
1603
+ "ĠN ew",
1604
+ "Ġsp ec",
1605
+ "Ġa cc",
1606
+ "Ġ qu",
1607
+ "Ġwhe re",
1608
+ "en er",
1609
+ "Ġm ov",
1610
+ "he s",
1611
+ "mer ic",
1612
+ "at ing",
1613
+ "Ġin ter",
1614
+ "ĠL e",
1615
+ "ĠA meric",
1616
+ "Ġ ra",
1617
+ "Ġs ome",
1618
+ "Ġc o",
1619
+ "Ġl ar",
1620
+ "Ġb u",
1621
+ "Ġde f",
1622
+ "b um",
1623
+ "Ġa c",
1624
+ "Ġm us",
1625
+ "Ġf ollow",
1626
+ "ĠA t",
1627
+ "in s",
1628
+ "iv ed",
1629
+ "if ic",
1630
+ "u al",
1631
+ "Ġa m",
1632
+ "Ġsu ch",
1633
+ "Ġsec ond",
1634
+ "i ke",
1635
+ "Ġf our",
1636
+ "Ġin d",
1637
+ "an n",
1638
+ "he n",
1639
+ "Ġus ed",
1640
+ "ĠR e",
1641
+ "ic s",
1642
+ "le ct",
1643
+ "Ġd ay",
1644
+ "i el",
1645
+ "il y",
1646
+ "ĠTh is",
1647
+ "Ġ 0",
1648
+ "Ġp ubl",
1649
+ "Ġc all",
1650
+ "ĠJ o",
1651
+ "l l",
1652
+ "Ġal bum",
1653
+ "Ġ00 0",
1654
+ "ran s",
1655
+ "Ġd o",
1656
+ "an y",
1657
+ "Ġbe fore",
1658
+ "ro s",
1659
+ "ĠS h",
1660
+ "Ġs y",
1661
+ "a id",
1662
+ "ĠEn g",
1663
+ "Ġbe ing",
1664
+ "Ġ1 0",
1665
+ "u c",
1666
+ "Ġe p",
1667
+ "Ġsu pp",
1668
+ "Ġthe re",
1669
+ "Ġyear s",
1670
+ "ar s",
1671
+ "ow ever",
1672
+ "Ġ ent",
1673
+ "if e",
1674
+ "Ġh igh",
1675
+ "Ġf ound",
1676
+ "ir d",
1677
+ "Ġn o",
1678
+ "Ġs et",
1679
+ "in es",
1680
+ "iv er",
1681
+ "i o",
1682
+ "ot her",
1683
+ "j ect",
1684
+ "Ġs ur",
1685
+ "a j",
1686
+ "t en",
1687
+ "Ġt ra",
1688
+ "Ġ1 2",
1689
+ "is ed",
1690
+ "it ies",
1691
+ "vel op",
1692
+ "Ġb l",
1693
+ "al e",
1694
+ "Ġser ies",
1695
+ "Ġl oc",
1696
+ "Ġnum ber",
1697
+ "Ġp res",
1698
+ "an e",
1699
+ "aus e",
1700
+ "od e",
1701
+ "e k",
1702
+ "t on",
1703
+ "ĠS c",
1704
+ "i er",
1705
+ "is e",
1706
+ "Ġse ver",
1707
+ "in ce",
1708
+ "Ġb oth",
1709
+ "an k",
1710
+ "ro w",
1711
+ "ire ct",
1712
+ "s on",
1713
+ "Ġthe n",
1714
+ "ĠB rit",
1715
+ "i et",
1716
+ "Ġ1 6",
1717
+ "Ġep is",
1718
+ "Ġinclud ing",
1719
+ "it s",
1720
+ "ig in",
1721
+ "p r",
1722
+ "Ġ /",
1723
+ "Ġagain st",
1724
+ "Ġw ell",
1725
+ "Ġbec ame",
1726
+ "Ġex p",
1727
+ "Ġkn own",
1728
+ "Ġt rans",
1729
+ "Ġchar ac",
1730
+ "ĠâĢ Ķ",
1731
+ "r am",
1732
+ "Ġb ack",
1733
+ "Ġad d",
1734
+ "Ġp op",
1735
+ "Ġg o",
1736
+ "ur ch",
1737
+ "Ġdes c",
1738
+ "Ġs ing",
1739
+ "iel d",
1740
+ "Ġper form",
1741
+ "ain ed",
1742
+ "Ġre ce",
1743
+ "id ent",
1744
+ "Ġe m",
1745
+ "er t",
1746
+ "u res",
1747
+ "Ġin v",
1748
+ "Ġde p",
1749
+ "Ġ19 8",
1750
+ "a ir",
1751
+ "er n",
1752
+ "at her",
1753
+ "f ul",
1754
+ "Ġ Z",
1755
+ "Ġm on",
1756
+ "Ġman y",
1757
+ "Ġm ain",
1758
+ "Ġst ud",
1759
+ "Ġl ong",
1760
+ "in n",
1761
+ "th ough",
1762
+ "u p",
1763
+ "o ol",
1764
+ "ĠUn ited",
1765
+ "l ed",
1766
+ "em ent",
1767
+ "Ġ1 5",
1768
+ "ow er",
1769
+ "ĠJo hn",
1770
+ "Ġo p",
1771
+ "Ġ1 1",
1772
+ "in ed",
1773
+ "Ġm et",
1774
+ "o ber",
1775
+ "le y",
1776
+ "Ġ1 7",
1777
+ "Ġcent ury",
1778
+ "Ġte am",
1779
+ "Ġ est",
1780
+ "ĠA fter",
1781
+ "y l",
1782
+ "Ġm in",
1783
+ "u ch",
1784
+ "ut e",
1785
+ "Ġde velop",
1786
+ "ĠS he",
1787
+ "i am",
1788
+ "Ġsh ow",
1789
+ "el f",
1790
+ "Ġre p",
1791
+ "Ġcon c",
1792
+ "at ive",
1793
+ "Ġc re",
1794
+ "over n",
1795
+ "a red",
1796
+ "Ġ19 4",
1797
+ "Ġor igin",
1798
+ "Ġs m",
1799
+ "iv ers",
1800
+ "a z",
1801
+ "Ġle ad",
1802
+ "Ġsever al",
1803
+ "a h",
1804
+ "Ġo b",
1805
+ "Ġre v",
1806
+ "Ġm ill",
1807
+ "er m",
1808
+ "u ally",
1809
+ "o ot",
1810
+ "Ġbe gan",
1811
+ "Ġ19 6",
1812
+ "i red",
1813
+ "Ġd if",
1814
+ "Ġcont in",
1815
+ "Ġs ign",
1816
+ "i k",
1817
+ "ĠI nd",
1818
+ "ment s",
1819
+ "iz ed",
1820
+ "Ġ19 7",
1821
+ "Ġd irect",
1822
+ "a u",
1823
+ "Ġex t",
1824
+ "ros s",
1825
+ "em b",
1826
+ "d er",
1827
+ "Ġp ol",
1828
+ "Ġm ay",
1829
+ "a pt",
1830
+ "el s",
1831
+ "ĠW h",
1832
+ "Ġcomp le",
1833
+ "Ġar t",
1834
+ "ĠB r",
1835
+ "ĠI s",
1836
+ "un e",
1837
+ "t il",
1838
+ "Ġc rit",
1839
+ "Ġh ist",
1840
+ "Ġear ly",
1841
+ "Ġc ould",
1842
+ "ĠC on",
1843
+ "Ġd id",
1844
+ "Ġb el",
1845
+ "Ġcall ed",
1846
+ "u ed",
1847
+ "Ġn ear",
1848
+ "Ġepis ode",
1849
+ "y p",
1850
+ "Ġdesc rib"
1851
+ ]
1852
+ }
1853
+ }
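The merge list closed above is the ranked pair table a byte-pair-encoding tokenizer walks at encode time: a word is first split into byte-level symbols (Ġ marks a leading space), and the adjacent pair with the best, i.e. lowest, rank is merged repeatedly until no listed pair remains. A minimal sketch of that loop, with a handful of made-up merges standing in for the real table added here:

    # Illustrative only: a tiny rank table, not the merges added in this commit.
    merges = ["Ġ t", "h e", "Ġt he", "i n"]            # rank = position in the list
    ranks = {tuple(m.split()): i for i, m in enumerate(merges)}

    def bpe(symbols):
        """Greedily merge the best-ranked adjacent pair until none is left."""
        symbols = list(symbols)
        while len(symbols) > 1:
            pairs = [(symbols[i], symbols[i + 1]) for i in range(len(symbols) - 1)]
            best = min(pairs, key=lambda p: ranks.get(p, float("inf")))
            if best not in ranks:                      # nothing mergeable is left
                break
            i = pairs.index(best)
            symbols[i:i + 2] = ["".join(best)]         # collapse the pair in place
        return symbols

    print(bpe(["Ġ", "t", "h", "e"]))                   # -> ['Ġthe']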
tokenizer_config.json ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<|endoftext|>",
4
+ "eos_token": "<|endoftext|>",
5
+ "model_max_length": 1024,
6
+ "name_or_path": "hf-internal-testing/tiny-random-gpt2",
7
+ "special_tokens_map_file": null,
8
+ "tokenizer_class": "GPT2Tokenizer",
9
+ "unk_token": "<|endoftext|>"
10
+ }
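tokenizer_config.json above pins the tokenizer class to GPT2Tokenizer, reuses <|endoftext|> as the bos, eos and unk token, and caps model_max_length at 1024. A minimal loading sketch, assuming the files from this commit have been fetched into a local directory (the path below is a placeholder):

    from transformers import AutoTokenizer

    # Placeholder path: wherever this repo has been cloned or downloaded to.
    tok = AutoTokenizer.from_pretrained("./tiny-random-gpt2")

    enc = tok("hello world")                     # byte-level BPE encoding
    print(enc["input_ids"])                      # integer ids from the 1000-entry vocab
    print(tok.decode(enc["input_ids"]))          # decodes back to the original string
    print(tok.eos_token, tok.model_max_length)   # <|endoftext|> 1024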
vocab.json ADDED
@@ -0,0 +1 @@
1
+ {"<|endoftext|>":0,"!":1,"\"":2,"#":3,"$":4,"%":5,"&":6,"'":7,"(":8,")":9,"*":10,"+":11,",":12,"-":13,".":14,"/":15,"0":16,"1":17,"2":18,"3":19,"4":20,"5":21,"6":22,"7":23,"8":24,"9":25,":":26,";":27,"<":28,"=":29,">":30,"?":31,"@":32,"A":33,"B":34,"C":35,"D":36,"E":37,"F":38,"G":39,"H":40,"I":41,"J":42,"K":43,"L":44,"M":45,"N":46,"O":47,"P":48,"Q":49,"R":50,"S":51,"T":52,"U":53,"V":54,"W":55,"X":56,"Y":57,"Z":58,"[":59,"\\":60,"]":61,"^":62,"_":63,"`":64,"a":65,"b":66,"c":67,"d":68,"e":69,"f":70,"g":71,"h":72,"i":73,"j":74,"k":75,"l":76,"m":77,"n":78,"o":79,"p":80,"q":81,"r":82,"s":83,"t":84,"u":85,"v":86,"w":87,"x":88,"y":89,"z":90,"|":91,"}":92,"~":93,"¡":94,"¢":95,"£":96,"¤":97,"¥":98,"¦":99,"§":100,"¨":101,"©":102,"ª":103,"«":104,"¬":105,"®":106,"¯":107,"°":108,"±":109,"²":110,"³":111,"´":112,"µ":113,"¶":114,"·":115,"¸":116,"¹":117,"º":118,"»":119,"¼":120,"½":121,"¾":122,"¿":123,"Â":124,"Ã":125,"Ä":126,"Å":127,"Æ":128,"Ç":129,"È":130,"É":131,"Ê":132,"Ë":133,"Ì":134,"Í":135,"Î":136,"Ï":137,"Ð":138,"Ñ":139,"Ö":140,"×":141,"Ø":142,"Ù":143,"Ü":144,"à":145,"á":146,"â":147,"ã":148,"ä":149,"å":150,"æ":151,"ç":152,"è":153,"é":154,"ë":155,"ì":156,"ï":157,"Ċ":158,"Ġ":159,"Ģ":160,"ģ":161,"Ĥ":162,"ĥ":163,"Ħ":164,"ħ":165,"Ĩ":166,"ĩ":167,"Ī":168,"ī":169,"Ĭ":170,"ĭ":171,"Į":172,"į":173,"İ":174,"ı":175,"IJ":176,"ij":177,"Ĵ":178,"ĵ":179,"Ķ":180,"ķ":181,"ĸ":182,"Ĺ":183,"ĺ":184,"Ļ":185,"ļ":186,"Ľ":187,"ľ":188,"Ŀ":189,"ŀ":190,"Ł":191,"ł":192,"Ń":193,"Ġt":194,"he":195,"Ġa":196,"in":197,"Ġthe":198,"er":199,"on":200,"Ġ,":201,"re":202,"Ġs":203,"ed":204,"Ġo":205,"Ġw":206,"nd":207,"at":208,"Ġ.":209,"or":210,"it":211,"Ġc":212,"en":213,"Ġf":214,"is":215,"es":216,"ar":217,"Ġof":218,"Ġb":219,"an":220,"Ġin":221,"al":222,"ing":223,"Ġp":224,"Ġand":225,"as":226,"Ġto":227,"ro":228,"ic":229,"Ġm":230,"Ġd":231,"Ġh":232,"ion":233,"le":234,"ou":235,"ĠT":236,"Ġre":237,"Ġ=":238,"Ġ\"":239,"ĠA":240,"ĠS":241,"ent":242,"il":243,"Ġth":244,"Ġ1":245,"st":246,"ĠC":247,"el":248,"om":249,"Ġl":250,"am":251,"ĠĊ":252,"Ġe":253,"Ġn":254,"Ġ@":255,"ad":256,"ac":257,"Ġwas":258,"ĠM":259,"ur":260,"ĠThe":261,"ec":262,"Ġon":263,"ly":264,"ĠB":265,"ĠI":266,"Ġg":267,"Ġ'":268,"et":269,"ol":270,"id":271,"iv":272,"im":273,"Ġfor":274,"ir":275,"-@":276,"Ġ@-@":277,"ig":278,"ot":279,"ter":280,"Ġas":281,"ĠH":282,"us":283,"ow":284,"Ġst":285,"ut":286,"ith":287,"ay":288,"Ġ2":289,"ĠP":290,"ation":291,"ver":292,"Ġbe":293,"her":294,"Ġthat":295,"Ġwith":296,"ĠR":297,"ce":298,"th":299,"ĠD":300,"Ġis":301,"un":302,"em":303,"ĠF":304,"Ġwh":305,"ul":306,"Ġby":307,"Ġal":308,"ch":309,"Ġ)":310,"Ġ(":311,"ĠW":312,"Ġcon":313,"ra":314,"ĠG":315,"os":316,"ĠL":317,"ĠN":318,"Ġat":319,"ers":320,"ct":321,"Ġit":322,"Ġ19":323,"rom":324,"and":325,"Ġan":326,"um":327,"est":328,"ĠJ":329,"ag":330,"Ġhe":331,"00":332,"ist":333,"ain":334,"od":335,"av":336,"ri":337,"ĠE":338,"ĠO":339,"Ġfrom":340,"Ġcom":341,"Ġhis":342,"op":343,"Ġpro":344,"res":345,"ies":346,"if":347,"Ġv":348,"ort":349,"ere":350,"ill":351,"ld":352,"Ġde":353,"pp":354,"Ġsu":355,"ore":356,"ĠIn":357,"Ġr":358,"Ġse":359,"Ġwere":360,"ew":361,"ong":362,"igh":363,"ard":364,"ate":365,"all":366,"art":367,"ak":368,"ich":369,"Ġch":370,"Ġor":371,"ab":372,"ant":373,"ud":374,"oc":375,"ber":376,"Ġex":377,"gh":378,"ity":379,"ated":380,"pt":381,"ess":382,"ear":383,"ĠK":384,"Ġpl":385,"ame":386,"qu":387,"ive":388,"rou":389,"Ġare":390,"Ġâ":391,"Ġsh":392,"Ġk":393,"ack":394,"ect":395,"ĠâĢ":396,"ĠU":397,"Ġhad":398,"se":399,"Ġwhich":400,"red":401,"ov":402,"ĠSt":403,"ast":404,"Ġsp":405,"ian":406,"Ġy":407,"ment":408,"Ġle":409,"Ġnot":410,"ge":411,"ord":412,"r
it":413,"ip":414,"ine":415,"ell":416,"ally":417,"our":418,"ost":419,"ight":420,"ther":421,"ap":422,"Ġu":423,"ish":424,"ĠCh":425,"oun":426,"ia":427,"Ġ3":428,"ave":429,"ary":430,"ust":431,"og":432,"Ġ200":433,"Ġun":434,"ous":435,"irst":436,"ĠV":437,"cc":438,"Ġinc":439,"Ġ;":440,"Ġcomp":441,"ru":442,"ions":443,"Ġtheir":444,"Ġbut":445,"ide":446,"ure":447,"so":448,"Ġcont":449,"Ġint":450,"fter":451,"ical":452,"ial":453,"Ġar":454,"Ġfirst":455,"ould":456,"Ġits":457,"hed":458,"ĠâĢĵ":459,"Ġwhe":460,"wo":461,"out":462,"ub":463,"Ġ20":464,"ff":465,"Ġ:":466,"ue":467,"Ġher":468,"own":469,"ok":470,"Ġalso":471,"Ġcl":472,"per":473,"ign":474,"ater":475,"ran":476,"orm":477,"ie":478,"ome":479,"ork":480,"ass":481,"ire":482,"end":483,"Ġres":484,"Ġab":485,"Ġad":486,"Ġus":487,"ry":488,"Ġrec":489,"Ġhave":490,"age":491,"ĠHe":492,"Ġ4":493,"Ġro":494,"mer":495,"Ġone":496,"ond":497,"low":498,"Ġhas":499,"ĠTh":500,"du":501,"Ġ5":502,"Ġper":503,"Ġbeen":504,"ime":505,"Ġtwo":506,"ence":507,"land":508,"Ġ18":509,".@":510,"Ġ@.@":511,"ult":512,"ree":513,"ough":514,"ile":515,"Ġwho":516,"ĠAl":517,"Ġsc":518,"uring":519,"pl":520,"ory":521,"ition":522,"ric":523,"ations":524,"Ġdis":525,"Ġthis":526,"Ġbec":527,"Ġapp":528,"iz":529,"ĠIt":530,"are":531,"ach":532,"lud":533,"ade":534,"Ġplay":535,"Ġj":536,"Ġman":537,"act":538,"ely":539,"Ġpart":540,"Ġdes":541,"Ġag":542,"Ġthey":543,"Ġyear":544,"ount":545,"Ġ201":546,"Ġover":547,"Ġother":548,"ound":549,"Ġafter":550,"ib":551,"over":552,"Ġser":553,"Ġen":554,"Ġoff":555,"Ġim":556,"ction":557,"ĠY":558,"ke":559,"ite":560,",@":561,"Ġ@,@":562,"te":563,"urn":564,"Ġinclud":565,"ress":566,"ance":567,"ang":568,"Ġatt":569,"ice":570,"ace":571,"ark":572,"Ġout":573,"wn":574,"ph":575,"ember":576,"Ġpre":577,"Ġup":578,"ens":579,"man":580,"Ġev":581,"Ġtime":582,"nder":583,"rough":584,"ced":585,"Ġfin":586,"Ġinto":587,"one":588,"port":589,"round":590,"we":591,"ren":592,"les":593,"int":594,"ĠOn":595,"vel":596,"Ġcomm":597,"Ġshe":598,"ason":599,"amp":600,"Ġte":601,"Ġwould":602,"ward":603,"Ġmore":604,"Ġ6":605,"ied":606,"ose":607,"rib":608,"ĠUn":609,"Ġall":610,"ings":611,"tern":612,"ces":613,"able":614,"Ġwe":615,"ited":616,"ever":617,"ents":618,"Ġhim":619,"ased":620,"ors":621,"oy":622,"ood":623,"Ġcent":624,"ix":625,"ase":626,"ild":627,"ĠAn":628,"Ġ7":629,"Ġwork":630,"ates":631,"ious":632,"ath":633,"Ġpo":634,"rop":635,"old":636,"als":637,"iss":638,"ey":639,"ict":640,"Ġfe":641,"Ġthem":642,"gan":643,"Ġsec":644,"Ġbet":645,"Ġwhen":646,"Ġsong":647,"Ġrem":648,"ep":649,"form":650,"ail":651,"fer":652,"Ġear":653,"ubl":654,"aw":655,"Ġkn":656,"ake":657,"aus":658,"Ġmost":659,"Ġcons":660,"Ġduring":661,"ĠAs":662,"orth":663,"Ġnew":664,"ered":665,"ilm":666,"ved":667,"att":668,"Ġonly":669,"Ġ9":670,"Ġdec":671,"Ġ8":672,"ick":673,"Ġgame":674,"ons":675,"ug":676,"Ġtr":677,"ft":678,"oth":679,"ook":680,"ĠMar":681,"reat":682,"way":683,"Ġcan":684,"ollow":685,"outh":686,"ween":687,"ĠEn":688,"Ġ199":689,"ters":690,"Ġrel":691,"ind":692,"Ġabout":693,"Ġseason":694,"Ġagain":695,"ral":696,"Ġthree":697,"ational":698,"Ġunder":699,"ular":700,"Ġme":701,"Ġthan":702,"ĠCom":703,"ĠAr":704,"hip":705,"ob":706,"Ġne":707,"Ġbetween":708,"Ġfl":709,"hn":710,"ve":711,"Ġchar":712,"Ġcol":713,"Ġrecord":714,"iew":715,"ron":716,"fore":717,"Ġthrough":718,"ision":719,"orn":720,"Ġ00":721,"ock":722,"Ġver":723,"Ġlater":724,"Ġnum":725,"Ġend":726,"olog":727,"ames":728,"Ġpos":729,"Ġwrit":730,"Ġprodu":731,"Ġwhile":732,"Ġact":733,"Ġrele":734,"Ġfilm":735,"ished":736,"Ġpr":737,"ans":738,"Ġreg":739,"Ġform":740,"Ġass":741,"ĠSe":742,"ury":743,"ted":744,"ts":745,"Ġmade":746,"Ġsub":747,"Ġpe":748,"Ġso
":749,"orld":750,"Ġret":751,"ĠNew":752,"Ġspec":753,"Ġacc":754,"Ġqu":755,"Ġwhere":756,"ener":757,"Ġmov":758,"hes":759,"meric":760,"ating":761,"Ġinter":762,"ĠLe":763,"ĠAmeric":764,"Ġra":765,"Ġsome":766,"Ġco":767,"Ġlar":768,"Ġbu":769,"Ġdef":770,"bum":771,"Ġac":772,"Ġmus":773,"Ġfollow":774,"ĠAt":775,"ins":776,"ived":777,"ific":778,"ual":779,"Ġam":780,"Ġsuch":781,"Ġsecond":782,"ike":783,"Ġfour":784,"Ġind":785,"ann":786,"hen":787,"Ġused":788,"ĠRe":789,"ics":790,"lect":791,"Ġday":792,"iel":793,"ily":794,"ĠThis":795,"Ġ0":796,"Ġpubl":797,"Ġcall":798,"ĠJo":799,"ll":800,"Ġalbum":801,"Ġ000":802,"rans":803,"Ġdo":804,"any":805,"Ġbefore":806,"ros":807,"ĠSh":808,"Ġsy":809,"aid":810,"ĠEng":811,"Ġbeing":812,"Ġ10":813,"uc":814,"Ġep":815,"Ġsupp":816,"Ġthere":817,"Ġyears":818,"ars":819,"owever":820,"Ġent":821,"ife":822,"Ġhigh":823,"Ġfound":824,"ird":825,"Ġno":826,"Ġset":827,"ines":828,"iver":829,"io":830,"other":831,"ject":832,"Ġsur":833,"aj":834,"ten":835,"Ġtra":836,"Ġ12":837,"ised":838,"ities":839,"velop":840,"Ġbl":841,"ale":842,"Ġseries":843,"Ġloc":844,"Ġnumber":845,"Ġpres":846,"ane":847,"ause":848,"ode":849,"ek":850,"ton":851,"ĠSc":852,"ier":853,"ise":854,"Ġsever":855,"ince":856,"Ġboth":857,"ank":858,"row":859,"irect":860,"son":861,"Ġthen":862,"ĠBrit":863,"iet":864,"Ġ16":865,"Ġepis":866,"Ġincluding":867,"its":868,"igin":869,"pr":870,"Ġ/":871,"Ġagainst":872,"Ġwell":873,"Ġbecame":874,"Ġexp":875,"Ġknown":876,"Ġtrans":877,"Ġcharac":878,"ĠâĢĶ":879,"ram":880,"Ġback":881,"Ġadd":882,"Ġpop":883,"Ġgo":884,"urch":885,"Ġdesc":886,"Ġsing":887,"ield":888,"Ġperform":889,"ained":890,"Ġrece":891,"ident":892,"Ġem":893,"ert":894,"ures":895,"Ġinv":896,"Ġdep":897,"Ġ198":898,"air":899,"ern":900,"ather":901,"ful":902,"ĠZ":903,"Ġmon":904,"Ġmany":905,"Ġmain":906,"Ġstud":907,"Ġlong":908,"inn":909,"though":910,"up":911,"ool":912,"ĠUnited":913,"led":914,"ement":915,"Ġ15":916,"ower":917,"ĠJohn":918,"Ġop":919,"Ġ11":920,"ined":921,"Ġmet":922,"ober":923,"ley":924,"Ġ17":925,"Ġcentury":926,"Ġteam":927,"Ġest":928,"ĠAfter":929,"yl":930,"Ġmin":931,"uch":932,"ute":933,"Ġdevelop":934,"ĠShe":935,"iam":936,"Ġshow":937,"elf":938,"Ġrep":939,"Ġconc":940,"ative":941,"Ġcre":942,"overn":943,"ared":944,"Ġ194":945,"Ġorigin":946,"Ġsm":947,"ivers":948,"az":949,"Ġlead":950,"Ġseveral":951,"ah":952,"Ġob":953,"Ġrev":954,"Ġmill":955,"erm":956,"ually":957,"oot":958,"Ġbegan":959,"Ġ196":960,"ired":961,"Ġdif":962,"Ġcontin":963,"Ġsign":964,"ik":965,"ĠInd":966,"ments":967,"ized":968,"Ġ197":969,"Ġdirect":970,"au":971,"Ġext":972,"ross":973,"emb":974,"der":975,"Ġpol":976,"Ġmay":977,"apt":978,"els":979,"ĠWh":980,"Ġcomple":981,"Ġart":982,"ĠBr":983,"ĠIs":984,"une":985,"til":986,"Ġcrit":987,"Ġhist":988,"Ġearly":989,"Ġcould":990,"ĠCon":991,"Ġdid":992,"Ġbel":993,"Ġcalled":994,"ued":995,"Ġnear":996,"Ġepisode":997,"yp":998,"Ġdescrib":999}