Commit 
							
							·
						
						9a127b5
	
1
								Parent(s):
							
							15626cf
								
add eval results
Browse files- .gitignore +1 -0
 - common.py +1 -1
 - data/txt360_eval/CKPT Eval - BoolQ.csv +68 -0
 - data/txt360_eval/CKPT Eval - GSM8K.csv +68 -0
 - data/txt360_eval/CKPT Eval - HellaSwag.csv +69 -0
 - data/txt360_eval/CKPT Eval - MATH.csv +68 -0
 - data/txt360_eval/CKPT Eval - MMLU.csv +68 -0
 - data/txt360_eval/CKPT Eval - MedQA.csv +68 -0
 - data/txt360_eval/CKPT Eval - NQ.csv +68 -0
 - data/txt360_eval/CKPT Eval - PIQA.csv +69 -0
 - data/txt360_eval/CKPT Eval - TriviaQA.csv +68 -0
 - data/txt360_eval/CKPT Eval - WinoGrande.csv +69 -0
 - main.py +9 -10
 - overview.py +1 -1
 - results.py +122 -14
 
    	
        .gitignore
    CHANGED
    
    | 
         @@ -1,2 +1,3 @@ 
     | 
|
| 1 | 
         
             
            .sesskey
         
     | 
| 2 | 
         
             
            *.pyc
         
     | 
| 
         | 
| 
         | 
|
| 1 | 
         
             
            .sesskey
         
     | 
| 2 | 
         
             
            *.pyc
         
     | 
| 3 | 
         
            +
            .DS_Store
         
     | 
    	
        common.py
    CHANGED
    
    | 
         @@ -282,7 +282,7 @@ table_div_pii = Div(NotStr(table_html_pii), style="display: flex; justify-conten 
     | 
|
| 282 | 
         | 
| 283 | 
         
             
            global_div = Div(
         
     | 
| 284 | 
         
             
                Section(
         
     | 
| 285 | 
         
            -
                    H2("Overview of  
     | 
| 286 | 
         
             
                    H3("What This Section Contains"),
         
     | 
| 287 | 
         
             
                    P(
         
     | 
| 288 | 
         
             
                        "This section discusses all details related to deduplication and filterings steps that were uniformly applied to all data. The section is split into the following topic areas: "
         
     | 
| 
         | 
|
| 282 | 
         | 
| 283 | 
         
             
            global_div = Div(
         
     | 
| 284 | 
         
             
                Section(
         
     | 
| 285 | 
         
            +
                    H2("Overview of Shared Processing Steps"),
         
     | 
| 286 | 
         
             
                    H3("What This Section Contains"),
         
     | 
| 287 | 
         
             
                    P(
         
     | 
| 288 | 
         
             
                        "This section discusses all details related to deduplication and filterings steps that were uniformly applied to all data. The section is split into the following topic areas: "
         
     | 
    	
        data/txt360_eval/CKPT Eval - BoolQ.csv
    ADDED
    
    | 
         @@ -0,0 +1,68 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            0-shot,Slim-Pajama 600B (bsz=4K x 1024),,,FineWeb-1.5T,Ours-Base,Ours-Upsampling1,Ours-Upsampling2,Ours-Code-Upsampling2,All-Upsampling1,All-Upsampling1,All-Upsampling1,All-Upsampling1,DCLM-Base
         
     | 
| 2 | 
         
            +
            hf-time: 4 min,Llama-8x8B-baseline,Llama-8x8B-seq8192,Llama-8x8B-mup,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-1x8B-seq8192,Llama_extend-1x8B-seq8192,Jais-1x8B-seq8192,Llama-1x8B-seq8192
         
     | 
| 3 | 
         
            +
            5k,0.5761,0.5624,,0.6116,0.5514,0.5945,0.5446,0.5336,0.5902,0.5908,0.5394,0.5865,0.5284
         
     | 
| 4 | 
         
            +
            10k,0.6242,0.5853,,0.6131,,0.5358,0.6122,0.6080,0.5471,0.5511,0.6138,0.5902,0.5780
         
     | 
| 5 | 
         
            +
            15k,0.6480,0.6291,,0.6061,0.6217,0.5468,0.6205,0.6242,0.6248,0.5917,0.6211,0.5933,0.5713
         
     | 
| 6 | 
         
            +
            20k,0.6541,0.6474,,0.5865,0.6187,0.6122,0.6199,0.6116,0.6119,0.5636,0.6239,0.5988,0.5850
         
     | 
| 7 | 
         
            +
            25k,0.6670,0.6012,,0.6398,0.6251,0.6162,0.6349,0.6239,0.6291,0.5630,0.6336,0.6232,0.6312
         
     | 
| 8 | 
         
            +
            30k,0.6777,0.6523,,0.6379,0.6083,0.6260,0.6437,0.6263,0.6107,0.5835,0.5865,0.6391,0.6425
         
     | 
| 9 | 
         
            +
            35k,0.6495,0.6584,,0.6388,,0.6333,0.6346,0.6343,0.6144,0.4933,0.6043,0.6278,0.6480
         
     | 
| 10 | 
         
            +
            40k,0.6771,0.6930,,0.6489,0.6410,0.6596,0.6330,0.6214,0.6520,0.5685,0.5768,0.6343,0.6505
         
     | 
| 11 | 
         
            +
            45k,0.6624,0.6887,,0.6590,0.6422,0.6223,0.6401,0.6131,0.6153,0.5578,0.6058,0.6336,0.6529
         
     | 
| 12 | 
         
            +
            50k,0.6761,0.6951,,0.6575,0.6566,0.6593,0.6557,0.6058,0.6541,0.5972,0.6018,0.6177,0.6563
         
     | 
| 13 | 
         
            +
            55k,0.6847,0.6725,,0.6752,0.6321,0.6688,0.6523,0.6520,0.6679,0.5908,0.5343,0.6214,0.6618
         
     | 
| 14 | 
         
            +
            60k,0.6920,0.6697,,0.6566,0.6226,0.6642,0.6401,0.6162,0.6361,0.5908,0.5972,0.6226,0.6645
         
     | 
| 15 | 
         
            +
            65k,0.6979,0.6905,,0.6865,0.6352,0.6758,0.6688,0.6691,0.6942,0.6315,0.5682,0.6196,0.6352
         
     | 
| 16 | 
         
            +
            70k,0.7104,0.6966,,0.6795,0.6456,0.6746,0.6651,0.6624,0.6575,0.5997,0.5324,0.6358,0.6526
         
     | 
| 17 | 
         
            +
            75k,0.7269,0.6850,,0.6862,0.6514,,0.6621,0.6774,0.6817,0.6217,0.6009,0.6453,0.6535
         
     | 
| 18 | 
         
            +
            80k,0.6997,0.6817,,0.6945,0.6327,0.6664,0.6667,0.6709,0.6703,0.6275,0.5896,0.6502,0.6612
         
     | 
| 19 | 
         
            +
            85k,0.7346,0.6939,,0.6853,0.6746,0.6902,0.6602,0.6330,0.6737,0.6272,0.5239,0.6489,0.6703
         
     | 
| 20 | 
         
            +
            90k,0.7254,0.6908,,0.6936,0.6612,0.6713,0.6755,0.6835,0.6315,0.6275,0.5428,0.6128,0.6807
         
     | 
| 21 | 
         
            +
            95k,0.7165,0.7229,,0.7003,0.6587,,0.6823,0.6404,0.6670,0.6089,0.6138,0.6456,0.6612
         
     | 
| 22 | 
         
            +
            100k,0.7153,0.7073,,0.6869,,0.6676,0.6746,0.6618,0.6587,0.6006,0.5584,0.6566,0.6810
         
     | 
| 23 | 
         
            +
            105k,0.7333,0.7147,,0.6682,,0.6899,0.6609,0.6853,0.6853,0.6544,0.5740,0.6520,0.6755
         
     | 
| 24 | 
         
            +
            110k,0.7376,0.7095,,0.6954,0.6664,0.6703,0.6810,0.6612,0.6798,0.6618,,0.6346,0.6434
         
     | 
| 25 | 
         
            +
            115k,0.7168,0.7095,,0.7156,0.6645,0.6746,0.6997,0.6829,0.6813,0.6523,,0.6596,0.6920
         
     | 
| 26 | 
         
            +
            120k,0.7370,0.7226,,0.7177,0.6648,0.6752,0.7015,,0.6841,0.6633,,0.6587,0.6890
         
     | 
| 27 | 
         
            +
            125k,0.7361,0.7144,,0.7034,0.6636,0.6826,0.6869,0.6657,,0.6593,,0.6593,0.6795
         
     | 
| 28 | 
         
            +
            130k,0.7284,0.7269,,0.6939,0.6786,0.6554,0.6988,0.6719,0.6777,0.6260,,,0.7018
         
     | 
| 29 | 
         
            +
            135k,0.7483,0.7141,,0.7128,,0.6847,0.7028,0.6838,0.6933,0.6602,,,0.6966
         
     | 
| 30 | 
         
            +
            140k,,0.7312,,0.7080,,0.6777,0.6997,0.6957,0.7040,0.6624,,,0.6884
         
     | 
| 31 | 
         
            +
            145k,,,,0.7281,,0.6844,0.6908,0.6743,0.6914,0.6657,,,0.7061
         
     | 
| 32 | 
         
            +
            150k,,,,0.7297,,0.6795,,0.6807,0.6991,0.6526,,,0.7024
         
     | 
| 33 | 
         
            +
            155k,,,,0.7162,,0.7021,0.6976,0.6792,0.6927,0.6587,,,0.7028
         
     | 
| 34 | 
         
            +
            160k,,,,0.6902,,0.6810,0.6985,0.6930,0.6893,0.6434,,,0.7098
         
     | 
| 35 | 
         
            +
            165k,,,,0.7239,,0.6896,0.7037,,0.7021,0.6581,,,0.7080
         
     | 
| 36 | 
         
            +
            170k,,,,0.7471,,0.6780,0.7141,,0.6911,0.6761,,,0.7058
         
     | 
| 37 | 
         
            +
            175k,,,,0.7486,,0.6817,0.6942,,0.7095,0.6557,,,0.7021
         
     | 
| 38 | 
         
            +
            180k,,,,0.6985,,0.6979,0.7162,,0.7067,0.6468,,,0.6523
         
     | 
| 39 | 
         
            +
            185k,,,,0.7187,,0.6887,0.7031,,0.6917,0.6642,,,0.6914
         
     | 
| 40 | 
         
            +
            190k,,,,0.7333,,0.6963,,,0.7113,0.6563,,,0.718
         
     | 
| 41 | 
         
            +
            195k,,,,0.7269,,0.7021,,,0.7199,0.6817,,,0.7165
         
     | 
| 42 | 
         
            +
            200k,,,,0.7135,,0.7080,,,0.707,0.6709,,,0.7015
         
     | 
| 43 | 
         
            +
            205k,,,,0.7388,,0.7015,,,0.7168,0.6722,,,0.722
         
     | 
| 44 | 
         
            +
            210k,,,,0.7489,,0.7089,,,,0.6765,,,0.6948
         
     | 
| 45 | 
         
            +
            215k,,,,0.7538,,0.7183,,,0.7309,0.6869,,,0.6835
         
     | 
| 46 | 
         
            +
            220k,,,,0.7474,,0.7171,,,0.7398,0.6893,,,
         
     | 
| 47 | 
         
            +
            225k,,,,0.7251,,0.7131,,,0.7061,0.6801,,,
         
     | 
| 48 | 
         
            +
            230k,,,,0.7083,,,,,0.7232,0.6765,,,
         
     | 
| 49 | 
         
            +
            235k,,,,0.6930,,,,,0.6884,0.6434,,,
         
     | 
| 50 | 
         
            +
            240k,,,,0.7541,,,,,,0.6875,,,
         
     | 
| 51 | 
         
            +
            245k,,,,0.7541,,,,,,0.6713,,,
         
     | 
| 52 | 
         
            +
            250k,,,,0.7498,,,,,,0.6798,,,
         
     | 
| 53 | 
         
            +
            255k,,,,0.7749,,,,,,0.6578,,,
         
     | 
| 54 | 
         
            +
            260k,,,,0.7615,,,,,,0.6954,,,
         
     | 
| 55 | 
         
            +
            265k,,,,0.7486,,,,,,0.6807,,,
         
     | 
| 56 | 
         
            +
            270k,,,,0.7226,,,,,,0.6869,,,
         
     | 
| 57 | 
         
            +
            275k,,,,0.7269,,,,,,0.6841,,,
         
     | 
| 58 | 
         
            +
            280k,,,,0.7517,,,,,,0.6804,,,
         
     | 
| 59 | 
         
            +
            285k,,,,0.7150,,,,,,0.7006,,,
         
     | 
| 60 | 
         
            +
            290k,,,,,,,,,,0.6826,,,
         
     | 
| 61 | 
         
            +
            300k,,,,,,,,,,0.6706,,,
         
     | 
| 62 | 
         
            +
            305k,,,,,,,,,,0.7006,,,
         
     | 
| 63 | 
         
            +
            310k,,,,,,,,,,0.6777,,,
         
     | 
| 64 | 
         
            +
            315k,,,,,,,,,,0.6859,,,
         
     | 
| 65 | 
         
            +
            320k,,,,,,,,,,0.6939,,,
         
     | 
| 66 | 
         
            +
            325k,,,,,,,,,,,,,
         
     | 
| 67 | 
         
            +
            330k,,,,,,,,,,,,,
         
     | 
| 68 | 
         
            +
            335k,,,,,,,,,,,,,
         
     | 
    	
        data/txt360_eval/CKPT Eval - GSM8K.csv
    ADDED
    
    | 
         @@ -0,0 +1,68 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            5-shot,Slim-Pajama 600B (bsz=4K x 1024),,,FineWeb-1.5T,Ours-Base,Ours-Upsampling1,Ours-Upsampling2,Ours-Code-Upsampling2,All-Upsampling1,All-Upsampling1,All-Upsampling1,All-Upsampling1,DCLM-Base
         
     | 
| 2 | 
         
            +
            hf-time: 115 min,Llama-8x8B-baseline,Llama-8x8B-seq8192,Llama-8x8B-mup,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-1x8B-seq8192,Llama_extend-1x8B-seq8192,Jais-1x8B-seq8192,Llama-1x8B-seq8192
         
     | 
| 3 | 
         
            +
            5k,0.0152,0.0099,,,0.0076,0.0015,0.0045,0.0030,,0.0152,0.0106,0.0197,0.0197
         
     | 
| 4 | 
         
            +
            10k,0.0152,0.0190,,0.0015,,0.0091,0.0000,0.0212,0.0144,0.0159,0.0136,0.0174,0.0243
         
     | 
| 5 | 
         
            +
            15k,0.0182,0.0167,,0.0053,0.0068,0.0045,0.0083,0.0212,0.0068,0.0174,0.0190,0.0174,0.0136
         
     | 
| 6 | 
         
            +
            20k,0.0250,0.0212,,,,,0.0030,0.0159,0.0220,0.0167,0.0190,0.0220,0.0174
         
     | 
| 7 | 
         
            +
            25k,0.0288,0.0114,,,,0.0129,0.0053,0.0258,0.0144,0.0152,0.0144,0.0144,0.0144
         
     | 
| 8 | 
         
            +
            30k,0.0220,0.0265,,0.0197,0.0038,0.0152,0.0167,0.0227,0.0220,0.0205,0.0129,0.0167,0.0038
         
     | 
| 9 | 
         
            +
            35k,0.0296,0.0212,,0.0136,0.0045,0.0190,0.0045,0.0227,0.0220,0.0174,0.0174,0.0243,0.0182
         
     | 
| 10 | 
         
            +
            40k,0.0235,0.0288,,0.0068,0.0121,0.0220,0.0015,0.0243,0.0265,0.0152,0.0212,0.0190,0.0182
         
     | 
| 11 | 
         
            +
            45k,0.0387,0.0250,,0.0258,0.0038,0.0273,0.0106,0.0296,0.0273,0.0182,0.0152,0.0174,0.0129
         
     | 
| 12 | 
         
            +
            50k,0.0318,0.0303,,0.0015,0.0243,0.0227,0.0121,0.0190,0.0220,0.0197,0.0205,0.0182,0.0068
         
     | 
| 13 | 
         
            +
            55k,0.0296,0.0311,,0.0023,0.0235,0.0235,0.0250,0.0326,0.0197,0.0182,0.0174,0.0250,0.0091
         
     | 
| 14 | 
         
            +
            60k,0.0432,0.0326,,0.0167,0.0212,0.0212,0.0182,0.0349,0.0220,0.0182,0.0099,0.0190,0.0197
         
     | 
| 15 | 
         
            +
            65k,0.0470,0.0379,,0.0015,0.0159,0.0281,0.0136,0.0296,0.0212,0.0212,0.0129,0.0205,0.0114
         
     | 
| 16 | 
         
            +
            70k,0.0432,0.0417,,0.0136,0.0197,0.0174,0.0114,0.0341,0.0243,0.0205,0.0136,0.0250,0.0091
         
     | 
| 17 | 
         
            +
            75k,0.0508,0.0470,,0.0174,0.0121,0.0250,0.0182,0.0356,0.0288,0.0281,0.0174,0.0190,0.0106
         
     | 
| 18 | 
         
            +
            80k,0.0561,0.0417,,0.0068,0.0000,0.0190,0.0083,0.0318,0.0356,0.0273,0.0167,0.0265,0.0182
         
     | 
| 19 | 
         
            +
            85k,0.0728,0.0341,,0.0341,0.0190,0.0296,0.0205,0.0265,0.0250,0.0220,0.0129,0.0235,0.0083
         
     | 
| 20 | 
         
            +
            90k,0.0690,0.0425,,0.0197,0.0190,0.0281,0.0061,0.0417,0.0265,0.0273,0.0167,0.0190,0.0182
         
     | 
| 21 | 
         
            +
            95k,0.0735,0.0447,,0.0167,0.0250,0.0281,0.0136,0.0349,0.0281,0.0174,0.0106,0.0288,0.0159
         
     | 
| 22 | 
         
            +
            100k,0.0637,0.0470,,0.0159,,0.0227,0.0045,0.0409,0.0311,0.0265,0.0205,0.0190,0.0190
         
     | 
| 23 | 
         
            +
            105k,0.0637,0.0447,,0.0341,,0.0303,0.0129,0.0371,0.0311,0.0273,0.0205,0.0311,0.0129
         
     | 
| 24 | 
         
            +
            110k,0.0872,0.0576,,0.0038,0.0273,0.0129,0.0205,0.0478,0.0296,0.0212,,0.0281,0.0182
         
     | 
| 25 | 
         
            +
            115k,0.0788,0.0576,,0.0091,0.0167,0.0311,0.0167,0.0508,0.0349,0.0220,,0.0220,0.0174
         
     | 
| 26 | 
         
            +
            120k,0.0834,0.0455,,0.0227,0.0265,0.0167,0.0212,0.0371,0.0318,0.0167,,0.0220,0.0152
         
     | 
| 27 | 
         
            +
            125k,0.1001,0.0493,,0.0288,0.0250,0.0205,0.0387,0.0402,0.0318,0.0182,,0.0235,0.0144
         
     | 
| 28 | 
         
            +
            130k,0.0766,0.0470,,0.0068,0.0258,0.0288,0.0174,,0.0341,0.0243,,,0.0205
         
     | 
| 29 | 
         
            +
            135k,0.0879,0.0607,,0.0190,,0.0349,0.0258,0.0409,0.0288,0.0212,,,0.0281
         
     | 
| 30 | 
         
            +
            140k,,0.0569,,0.0379,,0.0356,0.0227,0.0440,0.0341,0.0144,,,0.0144
         
     | 
| 31 | 
         
            +
            145k,,,,0.0341,,0.0379,0.0015,0.0387,,0.0174,,,0.0273
         
     | 
| 32 | 
         
            +
            150k,,,,,,0.0281,,0.0470,0.0265,0.0220,,,0.0258
         
     | 
| 33 | 
         
            +
            155k,,,,0.0318,,0.0303,0.0121,0.0561,0.0523,0.0227,,,0.0243
         
     | 
| 34 | 
         
            +
            160k,,,,0.0356,,0.0243,0.0061,0.0425,0.0432,0.0220,,,0.0303
         
     | 
| 35 | 
         
            +
            165k,,,,0.0167,,0.0409,0.0015,,0.0470,0.0281,,,
         
     | 
| 36 | 
         
            +
            170k,,,,0.0334,,0.0281,0.0129,,0.0455,0.0273,,,0.0235
         
     | 
| 37 | 
         
            +
            175k,,,,0.0371,,0.0326,0.0190,,0.0409,0.0190,,,0.0273
         
     | 
| 38 | 
         
            +
            180k,,,,0.0425,,0.0364,0.0227,,0.0356,0.0243,,,0.0288
         
     | 
| 39 | 
         
            +
            185k,,,,0.0341,,0.0318,0.0341,,0.0546,0.0235,,,0.0364
         
     | 
| 40 | 
         
            +
            190k,,,,0.0296,,0.0364,,,0.0425,0.0220,,,0.0349
         
     | 
| 41 | 
         
            +
            195k,,,,0.0250,,0.0303,,,0.0493,0.0258,,,
         
     | 
| 42 | 
         
            +
            200k,,,,0.0250,,0.0371,,,0.0493,0.0273,,,0.0205
         
     | 
| 43 | 
         
            +
            205k,,,,0.0455,,0.0409,,,0.0553,0.0220,,,0.0258
         
     | 
| 44 | 
         
            +
            210k,,,,0.0462,,0.0371,,,0.0523,0.0281,,,
         
     | 
| 45 | 
         
            +
            215k,,,,0.0349,,0.0265,,,0.0500,0.0235,,,0.0281
         
     | 
| 46 | 
         
            +
            220k,,,,0.0432,,0.0167,,,0.0462,0.0326,,,
         
     | 
| 47 | 
         
            +
            225k,,,,0.0447,,0.0212,,,,0.0265,,,
         
     | 
| 48 | 
         
            +
            230k,,,,0.0440,,,,,0.0493,0.0273,,,
         
     | 
| 49 | 
         
            +
            235k,,,,0.0402,,,,,0.0508,0.0220,,,
         
     | 
| 50 | 
         
            +
            240k,,,,0.0341,,,,,,0.0281,,,
         
     | 
| 51 | 
         
            +
            245k,,,,0.0462,,,,,,0.0356,,,
         
     | 
| 52 | 
         
            +
            250k,,,,0.0500,,,,,,,,,
         
     | 
| 53 | 
         
            +
            255k,,,,0.0569,,,,,,0.0303,,,
         
     | 
| 54 | 
         
            +
            260k,,,,0.0500,,,,,,0.0334,,,
         
     | 
| 55 | 
         
            +
            265k,,,,0.0455,,,,,,0.0318,,,
         
     | 
| 56 | 
         
            +
            270k,,,,0.0538,,,,,,0.0273,,,
         
     | 
| 57 | 
         
            +
            275k,,,,0.0470,,,,,,,,,
         
     | 
| 58 | 
         
            +
            280k,,,,0.0553,,,,,,0.0364,,,
         
     | 
| 59 | 
         
            +
            285k,,,,0.0531,,,,,,0.0349,,,
         
     | 
| 60 | 
         
            +
            290k,,,,,,,,,,0.0311,,,
         
     | 
| 61 | 
         
            +
            300k,,,,,,,,,,,,,
         
     | 
| 62 | 
         
            +
            305k,,,,,,,,,,0.0311,,,
         
     | 
| 63 | 
         
            +
            310k,,,,,,,,,,0.0273,,,
         
     | 
| 64 | 
         
            +
            315k,,,,,,,,,,,,,
         
     | 
| 65 | 
         
            +
            320k,,,,,,,,,,,,,
         
     | 
| 66 | 
         
            +
            325k,,,,,,,,,,,,,
         
     | 
| 67 | 
         
            +
            330k,,,,,,,,,,,,,
         
     | 
| 68 | 
         
            +
            335k,,,,,,,,,,,,,
         
     | 
    	
        data/txt360_eval/CKPT Eval - HellaSwag.csv
    ADDED
    
    | 
         @@ -0,0 +1,69 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            ga,Slim-Pajama 600B (bsz=4K x 1024),,,,,,FineWeb-1.5T,,Ours-Base,,Ours-Upsampling1,,Ours-Upsampling2,,Ours-Code-Upsampling2,,All-Upsampling1,,All-Upsampling1,,All-Upsampling1,,All-Upsampling1,,DCLM-Base,
         
     | 
| 2 | 
         
            +
            0-shot: 5 min,Llama-8x8B-baseline,,Llama-8x8B-seq8192,,Llama-8x8B-mup,,Llama-8x8B-seq8192,,Llama-8x8B-seq8192,,Llama-8x8B-seq8192,,Llama-8x8B-seq8192,,Llama-8x8B-seq8192,,Llama-8x8B-seq8192,,Llama-1x8B-seq8192,,Llama_extend-1x8B-seq8192,,Jais-1x8B-seq8192,,Llama-1x8B-seq8192,
         
     | 
| 3 | 
         
            +
            10-shot: 36 min,0-shot,10-shot,0-shot,10-shot,0-shot,10-shot,0-shot,10-shot,0-shot,10-shot,0-shot,10-shot,0-shot,10-shot,0-shot,10-shot,0-shot,10-shot,0-shot,10-shot,0-shot,10-shot,0-shot,10-shot,0-shot,10-shot
         
     | 
| 4 | 
         
            +
            5k,0.5315,0.5301,0.5165,0.5693,,,0.5622,0.5376,0.5254,0.5119,0.5356,0.5291,0.5324,0.5210,0.5377,0.5339,0.5366,0.5278,0.4507,0.4300,0.4413,0.4262,0.4497,0.4397,0.4624,0.4469
         
     | 
| 5 | 
         
            +
            10k,0.6076,0.6008,0.5949,0.5693,,,0.6433,0.6202,0.5836,0.5827,,0.5975,0.6046,0.5886,0.6036,0.5987,0.6139,0.5901,0.5279,0.4889,0.5141,0.4872,0.5219,0.5028,0.5454,0.5150
         
     | 
| 6 | 
         
            +
            15k,0.6422,0.6278,0.6314,0.5998,,,0.6716,0.6367,0.6114,0.6002,0.6281,0.6079,0.6336,0.6118,0.6399,0.6266,0.6388,0.6172,0.5495,0.5211,0.5444,0.5142,0.5469,0.5096,0.5785,0.5484
         
     | 
| 7 | 
         
            +
            20k,0.6616,0.6424,0.6496,0.6244,,,0.6855,,0.6271,0.6223,0.6461,0.6230,0.6492,0.6329,0.6511,0.6475,0.6548,0.6382,0.5685,0.5310,0.5579,0.5270,0.5813,0.5377,0.5946,0.5649
         
     | 
| 8 | 
         
            +
            25k,0.6738,0.6577,0.6683,0.6390,,,0.6945,0.6662,0.6413,,0.6612,0.6404,0.6665,0.6417,0.6652,0.6629,0.6683,0.6499,0.5759,0.5369,0.5787,0.5486,0.5864,0.5598,0.6105,0.5796
         
     | 
| 9 | 
         
            +
            30k,0.6863,0.6656,0.6758,0.6368,,,0.7059,0.6639,,0.6387,0.6692,0.6425,0.6746,0.6485,0.6708,0.6584,0.6741,0.6587,0.5891,0.5490,0.5915,0.5437,0.5990,0.5625,0.6197,0.5897
         
     | 
| 10 | 
         
            +
            35k,0.6956,0.6762,0.6850,0.6430,,,0.7158,0.6602,0.6547,0.6420,,0.6348,0.6832,0.6572,0.6816,0.6705,0.6864,0.6682,0.5985,0.5590,0.5954,0.5553,0.6090,0.5667,0.6343,0.6046
         
     | 
| 11 | 
         
            +
            40k,0.7022,0.6812,0.6966,0.6524,,,0.7184,0.6814,0.6642,0.6452,0.6751,0.6347,0.6821,0.6533,0.6865,0.6717,0.6917,0.6646,0.6015,0.5595,0.6033,0.5592,0.6112,0.5704,0.6429,0.6100
         
     | 
| 12 | 
         
            +
            45k,0.7048,0.6954,0.6991,0.6583,,,0.7220,0.6921,0.6698,0.6479,0.6802,,0.6905,0.6616,0.6919,0.6812,0.6933,0.6704,0.6103,0.5663,0.6040,0.5623,0.6175,0.5773,0.6473,0.6212
         
     | 
| 13 | 
         
            +
            50k,0.7171,0.6998,0.7041,0.6574,,,0.7250,0.6785,0.6689,0.6611,0.6931,0.6726,0.6964,0.6720,0.6905,0.6715,0.7018,0.6902,0.6106,0.5510,0.6138,0.5676,0.6230,0.5934,0.6477,0.6109
         
     | 
| 14 | 
         
            +
            55k,0.7187,0.7012,0.7080,0.6768,,,0.7305,0.6967,0.6697,0.6571,0.6899,0.6614,0.6959,0.6764,0.7047,0.6816,0.7052,0.6799,0.6182,0.5759,0.6200,0.5753,0.6260,0.5857,0.6518,0.6145
         
     | 
| 15 | 
         
            +
            60k,0.7240,0.7037,0.7129,0.6748,,,0.7236,0.6955,0.6748,0.6573,0.6941,0.6584,0.6904,0.6850,0.6982,0.6731,0.7040,0.6767,0.6207,0.5849,0.6217,0.5711,0.6318,0.5744,0.6566,0.6204
         
     | 
| 16 | 
         
            +
            65k,0.7297,0.7130,0.7142,0.6700,,,0.7355,0.6994,0.6752,0.6590,0.6907,0.6598,0.7061,0.6772,0.6963,0.6824,0.7074,0.6857,0.6235,0.5766,0.6299,0.5750,0.6381,0.5973,0.6544,0.6264
         
     | 
| 17 | 
         
            +
            70k,0.7298,0.7148,0.7224,0.6796,,,0.7399,0.7034,0.6773,0.6631,0.6968,0.6735,0.7054,0.6789,0.7043,0.6936,0.7074,0.6883,0.6294,0.5982,0.6341,0.5872,0.6403,0.5928,0.6617,0.6220
         
     | 
| 18 | 
         
            +
            75k,0.7329,0.7144,0.7261,0.6972,,,0.7374,0.6934,0.6854,0.6661,0.7014,0.6622,0.7065,0.6843,0.7029,0.6837,0.7027,0.6853,0.6285,0.5932,0.6336,0.5830,0.6376,0.5907,0.6706,0.6237
         
     | 
| 19 | 
         
            +
            80k,0.7414,0.7271,0.7316,0.6937,,,0.7422,0.6989,0.6862,0.6717,0.7051,0.6762,0.7118,0.6954,0.7178,0.6909,0.7139,0.6908,0.6315,0.5898,0.6363,0.5877,0.6491,0.5968,0.6710,0.6032
         
     | 
| 20 | 
         
            +
            85k,0.7449,0.7278,0.7334,0.7011,,,0.7444,0.7101,0.6887,0.6635,0.7086,0.6739,0.7126,0.6872,0.7052,0.6927,0.7178,0.7048,0.6359,0.5970,0.6375,0.5941,0.6380,0.5897,0.6789,0.6203
         
     | 
| 21 | 
         
            +
            90k,0.7483,0.7379,0.7379,0.6949,,,0.7443,0.7064,0.6917,0.6818,0.7079,0.6804,0.7148,0.6926,0.7106,0.6976,0.7146,0.6818,0.6400,0.6052,0.6327,0.5846,0.6521,0.6120,0.6781,0.6271
         
     | 
| 22 | 
         
            +
            95k,0.7510,0.7411,0.7427,0.6987,,,0.7376,0.6943,0.6901,0.6719,0.7097,0.6616,0.7115,0.6946,0.7221,0.6979,0.7240,0.6953,0.6388,0.5870,0.6373,0.5899,0.6460,0.5970,0.6798,0.6320
         
     | 
| 23 | 
         
            +
            100k,0.7550,0.7419,0.7437,0.7070,,,0.7457,0.7153,,,0.7060,0.6902,0.7117,0.6955,0.7167,0.7002,0.7241,0.7013,0.6447,0.6079,0.6431,0.5916,0.6490,0.6095,0.6854,0.6467
         
     | 
| 24 | 
         
            +
            105k,0.7547,0.7424,0.7445,0.7042,,,0.7476,0.7158,,,0.7141,0.6804,0.7132,0.6953,0.7222,0.6980,0.7263,0.6912,0.6470,0.6060,0.6473,0.5908,0.6588,0.6023,0.6809,0.6144
         
     | 
| 25 | 
         
            +
            110k,0.7605,0.7491,0.7540,0.7070,,,0.7486,0.7210,0.6942,0.6850,0.7107,0.6696,0.7166,0.6883,0.7221,0.7020,0.7284,0.7013,0.6482,0.6196,,,0.6620,0.6166,0.6888,0.6269
         
     | 
| 26 | 
         
            +
            115k,0.7626,0.7491,0.7540,0.7070,,,0.7522,0.7213,0.6957,0.6832,0.7120,0.6698,0.7179,0.6955,0.7284,0.7101,0.7274,0.7045,0.6511,0.6004,,,0.6636,0.5998,0.6882,0.6250
         
     | 
| 27 | 
         
            +
            120k,0.7641,0.7545,0.7532,0.7110,,,0.7520,0.7217,0.7022,0.6911,0.7139,0.6855,0.7224,0.7017,0.7132,0.6866,0.7329,0.7089,0.6532,0.6145,,,0.6611,0.6085,0.6874,0.6250
         
     | 
| 28 | 
         
            +
            125k,0.7636,0.7552,0.7538,0.7126,,,0.7533,0.7195,0.7029,0.6946,0.7211,0.6944,0.7221,0.6937,0.7250,0.7155,0.7285,0.7194,0.6571,0.6184,,,0.6624,0.6071,0.6896,0.6294
         
     | 
| 29 | 
         
            +
            130k,0.7619,0.7547,0.7539,0.7168,,,0.7573,0.7178,0.7032,0.6929,0.7195,0.6969,0.7261,0.7103,0.7320,0.7221,0.7337,0.7096,0.6593,0.6174,,,,,0.6929,0.6273
         
     | 
| 30 | 
         
            +
            135k,0.7641,0.7570,0.7543,0.7162,,,0.7580,0.7324,,,0.7177,0.6978,0.7198,0.6969,0.7249,0.7162,0.7324,0.7107,0.6584,0.6316,,,,,0.6941,0.6392
         
     | 
| 31 | 
         
            +
            140k,,,0.7615,0.7250,,,0.7596,0.7329,,,0.7236,0.7106,0.7245,0.7140,0.7306,0.7228,0.7338,0.7099,0.6577,0.6142,,,,,0.6925,0.6334
         
     | 
| 32 | 
         
            +
            145k,,,,,,,0.7573,0.7207,,,0.7194,0.7040,0.7247,0.7077,0.7347,0.7231,0.7431,0.7195,0.6628,0.6295,,,,,0.6984,0.6543
         
     | 
| 33 | 
         
            +
            150k,,,,,,,0.7614,0.7352,,,0.7170,0.7029,,,0.7304,0.7116,0.7386,0.7233,0.6592,0.6212,,,,,0.6978,0.6291
         
     | 
| 34 | 
         
            +
            155k,,,,,,,0.7579,0.7360,,,0.7245,0.7127,0.7294,0.7058,0.7378,0.7162,0.7448,0.7139,0.6662,0.6246,,,,,0.6929,0.6396
         
     | 
| 35 | 
         
            +
            160k,,,,,,,0.7606,0.7356,,,0.7199,0.6983,0.7279,0.7109,0.7343,0.7230,0.7385,0.7172,0.6666,0.6169,,,,,0.7009,0.6266
         
     | 
| 36 | 
         
            +
            165k,,,,,,,,0.7403,,,0.7249,0.7058,0.7297,0.7119,,,0.7493,0.7234,0.6680,0.6268,,,,,0.7003,0.6104
         
     | 
| 37 | 
         
            +
            170k,,,,,,,0.7696,0.7422,,,0.7262,0.7070,0.7323,0.7031,,,0.7499,0.7260,0.6710,0.6319,,,,,0.7010,0.6514
         
     | 
| 38 | 
         
            +
            175k,,,,,,,0.7745,0.7450,,,0.7303,0.7180,0.7338,0.7206,,,0.7502,0.7257,0.6707,0.6205,,,,,0.7047,0.6401
         
     | 
| 39 | 
         
            +
            180k,,,,,,,0.7676,0.7384,,,0.7299,0.7249,0.7316,0.7250,,,0.7457,0.7270,0.6721,0.6327,,,,,0.7079,0.6421
         
     | 
| 40 | 
         
            +
            185k,,,,,,,0.7678,0.7441,,,0.7319,0.7232,0.7354,0.7340,,,0.7519,0.7309,0.6732,0.6275,,,,,0.7050,0.6130
         
     | 
| 41 | 
         
            +
            190k,,,,,,,0.7701,0.7505,,,0.7336,0.7193,,,,,0.7493,0.7305,0.6729,0.6343,,,,,0.7097,0.6568
         
     | 
| 42 | 
         
            +
            195k,,,,,,,0.7730,0.7504,,,0.7293,0.7137,,,,,0.7579,0.7376,0.6774,0.6251,,,,,0.7074,0.6363
         
     | 
| 43 | 
         
            +
            200k,,,,,,,0.7753,0.7521,,,0.7366,0.7138,,,,,0.7567,0.7372,0.6795,0.6279,,,,,0.7122,0.6430
         
     | 
| 44 | 
         
            +
            205k,,,,,,,0.7744,0.7537,,,0.7360,0.7312,,,,,0.7560,0.7453,,0.6293,,,,,0.7175,0.6647
         
     | 
| 45 | 
         
            +
            210k,,,,,,,0.7729,0.7539,,,0.7368,0.7284,,,,,0.7658,0.7465,,0.6431,,,,,0.7179,0.6109
         
     | 
| 46 | 
         
            +
            215k,,,,,,,0.7804,0.7596,,,0.7359,0.7295,,,,,0.7621,0.7357,0.6819,0.6370,,,,,0.7136,0.6287
         
     | 
| 47 | 
         
            +
            220k,,,,,,,0.7752,0.7633,,,0.7384,0.7436,,,,,0.7678,0.7457,0.6860,0.6384,,,,,,
         
     | 
| 48 | 
         
            +
            225k,,,,,,,0.7808,0.7607,,,0.7340,0.7366,,,,,0.7649,0.7427,0.6805,0.6354,,,,,,
         
     | 
| 49 | 
         
            +
            230k,,,,,,,0.7786,0.7614,,,,,,,,,0.7662,0.7561,0.6855,0.6483,,,,,,
         
     | 
| 50 | 
         
            +
            235k,,,,,,,0.7844,0.7619,,,,,,,,,0.7676,0.7532,0.6880,0.6471,,,,,,
         
     | 
| 51 | 
         
            +
            240k,,,,,,,0.7866,0.7677,,,,,,,,,,,0.6841,0.6509,,,,,,
         
     | 
| 52 | 
         
            +
            245k,,,,,,,0.7857,0.7684,,,,,,,,,,,0.6850,0.6487,,,,,,
         
     | 
| 53 | 
         
            +
            250k,,,,,,,0.7851,0.7738,,,,,,,,,,,0.6892,0.6541,,,,,,
         
     | 
| 54 | 
         
            +
            255k,,,,,,,0.7845,0.7716,,,,,,,,,,,0.6875,0.6448,,,,,,
         
     | 
| 55 | 
         
            +
            260k,,,,,,,0.7893,0.7705,,,,,,,,,,,0.6945,0.6480,,,,,,
         
     | 
| 56 | 
         
            +
            265k,,,,,,,0.7918,0.7727,,,,,,,,,,,0.6933,0.6552,,,,,,
         
     | 
| 57 | 
         
            +
            270k,,,,,,,0.7917,0.7725,,,,,,,,,,,0.6980,0.6548,,,,,,
         
     | 
| 58 | 
         
            +
            275k,,,,,,,0.7925,0.7741,,,,,,,,,,,0.6950,0.6604,,,,,,
         
     | 
| 59 | 
         
            +
            280k,,,,,,,0.7943,0.7769,,,,,,,,,,,,0.6574,,,,,,
         
     | 
| 60 | 
         
            +
            285k,,,,,,,0.7946,0.7781,,,,,,,,,,,0.6970,0.6644,,,,,,
         
     | 
| 61 | 
         
            +
            290k,,,,,,,,,,,,,,,,,,,0.6970,0.6674,,,,,,
         
     | 
| 62 | 
         
            +
            300k,,,,,,,,,,,,,,,,,,,0.6969,0.6592,,,,,,
         
     | 
| 63 | 
         
            +
            305k,,,,,,,,,,,,,,,,,,,0.6997,0.6655,,,,,,
         
     | 
| 64 | 
         
            +
            310k,,,,,,,,,,,,,,,,,,,0.6988,0.6639,,,,,,
         
     | 
| 65 | 
         
            +
            315k,,,,,,,,,,,,,,,,,,,0.7023,0.6749,,,,,,
         
     | 
| 66 | 
         
            +
            320k,,,,,,,,,,,,,,,,,,,0.7012,0.6706,,,,,,
         
     | 
| 67 | 
         
            +
            325k,,,,,,,,,,,,,,,,,,,,,,,,,,
         
     | 
| 68 | 
         
            +
            330k,,,,,,,,,,,,,,,,,,,,,,,,,,
         
     | 
| 69 | 
         
            +
            335k,,,,,,,,,,,,,,,,,,,,,,,,,,
         
     | 
    	
        data/txt360_eval/CKPT Eval - MATH.csv
    ADDED
    
    | 
         @@ -0,0 +1,68 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            5-shot,Slim-Pajama 600B (bsz=4K x 1024),,,FineWeb-1.5T,Ours-Base,Ours-Upsampling1,Ours-Upsampling2,Ours-Code-Upsampling2,All-Upsampling1,All-Upsampling1,All-Upsampling1,All-Upsampling1,DCLM-Base
         
     | 
| 2 | 
         
            +
            time: 5 min,Llama-8x8B-baseline,Llama-8x8B-seq8192,Llama-8x8B-mup,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-1x8B-seq8192,Llama_extend-1x8B-seq8192,Jais-1x8B-seq8192,Llama-1x8B-seq8192
         
     | 
| 3 | 
         
            +
            5k,0.2335,0.2308,,0.2251,,0.2157,0.2221,0.2231,0.2211,0.2251,0.2191,0.2271,0.2238
         
     | 
| 4 | 
         
            +
            10k,0.2489,0.2519,,0.2379,0.2211,0.2332,0.2415,0.2342,0.2399,0.2285,0.2342,0.2402,0.2224
         
     | 
| 5 | 
         
            +
            15k,0.2626,0.2469,,0.2526,,0.2389,0.2322,0.2479,0.2580,0.2375,0.2271,0.2355,0.2375
         
     | 
| 6 | 
         
            +
            20k,0.2737,0.2606,,0.2469,0.2399,,0.2419,0.2526,0.2663,0.2469,0.2499,0.2439,0.2322
         
     | 
| 7 | 
         
            +
            25k,0.2700,0.2653,,0.2523,0.2395,0.2600,0.2526,0.2616,0.2559,0.2369,0.2476,0.2462,0.2355
         
     | 
| 8 | 
         
            +
            30k,0.2687,0.2556,,0.2402,,0.2452,0.2533,0.2606,0.2503,0.2456,0.2452,0.2446,0.2372
         
     | 
| 9 | 
         
            +
            35k,0.2765,0.2533,,0.2683,0.2596,0.2590,0.2509,0.2630,0.2737,0.2392,0.2405,0.2536,0.2402
         
     | 
| 10 | 
         
            +
            40k,0.2667,0.2683,,0.2496,0.2496,0.2593,0.2529,0.2697,0.2663,0.2379,0.2486,0.2526,0.2422
         
     | 
| 11 | 
         
            +
            45k,0.2750,0.2620,,0.2616,0.2586,0.2563,0.2503,0.2683,0.2673,0.2479,0.2496,0.2513,0.2472
         
     | 
| 12 | 
         
            +
            50k,0.2861,0.2697,,0.2693,0.2553,0.2596,0.2553,0.2700,0.2771,0.2442,0.2425,0.2546,0.2395
         
     | 
| 13 | 
         
            +
            55k,0.2848,0.2693,,0.2640,0.2630,0.2566,0.2479,0.2630,0.2757,0.2526,0.2506,0.2586,0.2509
         
     | 
| 14 | 
         
            +
            60k,0.2945,0.2784,,0.2727,0.2596,0.2633,0.2590,0.2690,0.2714,0.2519,0.2563,0.2553,0.2479
         
     | 
| 15 | 
         
            +
            65k,0.3008,0.2767,,0.2680,0.2623,0.2704,0.2610,0.2492,0.2727,0.2529,0.2559,0.2647,0.2462
         
     | 
| 16 | 
         
            +
            70k,0.2891,0.2824,,0.2730,0.2596,0.2710,0.2700,0.2677,0.2807,0.2469,0.2459,0.2626,0.2576
         
     | 
| 17 | 
         
            +
            75k,0.2982,0.2938,,0.2784,0.2647,0.2630,0.2697,0.2777,0.2620,0.2626,0.2499,0.2583,0.2549
         
     | 
| 18 | 
         
            +
            80k,0.2948,0.2801,,0.2737,0.2727,0.2643,0.2553,0.2657,0.2704,0.2509,0.2590,0.2549,0.2563
         
     | 
| 19 | 
         
            +
            85k,0.2992,0.2938,,0.2754,0.2620,0.2704,0.2677,0.2600,0.2771,0.2496,0.2385,0.2620,0.2529
         
     | 
| 20 | 
         
            +
            90k,0.3002,0.2888,,0.2764,0.2714,0.2737,0.2573,0.2693,0.2918,0.2616,0.2492,0.2566,0.2516
         
     | 
| 21 | 
         
            +
            95k,0.3025,0.2817,,0.2616,0.2690,0.2737,0.2523,0.2690,0.2791,0.2492,0.2576,0.2576,0.2549
         
     | 
| 22 | 
         
            +
            100k,0.2951,0.2894,,0.2616,,0.2817,0.2660,0.2757,0.2861,0.2546,0.2479,0.2667,0.2559
         
     | 
| 23 | 
         
            +
            105k,0.3052,0.2928,,0.2653,,0.2710,0.2707,0.2771,0.2868,0.2529,0.2482,0.2640,0.2633
         
     | 
| 24 | 
         
            +
            110k,0.3052,0.2985,,0.2600,0.2764,0.2781,0.2600,0.2764,0.2824,0.2536,,0.2727,0.2606
         
     | 
| 25 | 
         
            +
            115k,0.3025,0.2985,,0.2690,0.2791,0.2720,0.2704,0.2744,0.2918,0.2623,,0.2807,0.2496
         
     | 
| 26 | 
         
            +
            120k,0.3042,0.2985,,0.2750,0.2647,0.2650,0.2814,0.2754,0.2955,0.2677,,0.2626,0.2586
         
     | 
| 27 | 
         
            +
            125k,0.3149,0.3018,,0.2683,0.2707,0.2647,0.2757,0.2760,0.2804,0.2509,,0.2704,0.2496
         
     | 
| 28 | 
         
            +
            130k,0.3179,0.2978,,0.2781,0.2747,0.2653,0.2760,0.2774,0.2767,0.2593,,,0.2513
         
     | 
| 29 | 
         
            +
            135k,0.3226,0.2945,,0.2747,,0.2717,0.2673,0.2784,0.2884,0.2606,,,0.2533
         
     | 
| 30 | 
         
            +
            140k,,0.3018,,0.2771,,0.2757,0.2794,0.2787,0.2821,0.2459,,,0.2596
         
     | 
| 31 | 
         
            +
            145k,,,,0.2724,,0.2650,0.2720,0.2888,0.2801,0.2543,,,0.2633
         
     | 
| 32 | 
         
            +
            150k,,,,0.2720,,0.2814,,0.2864,0.2901,0.2590,,,0.2543
         
     | 
| 33 | 
         
            +
            155k,,,,,,0.2784,0.2720,0.2874,0.2938,0.2580,,,0.2566
         
     | 
| 34 | 
         
            +
            160k,,,,0.2817,,0.2834,0.2653,0.2807,0.2814,0.2563,,,0.2549
         
     | 
| 35 | 
         
            +
            165k,,,,0.2834,,0.2821,0.2804,,0.2955,0.2559,,,0.2536
         
     | 
| 36 | 
         
            +
            170k,,,,0.2854,,0.2824,0.2804,,0.3119,0.2536,,,0.2626
         
     | 
| 37 | 
         
            +
            175k,,,,0.2804,,0.2915,0.2750,,0.2988,0.2489,,,0.2657
         
     | 
| 38 | 
         
            +
            180k,,,,0.2767,,0.2901,0.2958,,0.3099,0.2623,,,0.2643
         
     | 
| 39 | 
         
            +
            185k,,,,0.2767,,0.2948,0.2804,,0.3055,0.2570,,,0.2643
         
     | 
| 40 | 
         
            +
            190k,,,,0.2787,,0.2925,,,0.3065,0.2573,,,0.2760
         
     | 
| 41 | 
         
            +
            195k,,,,0.2858,,0.2898,,,0.3119,0.2640,,,0.2657
         
     | 
| 42 | 
         
            +
            200k,,,,0.2771,,0.3028,,,0.3112,0.2610,,,0.2687
         
     | 
| 43 | 
         
            +
            205k,,,,0.2851,,0.2921,,,0.3002,0.2680,,,0.2667
         
     | 
| 44 | 
         
            +
            210k,,,,0.2838,,0.2817,,,0.3022,0.2650,,,0.2714
         
     | 
| 45 | 
         
            +
            215k,,,,0.2838,,0.2851,,,0.3069,0.2653,,,0.2600
         
     | 
| 46 | 
         
            +
            220k,,,,0.2938,,0.2814,,,0.3002,0.2549,,,
         
     | 
| 47 | 
         
            +
            225k,,,,0.2935,,0.2898,,,0.3049,0.2633,,,
         
     | 
| 48 | 
         
            +
            230k,,,,0.2888,,,,,0.3132,0.2653,,,
         
     | 
| 49 | 
         
            +
            235k,,,,0.3055,,,,,0.2951,0.2717,,,
         
     | 
| 50 | 
         
            +
            240k,,,,0.2995,,,,,,0.2667,,,
         
     | 
| 51 | 
         
            +
            245k,,,,0.2928,,,,,,0.2610,,,
         
     | 
| 52 | 
         
            +
            250k,,,,0.3092,,,,,,0.2650,,,
         
     | 
| 53 | 
         
            +
            255k,,,,0.3152,,,,,,0.2643,,,
         
     | 
| 54 | 
         
            +
            260k,,,,0.2951,,,,,,0.2616,,,
         
     | 
| 55 | 
         
            +
            265k,,,,0.3045,,,,,,0.2610,,,
         
     | 
| 56 | 
         
            +
            270k,,,,0.3018,,,,,,,,,
         
     | 
| 57 | 
         
            +
            275k,,,,0.3065,,,,,,,,,
         
     | 
| 58 | 
         
            +
            280k,,,,0.3015,,,,,,,,,
         
     | 
| 59 | 
         
            +
            285k,,,,0.2965,,,,,,0.2586,,,
         
     | 
| 60 | 
         
            +
            290k,,,,,,,,,,0.2623,,,
         
     | 
| 61 | 
         
            +
            300k,,,,,,,,,,0.2603,,,
         
     | 
| 62 | 
         
            +
            305k,,,,,,,,,,0.2630,,,
         
     | 
| 63 | 
         
            +
            310k,,,,,,,,,,0.2710,,,
         
     | 
| 64 | 
         
            +
            315k,,,,,,,,,,0.2677,,,
         
     | 
| 65 | 
         
            +
            320k,,,,,,,,,,0.2650,,,
         
     | 
| 66 | 
         
            +
            325k,,,,,,,,,,,,,
         
     | 
| 67 | 
         
            +
            330k,,,,,,,,,,,,,
         
     | 
| 68 | 
         
            +
            335k,,,,,,,,,,,,,
         
     | 
    	
        data/txt360_eval/CKPT Eval - MMLU.csv
    ADDED
    
    | 
         @@ -0,0 +1,68 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            5-shot,Slim-Pajama 600B (bsz=4K x 1024),,,FineWeb-1.5T,Ours-Base,Ours-Upsampling1,Ours-Upsampling2,Ours-Code-Upsampling2,All-Upsampling1,All-Upsampling1,All-Upsampling1,All-Upsampling1,DCLM-Base,Comments
         
     | 
| 2 | 
         
            +
            time: 20 min,Llama-8x8B-baseline,Llama-8x8B-seq8192,Llama-8x8B-mup,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-1x8B-seq8192,Llama_extend-1x8B-seq8192,Jais-1x8B-seq8192,Llama-1x8B-seq8192,"1. Comparing with upsample2-155k, the social science acc of dclm-195k is much higher"
         
     | 
| 3 | 
         
            +
            5k,0.2398,0.2671,,,0.2579,0.2418,0.2482,0.2690,0.2456,0.2512,0.2532,0.2428,0.2530,
         
     | 
| 4 | 
         
            +
            10k,0.2535,0.2520,,0.2594,0.2612,,0.2628,0.2319,0.2525,0.2462,0.2582,0.2713,0.2529,
         
     | 
| 5 | 
         
            +
            15k,0.2527,0.2347,,,,0.2489,0.2334,0.2483,0.2503,0.2549,0.2380,0.2653,0.2507,
         
     | 
| 6 | 
         
            +
            20k,0.2530,0.2478,,0.2495,0.2467,0.2677,0.2449,0.2507,0.2540,0.2416,0.2612,0.2482,0.2553,
         
     | 
| 7 | 
         
            +
            25k,0.2503,0.2488,,,0.2431,0.2597,0.2571,0.2506,0.2534,0.2505,0.2464,0.2577,0.2552,
         
     | 
| 8 | 
         
            +
            30k,0.2297,0.2539,,,,0.2592,0.2678,0.2389,0.2557,0.2468,0.2517,0.2485,0.2556,
         
     | 
| 9 | 
         
            +
            35k,0.2356,0.2374,,0.2426,0.2591,0.2550,0.2562,0.2594,0.2494,0.2403,0.2451,0.2547,0.2443,
         
     | 
| 10 | 
         
            +
            40k,0.2406,0.2462,,0.2467,0.2485,0.2344,0.2408,0.2555,0.2686,0.2552,0.2500,0.2553,0.2775,
         
     | 
| 11 | 
         
            +
            45k,0.2470,0.2428,,0.2418,0.2296,0.2512,0.2712,0.2630,0.2503,0.2368,0.2536,0.2557,0.2393,
         
     | 
| 12 | 
         
            +
            50k,0.2421,0.2368,,0.2382,0.2441,0.2727,0.2558,0.2558,0.2322,0.2499,0.2563,0.2305,0.2485,
         
     | 
| 13 | 
         
            +
            55k,0.2460,0.2551,,0.2408,0.2536,0.2389,0.2440,0.2444,0.2747,0.2552,0.2516,0.2339,0.2595,
         
     | 
| 14 | 
         
            +
            60k,0.2415,0.2397,,0.2718,0.2539,0.2518,0.2339,0.2551,0.2432,0.2517,0.2589,0.2379,0.2589,
         
     | 
| 15 | 
         
            +
            65k,0.2490,0.2641,,0.2637,0.2423,0.2589,0.2342,0.2303,0.2478,0.2485,0.2643,0.2485,0.2798,
         
     | 
| 16 | 
         
            +
            70k,0.2578,0.2641,,0.2534,0.2359,0.2716,0.2673,0.2307,0.2478,0.2483,0.2426,0.2499,0.2583,
         
     | 
| 17 | 
         
            +
            75k,0.2587,0.2599,,0.2529,0.2372,0.2514,0.2579,0.2519,0.2478,0.2742,0.2594,0.2371,0.2653,
         
     | 
| 18 | 
         
            +
            80k,0.2493,0.2519,,0.2504,0.2344,0.2582,0.2535,0.2433,0.2718,0.2596,0.2536,0.2553,0.2573,
         
     | 
| 19 | 
         
            +
            85k,0.2527,0.2789,,0.2547,0.2496,0.2564,0.2418,0.2572,0.2465,0.2663,0.2552,0.2485,0.2584,
         
     | 
| 20 | 
         
            +
            90k,0.2679,0.2668,,0.2595,0.2464,0.2608,0.2359,0.2777,0.2475,0.2543,0.2514,0.2411,0.2499,
         
     | 
| 21 | 
         
            +
            95k,0.2551,0.2763,,0.2621,0.2469,0.2505,0.2534,0.2584,0.2424,0.2607,0.2742,0.2385,0.2521,
         
     | 
| 22 | 
         
            +
            100k,0.2594,0.2564,,0.2550,,0.2614,0.2461,0.2611,0.2497,0.2675,0.2545,0.2540,0.2574,
         
     | 
| 23 | 
         
            +
            105k,0.2787,0.2473,,0.2659,,0.2542,0.2729,0.2666,0.2468,0.2610,0.2726,0.2465,0.2798,
         
     | 
| 24 | 
         
            +
            110k,0.3079,0.2458,,0.2551,0.2629,0.2512,0.2604,0.3027,0.2522,0.2673,,0.2410,0.2540,
         
     | 
| 25 | 
         
            +
            115k,0.3185,0.2458,,0.2624,0.2324,0.2569,0.2590,0.2863,0.2584,0.2624,,0.2396,0.2771,
         
     | 
| 26 | 
         
            +
            120k,0.3139,0.2832,,0.2626,0.2663,0.2718,0.2629,0.3190,0.2748,0.2419,,0.2544,0.2772,
         
     | 
| 27 | 
         
            +
            125k,0.2960,0.2928,,0.2712,0.2733,0.2663,0.2768,0.2788,0.2570,0.2616,,0.2466,0.2856,
         
     | 
| 28 | 
         
            +
            130k,0.3033,0.2844,,0.2404,0.2635,0.2767,0.2676,0.3191,0.2812,0.2538,,,0.2973,
         
     | 
| 29 | 
         
            +
            135k,0.2934,0.2895,,0.2641,,0.2713,0.2735,0.3119,0.2882,0.2661,,,0.3203,
         
     | 
| 30 | 
         
            +
            140k,,0.3045,,0.2553,,0.2811,0.2765,0.2866,0.3019,0.2730,,,0.2772,
         
     | 
| 31 | 
         
            +
            145k,,,,0.2492,,0.2850,0.2708,0.3107,0.3090,0.2582,,,0.3435,
         
     | 
| 32 | 
         
            +
            150k,,,,0.2595,,0.2780,,0.3225,0.3199,0.2541,,,0.3112,
         
     | 
| 33 | 
         
            +
            155k,,,,0.2681,,0.2664,0.2463,0.3618,0.3116,0.2594,,,0.3361,
         
     | 
| 34 | 
         
            +
            160k,,,,0.2605,,0.2793,0.2821,0.3047,0.3240,0.2688,,,0.3392,
         
     | 
| 35 | 
         
            +
            165k,,,,0.2725,,0.2933,0.2816,,0.3478,0.2653,,,0.3485,
         
     | 
| 36 | 
         
            +
            170k,,,,0.2514,,0.2656,0.2893,,0.3423,0.2537,,,0.3355,
         
     | 
| 37 | 
         
            +
            175k,,,,0.2535,,0.3007,0.3317,,0.3156,0.2621,,,0.3162,
         
     | 
| 38 | 
         
            +
            180k,,,,0.2561,,0.2785,0.2624,,0.2893,0.2555,,,0.3398,
         
     | 
| 39 | 
         
            +
            185k,,,,0.2523,,0.3131,0.3026,,0.3876,0.2461,,,0.3631,
         
     | 
| 40 | 
         
            +
            190k,,,,0.2653,,0.3226,,,0.3131,0.2540,,,0.3930,
         
     | 
| 41 | 
         
            +
            195k,,,,0.2681,,0.3136,,,0.3473,0.2550,,,0.3972,
         
     | 
| 42 | 
         
            +
            200k,,,,0.2515,,0.2811,,,0.3257,0.2481,,,0.3660,
         
     | 
| 43 | 
         
            +
            205k,,,,0.2619,,0.3004,,,0.3836,,,,0.3748,
         
     | 
| 44 | 
         
            +
            210k,,,,0.2687,,0.2996,,,0.3063,0.2646,,,0.3668,
         
     | 
| 45 | 
         
            +
            215k,,,,0.2653,,0.3329,,,0.3947,0.2626,,,0.3641,
         
     | 
| 46 | 
         
            +
            220k,,,,0.2631,,0.3590,,,0.3621,0.2600,,,,
         
     | 
| 47 | 
         
            +
            225k,,,,0.2737,,0.3453,,,0.4151,0.2589,,,,
         
     | 
| 48 | 
         
            +
            230k,,,,0.2833,,,,,0.3825,0.2587,,,,
         
     | 
| 49 | 
         
            +
            235k,,,,0.2703,,,,,0.3897,,,,,
         
     | 
| 50 | 
         
            +
            240k,,,,0.2572,,,,,,0.2610,,,,
         
     | 
| 51 | 
         
            +
            245k,,,,0.2700,,,,,,0.2612,,,,
         
     | 
| 52 | 
         
            +
            250k,,,,0.2639,,,,,,0.2583,,,,
         
     | 
| 53 | 
         
            +
            255k,,,,0.2680,,,,,,0.2564,,,,
         
     | 
| 54 | 
         
            +
            260k,,,,0.2897,,,,,,0.2631,,,,
         
     | 
| 55 | 
         
            +
            265k,,,,0.2815,,,,,,0.2635,,,,
         
     | 
| 56 | 
         
            +
            270k,,,,0.2693,,,,,,,,,,
         
     | 
| 57 | 
         
            +
            275k,,,,0.2789,,,,,,0.2643,,,,
         
     | 
| 58 | 
         
            +
            280k,,,,0.3052,,,,,,0.2687,,,,
         
     | 
| 59 | 
         
            +
            285k,,,,0.2850,,,,,,0.2605,,,,
         
     | 
| 60 | 
         
            +
            290k,,,,,,,,,,0.2779,,,,
         
     | 
| 61 | 
         
            +
            300k,,,,,,,,,,0.2755,,,,
         
     | 
| 62 | 
         
            +
            305k,,,,,,,,,,,,,,
         
     | 
| 63 | 
         
            +
            310k,,,,,,,,,,0.2614,,,,
         
     | 
| 64 | 
         
            +
            315k,,,,,,,,,,0.2646,,,,
         
     | 
| 65 | 
         
            +
            320k,,,,,,,,,,0.2745,,,,
         
     | 
| 66 | 
         
            +
            325k,,,,,,,,,,,,,,
         
     | 
| 67 | 
         
            +
            330k,,,,,,,,,,,,,,
         
     | 
| 68 | 
         
            +
            335k,,,,,,,,,,,,,,
         
     | 
    	
        data/txt360_eval/CKPT Eval - MedQA.csv
    ADDED
    
    | 
         @@ -0,0 +1,68 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            0-shot,Slim-Pajama 600B (bsz=4K x 1024),,,FineWeb-1.5T,Ours-Base,Ours-Upsampling1,Ours-Upsampling2,Ours-Code-Upsampling2,All-Upsampling1,All-Upsampling1,All-Upsampling1,All-Upsampling1,DCLM-Base
         
     | 
| 2 | 
         
            +
            time: 3 min,Llama-8x8B-baseline,Llama-8x8B-seq8192,Llama-8x8B-mup,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-1x8B-seq8192,Llama_extend-1x8B-seq8192,Jais-1x8B-seq8192,Llama-1x8B-seq8192
         
     | 
| 3 | 
         
            +
            5k,0.2679,0.2946,,0.2152,,0.2781,0.2482,0.2160,0.2687,0.2639,0.2215,0.2584,0.2396
         
     | 
| 4 | 
         
            +
            10k,0.2749,0.2506,,0.2380,0.2372,0.2624,0.2616,0.2734,0.2718,0.2647,0.2435,0.2789,0.2435
         
     | 
| 5 | 
         
            +
            15k,0.2388,0.2773,,0.2270,,0.2749,0.2797,0.2545,0.2639,0.2726,0.2773,0.2647,0.2412
         
     | 
| 6 | 
         
            +
            20k,0.2412,0.2773,,0.2419,,,0.2317,0.2789,0.2757,0.2372,0.2506,0.2742,0.2577
         
     | 
| 7 | 
         
            +
            25k,0.2302,0.2624,,0.2184,,0.2561,0.2569,0.2804,0.2474,0.2616,0.2498,0.2624,0.2419
         
     | 
| 8 | 
         
            +
            30k,0.2545,0.2553,,0.2679,0.2522,0.2239,0.2097,0.2742,0.2608,0.2592,0.2694,0.2655,0.2765
         
     | 
| 9 | 
         
            +
            35k,0.2757,0.2577,,0.2647,0.2655,0.2671,0.2467,0.2907,0.2694,0.2797,0.2498,0.2749,0.2514
         
     | 
| 10 | 
         
            +
            40k,0.2710,0.2608,,0.2671,0.2396,0.2490,0.2569,0.2734,0.2482,0.2647,0.2742,0.2608,0.2671
         
     | 
| 11 | 
         
            +
            45k,0.2765,0.2506,,0.2742,0.2734,0.2600,0.2255,0.2859,0.2333,0.2671,0.2435,0.2474,0.2749
         
     | 
| 12 | 
         
            +
            50k,0.2742,0.2624,,0.2749,0.2537,0.2482,0.2372,0.2412,0.2655,0.2789,0.2412,0.2702,0.2577
         
     | 
| 13 | 
         
            +
            55k,0.2679,0.2545,,0.2797,0.2561,0.2632,0.2294,0.2718,0.2537,0.2647,0.2647,0.2757,0.2632
         
     | 
| 14 | 
         
            +
            60k,0.2773,0.2427,,0.2294,0.2325,0.2789,,0.2419,0.2639,0.2679,0.2506,0.2687,0.2419
         
     | 
| 15 | 
         
            +
            65k,0.2404,0.2687,,0.2663,0.2757,0.2310,0.2749,0.2836,0.2726,0.2734,0.2537,0.2608,0.2459
         
     | 
| 16 | 
         
            +
            70k,0.2600,0.2592,,0.2592,0.2757,0.2797,0.2632,0.2569,0.2435,0.2773,0.2765,0.2702,0.2584
         
     | 
| 17 | 
         
            +
            75k,0.2679,0.2584,,0.2490,0.2679,0.2789,0.2616,0.2710,0.2765,0.2742,0.2710,0.2687,0.2789
         
     | 
| 18 | 
         
            +
            80k,0.2632,0.2702,,0.2797,0.2419,0.2757,0.2522,0.2616,0.2789,0.2655,0.2694,0.2435,0.2757
         
     | 
| 19 | 
         
            +
            85k,0.2734,0.2451,,0.2655,0.2844,0.2608,0.2687,0.2742,0.2553,0.2663,0.2749,0.2639,0.2773
         
     | 
| 20 | 
         
            +
            90k,0.2797,0.2506,,0.2310,0.2364,0.2679,0.2624,,0.2679,0.2608,0.2561,0.2765,0.2820
         
     | 
| 21 | 
         
            +
            95k,0.2529,0.2545,,0.2742,0.2820,0.2797,0.2647,0.2757,0.2749,0.2663,0.2105,0.2655,0.2749
         
     | 
| 22 | 
         
            +
            100k,0.2694,0.2459,,0.2679,,0.2168,0.2702,0.2459,0.2663,0.2655,0.2537,0.2655,0.2781
         
     | 
| 23 | 
         
            +
            105k,0.2537,0.2529,,0.2655,,0.2773,0.2632,0.2592,0.2726,0.2687,0.2671,0.2749,0.2812
         
     | 
| 24 | 
         
            +
            110k,0.2663,0.2419,,0.2718,0.2474,0.2584,0.2537,0.2569,0.2537,0.2349,,0.2537,0.2765
         
     | 
| 25 | 
         
            +
            115k,0.2459,0.2419,,0.2655,0.2718,0.2773,0.2247,0.2852,0.2867,0.2490,,0.2561,0.2364
         
     | 
| 26 | 
         
            +
            120k,0.2624,0.2561,,0.2930,0.2537,0.2671,,0.2718,0.2844,0.2545,,0.2608,0.2443
         
     | 
| 27 | 
         
            +
            125k,0.2451,0.2742,,0.2624,0.2364,0.2451,0.2145,0.2985,0.2883,0.2726,,0.2498,0.2867
         
     | 
| 28 | 
         
            +
            130k,0.2655,0.2797,,0.2828,0.2412,0.2836,0.2891,0.2930,0.2922,0.2522,,,0.2765
         
     | 
| 29 | 
         
            +
            135k,0.2749,0.2655,,,,0.2443,0.2765,0.2883,0.2702,0.2679,,,0.2679
         
     | 
| 30 | 
         
            +
            140k,,0.2781,,0.2529,,0.2427,0.2545,0.2962,0.2930,0.2569,,,0.2820
         
     | 
| 31 | 
         
            +
            145k,,,,0.2490,,0.2427,0.2718,0.3048,0.3024,0.2639,,,0.2632
         
     | 
| 32 | 
         
            +
            150k,,,,,,0.2694,,0.2482,0.3244,0.2655,,,0.3150
         
     | 
| 33 | 
         
            +
            155k,,,,0.2608,,0.2789,0.2624,0.3134,,0.2490,,,0.3009
         
     | 
| 34 | 
         
            +
            160k,,,,0.2529,,0.2765,0.2726,0.3079,0.2852,0.2577,,,0.2757
         
     | 
| 35 | 
         
            +
            165k,,,,0.2388,,0.2592,0.2742,,0.2561,0.2380,,,0.3009
         
     | 
| 36 | 
         
            +
            170k,,,,0.2435,,0.2852,0.2506,,0.3056,0.2380,,,0.2836
         
     | 
| 37 | 
         
            +
            175k,,,,0.2632,,0.2757,0.2647,,0.3126,0.2671,,,0.2993
         
     | 
| 38 | 
         
            +
            180k,,,,0.2608,,0.2592,0.2899,,0.3166,0.2396,,,0.3071
         
     | 
| 39 | 
         
            +
            185k,,,,0.2710,,0.2859,0.2561,,0.3268,0.2537,,,0.2490
         
     | 
| 40 | 
         
            +
            190k,,,,0.2812,,0.2914,,,0.3040,0.2577,,,0.2828
         
     | 
| 41 | 
         
            +
            195k,,,,0.2482,,0.2797,,,0.3472,0.2694,,,0.2883
         
     | 
| 42 | 
         
            +
            200k,,,,0.2639,,0.2584,,,0.3339,0.2639,,,0.3126
         
     | 
| 43 | 
         
            +
            205k,,,,0.2514,,0.3158,,,0.3409,,,,0.2710
         
     | 
| 44 | 
         
            +
            210k,,,,0.2742,,0.3016,,,0.3378,0.2624,,,0.2962
         
     | 
| 45 | 
         
            +
            215k,,,,0.2592,,0.2859,,,0.3362,,,,0.2859
         
     | 
| 46 | 
         
            +
            220k,,,,0.2262,,0.3001,,,0.3559,0.2781,,,
         
     | 
| 47 | 
         
            +
            225k,,,,0.2490,,0.3134,,,0.3213,0.2608,,,
         
     | 
| 48 | 
         
            +
            230k,,,,0.2357,,,,,0.3472,0.2828,,,
         
     | 
| 49 | 
         
            +
            235k,,,,0.2514,,,,,0.3614,0.2639,,,
         
     | 
| 50 | 
         
            +
            240k,,,,0.2624,,,,,,0.2867,,,
         
     | 
| 51 | 
         
            +
            245k,,,,0.2482,,,,,,0.2718,,,
         
     | 
| 52 | 
         
            +
            250k,,,,0.2592,,,,,,0.2624,,,
         
     | 
| 53 | 
         
            +
            255k,,,,0.2537,,,,,,0.2781,,,
         
     | 
| 54 | 
         
            +
            260k,,,,0.2639,,,,,,0.2679,,,
         
     | 
| 55 | 
         
            +
            265k,,,,0.2844,,,,,,0.2616,,,
         
     | 
| 56 | 
         
            +
            270k,,,,0.2624,,,,,,,,,
         
     | 
| 57 | 
         
            +
            275k,,,,0.2757,,,,,,,,,
         
     | 
| 58 | 
         
            +
            280k,,,,0.2852,,,,,,0.2592,,,
         
     | 
| 59 | 
         
            +
            285k,,,,0.2726,,,,,,0.2781,,,
         
     | 
| 60 | 
         
            +
            290k,,,,,,,,,,0.2671,,,
         
     | 
| 61 | 
         
            +
            300k,,,,,,,,,,0.2742,,,
         
     | 
| 62 | 
         
            +
            305k,,,,,,,,,,0.2624,,,
         
     | 
| 63 | 
         
            +
            310k,,,,,,,,,,0.2718,,,
         
     | 
| 64 | 
         
            +
            315k,,,,,,,,,,0.2694,,,
         
     | 
| 65 | 
         
            +
            320k,,,,,,,,,,0.2749,,,
         
     | 
| 66 | 
         
            +
            325k,,,,,,,,,,,,,
         
     | 
| 67 | 
         
            +
            330k,,,,,,,,,,,,,
         
     | 
| 68 | 
         
            +
            335k,,,,,,,,,,,,,
         
     | 
    	
        data/txt360_eval/CKPT Eval - NQ.csv
    ADDED
    
    | 
         @@ -0,0 +1,68 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            5-shot,Slim-Pajama 600B (bsz=4K x 1024),,,FineWeb-1.5T,Ours-Base,Ours-Upsampling1,Ours-Upsampling2,Ours-Code-Upsampling2,All-Upsampling1,All-Upsampling1,All-Upsampling1,All-Upsampling1,DCLM-Base
         
     | 
| 2 | 
         
            +
            time: 22 min,Llama-8x8B-baseline,Llama-8x8B-seq8192,Llama-8x8B-mup,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-1x8B-seq8192,Llama_extend-1x8B-seq8192,Jais-1x8B-seq8192,Llama-1x8B-seq8192
         
     | 
| 3 | 
         
            +
            5k,0.0615,0.0537,,0.0341,0.0416,0.0634,0.0565,0.0579,0.0526,0.0219,0.0213,0.0205,0.0274
         
     | 
| 4 | 
         
            +
            10k,0.1075,0.1053,,0.0715,,0.0906,0.0931,0.0828,0.0767,0.0391,0.0418,0.0529,0.0554
         
     | 
| 5 | 
         
            +
            15k,0.1382,0.1136,,0.0765,,0.1147,0.1061,0.1152,0.1127,0.0607,0.0560,0.0629,0.0587
         
     | 
| 6 | 
         
            +
            20k,0.1490,0.1393,,0.0787,,0.1161,0.1183,0.1285,0.1247,0.0529,0.0623,0.0668,0.0709
         
     | 
| 7 | 
         
            +
            25k,0.1687,0.1416,,0.0892,0.1150,0.1402,0.1352,0.1380,0.1343,0.0584,0.0687,0.0762,0.0828
         
     | 
| 8 | 
         
            +
            30k,0.1767,0.1557,,0.0911,0.1366,0.1454,0.1271,0.1501,0.1421,0.0723,0.0687,0.0723,0.0839
         
     | 
| 9 | 
         
            +
            35k,0.1706,0.1756,,0.0970,0.1488,0.1573,0.1485,0.1565,0.1524,0.0803,0.0798,0.0803,0.0778
         
     | 
| 10 | 
         
            +
            40k,0.1942,0.1759,,0.1028,0.1355,0.1560,0.1488,0.1554,0.1562,0.0759,0.0848,0.0845,0.0886
         
     | 
| 11 | 
         
            +
            45k,0.1798,0.1820,,0.1078,0.1488,0.1715,0.1620,0.1684,0.1598,0.0881,0.0911,0.0867,0.0848
         
     | 
| 12 | 
         
            +
            50k,0.1972,0.1809,,0.1050,0.1540,,0.1590,0.1657,0.1698,0.0864,0.0909,0.0909,0.0884
         
     | 
| 13 | 
         
            +
            55k,0.2158,0.1956,,0.1097,0.1607,0.1659,0.1662,0.1751,0.1704,0.0892,0.0898,0.0745,0.0931
         
     | 
| 14 | 
         
            +
            60k,0.2039,0.2036,,0.1211,0.1654,0.1734,0.1612,0.1745,0.1801,0.0817,0.0850,0.0922,0.0986
         
     | 
| 15 | 
         
            +
            65k,0.2244,0.2044,,0.1089,0.1573,0.1765,0.1693,0.1776,0.1823,0.0920,0.0967,0.1025,0.1066
         
     | 
| 16 | 
         
            +
            70k,0.2233,0.2233,,0.1222,0.1634,0.1845,0.1679,0.1859,0.1767,0.1022,0.0925,0.1039,0.1177
         
     | 
| 17 | 
         
            +
            75k,0.2305,0.2277,,0.1097,0.1709,0.1825,0.1881,0.1737,0.1762,0.1069,0.0936,0.1116,0.1199
         
     | 
| 18 | 
         
            +
            80k,0.2457,0.2252,,0.1277,0.1573,0.1900,0.1776,0.1787,0.1964,0.1047,0.0981,0.1033,0.1097
         
     | 
| 19 | 
         
            +
            85k,0.2501,0.2285,,0.1280,0.1776,0.1914,0.1889,0.1870,0.1889,0.0942,0.0964,0.1144,0.1213
         
     | 
| 20 | 
         
            +
            90k,0.2504,0.2521,,0.1158,0.1598,0.1911,0.1806,0.1898,0.1773,0.1058,0.0964,0.1186,0.1163
         
     | 
| 21 | 
         
            +
            95k,0.2579,0.2443,,0.1235,0.1762,0.1911,0.1781,0.1989,0.1917,0.1097,0.0928,0.1213,0.1169
         
     | 
| 22 | 
         
            +
            100k,0.2526,0.2446,,0.1258,,0.2097,0.1928,0.1903,0.1947,0.1125,0.1025,0.1127,0.1188
         
     | 
| 23 | 
         
            +
            105k,0.2679,0.2482,,0.1366,,0.2028,0.1814,0.1922,0.2094,0.1199,0.1069,0.1186,0.1269
         
     | 
| 24 | 
         
            +
            110k,0.2717,0.2562,,0.1377,0.1756,0.2019,0.1859,0.1975,,0.1152,,0.1252,0.1252
         
     | 
| 25 | 
         
            +
            115k,0.2745,0.2562,,0.1346,0.1831,0.1956,0.1947,0.1903,0.2119,0.1127,,0.1285,0.1111
         
     | 
| 26 | 
         
            +
            120k,0.2801,0.2612,,0.1402,0.2014,0.2000,,0.2044,0.2119,0.1188,,0.1166,0.1219
         
     | 
| 27 | 
         
            +
            125k,0.2751,0.2657,,0.1307,0.2030,0.2014,0.1992,0.2053,0.1787,0.1230,,0.1274,0.1418
         
     | 
| 28 | 
         
            +
            130k,0.2884,0.2673,,0.1368,0.1997,0.2125,0.1994,0.2011,0.2086,0.1127,,,0.1335
         
     | 
| 29 | 
         
            +
            135k,0.2842,0.2673,,0.1363,,0.2069,0.2014,0.2036,0.2069,0.1255,,,0.1299
         
     | 
| 30 | 
         
            +
            140k,,0.2679,,0.1435,,0.2039,0.1986,0.2042,0.2058,0.1263,,,0.1299
         
     | 
| 31 | 
         
            +
            145k,,,,0.1532,,0.2172,0.1953,0.2078,0.2102,0.1274,,,0.1443
         
     | 
| 32 | 
         
            +
            150k,,,,0.1404,,0.2125,,0.2127,0.2075,0.1263,,,0.1410
         
     | 
| 33 | 
         
            +
            155k,,,,0.1418,,0.2235,0.1931,0.2066,0.2205,0.1418,,,0.1460
         
     | 
| 34 | 
         
            +
            160k,,,,0.1346,,0.2183,0.2116,0.2069,0.2208,0.1319,,,0.1413
         
     | 
| 35 | 
         
            +
            165k,,,,0.1524,,0.2219,0.2139,,0.2213,0.1296,,,0.1424
         
     | 
| 36 | 
         
            +
            170k,,,,0.1388,,0.2175,,,0.2169,0.1366,,,0.1454
         
     | 
| 37 | 
         
            +
            175k,,,,0.1438,,0.2235,0.2222,,0.2321,0.1349,,,0.1399
         
     | 
| 38 | 
         
            +
            180k,,,,0.1471,,0.2260,0.2249,,0.236,0.1465,,,0.1421
         
     | 
| 39 | 
         
            +
            185k,,,,0.1499,,0.2341,0.2222,,0.2366,0.1449,,,0.1421
         
     | 
| 40 | 
         
            +
            190k,,,,0.1504,,0.2233,,,0.2274,0.1413,,,0.1471
         
     | 
| 41 | 
         
            +
            195k,,,,0.1554,,0.2330,,,0.2454,0.1440,,,0.1407
         
     | 
| 42 | 
         
            +
            200k,,,,0.1565,,0.2238,,,0.2346,0.1407,,,0.1449
         
     | 
| 43 | 
         
            +
            205k,,,,0.1726,,0.2271,,,0.2316,0.1382,,,0.1501
         
     | 
| 44 | 
         
            +
            210k,,,,0.1623,,0.2305,,,0.2493,0.1526,,,0.1424
         
     | 
| 45 | 
         
            +
            215k,,,,0.1576,,0.2299,,,0.2355,0.1518,,,0.1535
         
     | 
| 46 | 
         
            +
            220k,,,,0.1693,,0.2330,,,0.2427,0.1529,,,
         
     | 
| 47 | 
         
            +
            225k,,,,0.1596,,0.2366,,,0.2440,0.1479,,,
         
     | 
| 48 | 
         
            +
            230k,,,,0.1693,,,,,0.2554,0.1560,,,
         
     | 
| 49 | 
         
            +
            235k,,,,0.1720,,,,,0.2535,0.1540,,,
         
     | 
| 50 | 
         
            +
            240k,,,,0.1712,,,,,,0.1554,,,
         
     | 
| 51 | 
         
            +
            245k,,,,0.1704,,,,,,0.1532,,,
         
     | 
| 52 | 
         
            +
            250k,,,,0.1784,,,,,,0.1551,,,
         
     | 
| 53 | 
         
            +
            255k,,,,0.1740,,,,,,0.1623,,,
         
     | 
| 54 | 
         
            +
            260k,,,,0.1756,,,,,,0.1618,,,
         
     | 
| 55 | 
         
            +
            265k,,,,0.1886,,,,,,0.1604,,,
         
     | 
| 56 | 
         
            +
            270k,,,,0.1820,,,,,,0.1612,,,
         
     | 
| 57 | 
         
            +
            275k,,,,0.1870,,,,,,0.1629,,,
         
     | 
| 58 | 
         
            +
            280k,,,,0.1704,,,,,,0.1645,,,
         
     | 
| 59 | 
         
            +
            285k,,,,0.1903,,,,,,0.1665,,,
         
     | 
| 60 | 
         
            +
            290k,,,,,,,,,,0.1648,,,
         
     | 
| 61 | 
         
            +
            300k,,,,,,,,,,0.1712,,,
         
     | 
| 62 | 
         
            +
            305k,,,,,,,,,,0.1690,,,
         
     | 
| 63 | 
         
            +
            310k,,,,,,,,,,0.1712,,,
         
     | 
| 64 | 
         
            +
            315k,,,,,,,,,,,,,
         
     | 
| 65 | 
         
            +
            320k,,,,,,,,,,,,,
         
     | 
| 66 | 
         
            +
            325k,,,,,,,,,,,,,
         
     | 
| 67 | 
         
            +
            330k,,,,,,,,,,,,,
         
     | 
| 68 | 
         
            +
            335k,,,,,,,,,,,,,
         
     | 
    	
        data/txt360_eval/CKPT Eval - PIQA.csv
    ADDED
    
    | 
         @@ -0,0 +1,69 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            ,Slim-Pajama 600B (bsz=4K x 1024),,,,,,FineWeb-1.5T,,Ours-Base,,Ours-Upsampling1,,Ours-Upsampling2,,Ours-Code-Upsampling2,,All-Upsampling1,,All-Upsampling1,,All-Upsampling1,,All-Upsampling1,,DCLM-Base,
         
     | 
| 2 | 
         
            +
            0-shot: 3 min,Llama-8x8B-baseline,,Llama-8x8B-seq8192,,Llama-8x8B-mup,,Llama-8x8B-seq8192,,Llama-8x8B-seq8192,,Llama-8x8B-seq8192,,Llama-8x8B-seq8192,,Llama-8x8B-seq8192,,Llama-8x8B-seq8192,,Llama-1x8B-seq8192,,Llama_extend-1x8B-seq8192,,Jais-1x8B-seq8192,,Llama-1x8B-seq8192,
         
     | 
| 3 | 
         
            +
            5-shot: 4 min,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot
         
     | 
| 4 | 
         
            +
            5k,0.7236,0.7073,0.7176,0.7133,,,0.7470,0.7263,,0.7106,0.7280,0.7242,0.7378,0.7296,0.7356,0.7323,0.7318,0.7263,0.7078,0.7002,0.7057,0.6844,0.7116,0.6975,0.7046,0.6942
         
     | 
| 5 | 
         
            +
            10k,0.7492,0.7421,0.7427,0.7318,,,0.7650,0.7524,0.7573,0.7454,0.7568,0.7486,0.7557,0.7492,0.7726,0.7568,0.7612,0.7486,0.7198,0.7089,0.7209,0.7127,0.7280,0.7236,0.7367,0.7144
         
     | 
| 6 | 
         
            +
            15k,0.7688,0.7541,0.7639,0.7481,,,0.7775,0.7655,0.7628,,0.7748,0.7622,0.7769,0.7622,0.7786,0.7737,0.7655,0.7661,0.7367,0.7329,0.7378,0.7312,0.7350,0.7318,0.7443,0.7133
         
     | 
| 7 | 
         
            +
            20k,0.7639,0.7655,0.7682,0.7579,,,0.7807,0.7612,0.7671,0.7590,0.7845,,0.7709,0.7693,0.7813,0.7650,0.7840,0.7758,0.7465,0.7378,0.7470,0.7312,0.7486,0.7394,0.7514,0.7323
         
     | 
| 8 | 
         
            +
            25k,0.7639,0.7677,0.7682,0.7671,,,0.7878,0.7748,,0.7590,,0.7693,0.7913,0.7671,0.7818,0.7715,0.7791,0.7715,0.7470,0.7378,0.7503,0.7437,0.7470,0.7492,0.7497,0.7345
         
     | 
| 9 | 
         
            +
            30k,0.7764,0.7677,0.7797,0.7563,,,0.7862,0.7704,0.7780,0.7617,0.7802,0.7737,0.7829,0.7655,0.7813,0.7661,0.7889,0.7731,0.7524,0.7388,0.7497,0.7497,0.7546,0.7437,0.7563,0.7427
         
     | 
| 10 | 
         
            +
            35k,0.7802,0.7677,0.7769,0.7622,,,0.7933,0.7726,0.7769,0.7699,0.7878,0.7682,0.7824,0.7737,0.7813,0.7797,0.7987,0.7780,0.7573,0.7361,0.7508,0.7421,0.7612,0.7579,0.7655,0.7486
         
     | 
| 11 | 
         
            +
            40k,0.7873,0.7802,0.7802,0.7682,,,0.7905,0.7758,,0.7731,0.7889,0.7731,0.7943,0.7704,0.7835,0.7775,0.7878,0.7769,0.7573,0.7383,0.7579,0.7481,0.7606,0.7492,0.7650,0.7519
         
     | 
| 12 | 
         
            +
            45k,0.7813,0.7786,0.7764,0.7699,,,0.7982,0.7824,0.7786,0.7661,0.7911,0.7775,0.7829,0.7737,0.7894,0.7835,0.7949,0.7780,0.7579,0.7465,0.7639,0.7465,0.7612,0.7514,0.7677,0.7497
         
     | 
| 13 | 
         
            +
            50k,0.7818,0.7797,0.7878,0.7753,,,0.7992,0.7780,0.7775,0.7748,0.7856,0.7775,0.7943,0.7699,0.7998,0.7851,0.7933,0.7786,0.7557,0.7437,0.7524,0.7443,0.7677,0.7579,0.7802,0.7601
         
     | 
| 14 | 
         
            +
            55k,0.7900,0.7780,0.7905,0.7829,,,0.8079,0.7737,0.7786,0.7775,0.7878,0.7731,0.7884,0.7780,0.7976,0.7905,0.7943,0.7824,0.7661,0.7546,0.7655,0.7541,0.7704,0.7606,0.7704,0.7606
         
     | 
| 15 | 
         
            +
            60k,0.7916,0.7851,0.7911,0.7797,,,0.7922,0.7797,0.7818,0.7813,0.7900,0.7699,0.7905,0.7661,0.7943,0.7878,0.8003,0.7818,0.7650,0.7530,0.7628,0.7557,0.7661,0.7628,0.7661,0.7579
         
     | 
| 16 | 
         
            +
            65k,0.7938,0.7840,0.7927,0.7840,,,0.7976,0.7769,0.7900,0.7780,0.7933,0.7731,0.7835,0.7671,0.7960,0.7845,0.7943,0.7780,0.7720,0.7492,0.7606,0.7535,0.7748,0.7639,0.7704,0.7584
         
     | 
| 17 | 
         
            +
            70k,0.7922,0.7835,0.7922,0.7845,,,0.8052,0.7916,0.7916,0.7818,0.7949,0.7807,0.7900,0.7726,0.7889,0.7845,0.7976,0.7900,0.7633,0.7524,0.7612,0.7552,0.7644,0.7622,0.7699,0.7568
         
     | 
| 18 | 
         
            +
            75k,0.7938,0.7927,0.7949,0.7840,,,0.8030,0.7873,0.7878,0.7715,0.7938,0.7807,0.8079,0.7922,0.7927,0.7905,0.8020,0.7933,0.7655,0.7541,0.7682,0.7508,0.7737,0.7568,0.7737,0.7573
         
     | 
| 19 | 
         
            +
            80k,0.7911,0.7878,0.7873,0.7894,,,0.7971,0.7742,0.7829,0.7797,0.7987,0.7824,0.7992,0.7894,0.8003,0.7900,0.7933,0.7884,0.7671,0.7497,0.7682,0.7524,0.7748,0.7563,0.7742,0.7628
         
     | 
| 20 | 
         
            +
            85k,0.7949,0.7894,0.7900,0.7889,,,0.8003,0.7840,0.8014,0.7786,0.8025,0.7894,0.7949,0.7818,0.7992,0.7894,0.7965,0.7851,0.7682,0.7530,0.7731,0.7563,0.7829,0.7622,0.7780,0.7704
         
     | 
| 21 | 
         
            +
            90k,0.7982,0.7894,0.7916,0.7943,,,0.7976,0.7797,0.7873,0.7720,0.7971,0.7862,0.7856,0.7845,0.7960,0.7976,0.7998,0.7878,0.7731,0.7535,0.7650,0.7552,0.7737,0.7622,0.7742,0.7617
         
     | 
| 22 | 
         
            +
            95k,0.8058,0.7992,0.8020,0.7873,,,0.8041,0.7742,0.7905,0.7840,0.8014,0.7807,0.7954,0.7829,0.8025,0.7911,0.8003,0.7884,0.7709,0.7535,0.7699,0.7519,0.7731,0.7612,0.7753,0.7704
         
     | 
| 23 | 
         
            +
            100k,0.8069,0.7992,0.8052,0.7873,,,0.8069,0.7856,,,0.8041,0.7851,0.7998,0.7824,0.8014,0.7927,0.8009,0.7905,0.7628,0.7508,0.7715,0.7628,0.7748,0.7584,0.7758,0.7720
         
     | 
| 24 | 
         
            +
            105k,0.8058,0.7965,0.8025,0.7943,,,0.8074,0.7916,,,0.8030,0.7900,0.8063,0.7927,0.8036,0.7949,0.7960,0.7905,0.7688,0.7568,0.7644,0.7601,0.7753,0.7682,0.7797,0.7639
         
     | 
| 25 | 
         
            +
            110k,0.8041,0.7987,0.8069,0.7982,,,0.8085,0.7797,0.7856,0.7856,0.8009,0.7922,0.7938,0.7856,0.8020,0.7911,0.7998,0.7916,0.7682,0.7563,,,0.7791,0.7699,0.7845,0.7633
         
     | 
| 26 | 
         
            +
            115k,0.8090,0.8009,0.8069,0.7982,,,0.8118,0.7867,0.7911,0.7802,0.8020,0.7867,0.8041,0.7922,0.8052,0.7916,0.8052,0.7938,0.7612,0.7541,,,0.7780,0.7633,0.7709,0.7639
         
     | 
| 27 | 
         
            +
            120k,0.8145,0.7949,0.8041,0.7911,,,0.8074,0.7878,0.7982,0.7851,0.7976,0.7922,0.8025,0.7905,0.7938,0.7927,0.7949,0.7905,0.7704,0.7715,,,0.7813,0.7720,0.7867,0.7758
         
     | 
| 28 | 
         
            +
            125k,0.8079,0.8009,0.8058,0.7900,,,0.8107,0.7829,0.8009,0.7900,0.8020,0.7894,0.8047,0.7916,0.8047,0.7976,0.8003,0.7922,0.7677,0.7671,,,0.7824,0.7737,0.7764,0.7699
         
     | 
| 29 | 
         
            +
            130k,0.8069,0.8058,0.8041,0.7982,,,0.8079,0.7845,0.7916,0.7797,0.8036,0.7916,0.8014,0.7949,0.8058,0.8014,0.7922,0.7943,0.7835,0.7622,,,,,0.7748,0.7720
         
     | 
| 30 | 
         
            +
            135k,0.8063,0.8047,0.8090,0.8020,,,0.8074,0.7878,,,0.8009,0.7878,0.8052,0.7835,0.8014,0.8030,0.8014,0.7927,0.7764,0.7682,,,,,0.7867,0.7813
         
     | 
| 31 | 
         
            +
            140k,,,0.8090,0.7992,,,0.8123,0.7911,,,0.8047,0.7916,0.8063,0.7971,0.8079,0.8036,0.7987,0.7976,0.7764,0.7628,,,,,0.7862,0.7720
         
     | 
| 32 | 
         
            +
            145k,,,,,,,0.8069,0.7807,,,0.8047,0.7922,0.8052,0.7845,0.7982,0.8025,0.8030,0.8085,0.7748,0.7688,,,,,0.7791,0.7699
         
     | 
| 33 | 
         
            +
            150k,,,,,,,0.8058,0.7949,,,0.8058,0.7878,,,0.8090,0.7998,0.7987,0.8025,0.7693,0.7579,,,,,0.7916,0.7769
         
     | 
| 34 | 
         
            +
            155k,,,,,,,0.8096,0.8041,,,0.8096,0.7922,0.7954,0.7775,0.8101,0.8041,0.8107,0.7965,0.7769,0.7639,,,,,0.7933,0.7731
         
     | 
| 35 | 
         
            +
            160k,,,,,,,0.8101,0.7900,,,0.8014,0.7976,0.8020,0.7894,0.8128,0.8036,0.8079,0.8009,0.7753,0.7715,,,,,0.7987,0.7709
         
     | 
| 36 | 
         
            +
            165k,,,,,,,0.8112,0.7933,,,0.8030,0.7971,0.8058,0.7878,,,0.8101,0.8009,0.7824,0.7709,,,,,0.7873,0.7682
         
     | 
| 37 | 
         
            +
            170k,,,,,,,,0.7916,,,0.8047,0.7954,0.8041,0.7922,,,0.8036,0.8041,0.7797,0.7720,,,,,0.7884,0.7715
         
     | 
| 38 | 
         
            +
            175k,,,,,,,0.8194,0.7965,,,0.8030,0.7911,0.7982,0.7927,,,0.8118,0.8096,0.7709,0.7666,,,,,0.7911,0.7802
         
     | 
| 39 | 
         
            +
            180k,,,,,,,0.8118,0.7845,,,0.8041,0.7954,0.8025,0.7987,,,0.8172,0.7976,0.7775,0.7677,,,,,0.7884,0.7851
         
     | 
| 40 | 
         
            +
            185k,,,,,,,0.8259,0.7982,,,0.8025,0.7960,0.8036,0.7905,,,0.8096,0.7987,0.7851,0.7737,,,,,0.7927,0.7813
         
     | 
| 41 | 
         
            +
            190k,,,,,,,0.8139,0.8025,,,0.7998,0.7987,,,,,0.8128,0.7998,0.7840,0.7758,,,,,0.7922,0.7867
         
     | 
| 42 | 
         
            +
            195k,,,,,,,0.8188,0.7965,,,0.8090,0.7878,,,,,0.8161,0.8052,0.7748,0.7677,,,,,0.7884,0.7769
         
     | 
| 43 | 
         
            +
            200k,,,,,,,0.8112,0.8025,,,0.8079,0.8009,,,,,0.8128,0.8041,0.7802,0.7726,,,,,0.7916,0.7802
         
     | 
| 44 | 
         
            +
            205k,,,,,,,0.8188,0.8009,,,0.8003,0.7938,,,,,0.8177,0.8145,0.7813,0.7726,,,,,0.7949,0.7748
         
     | 
| 45 | 
         
            +
            210k,,,,,,,0.8188,0.7971,,,0.8047,0.7889,,,,,0.8161,0.8101,0.7818,0.7786,,,,,0.7894,0.7867
         
     | 
| 46 | 
         
            +
            215k,,,,,,,0.8188,0.7992,,,0.8030,0.7922,,,,,0.8085,0.8085,0.7813,0.7748,,,,,0.7845,0.7802
         
     | 
| 47 | 
         
            +
            220k,,,,,,,0.8199,0.8030,,,0.8085,0.7976,,,,,0.8096,0.8074,0.7769,0.7704,,,,,,
         
     | 
| 48 | 
         
            +
            225k,,,,,,,0.8199,0.8041,,,0.8052,0.8014,,,,,0.8134,0.8101,0.7829,0.7731,,,,,,
         
     | 
| 49 | 
         
            +
            230k,,,,,,,0.8172,0.8041,,,,,,,,,0.8134,0.8107,0.7824,0.7802,,,,,,
         
     | 
| 50 | 
         
            +
            235k,,,,,,,0.8199,0.8085,,,,,,,,,0.8205,0.8118,0.7813,0.7764,,,,,,
         
     | 
| 51 | 
         
            +
            240k,,,,,,,0.8166,0.8101,,,,,,,,,,,0.7829,0.7824,,,,,,
         
     | 
| 52 | 
         
            +
            245k,,,,,,,0.8215,0.8090,,,,,,,,,,,0.7873,0.7753,,,,,,
         
     | 
| 53 | 
         
            +
            250k,,,,,,,0.8172,0.8107,,,,,,,,,,,0.7807,0.7797,,,,,,
         
     | 
| 54 | 
         
            +
            255k,,,,,,,0.8254,0.8128,,,,,,,,,,,0.7824,0.7737,,,,,,
         
     | 
| 55 | 
         
            +
            260k,,,,,,,0.8215,0.809,,,,,,,,,,,0.7807,0.7797,,,,,,
         
     | 
| 56 | 
         
            +
            265k,,,,,,,0.8210,0.8139,,,,,,,,,,,0.7775,0.7753,,,,,,
         
     | 
| 57 | 
         
            +
            270k,,,,,,,0.8145,0.8079,,,,,,,,,,,0.7824,,,,,,,
         
     | 
| 58 | 
         
            +
            275k,,,,,,,0.8161,0.8139,,,,,,,,,,,0.7889,0.7769,,,,,,
         
     | 
| 59 | 
         
            +
            280k,,,,,,,0.8248,0.8150,,,,,,,,,,,0.7807,0.7726,,,,,,
         
     | 
| 60 | 
         
            +
            285k,,,,,,,0.8210,0.8101,,,,,,,,,,,0.7916,0.7818,,,,,,
         
     | 
| 61 | 
         
            +
            290k,,,,,,,,,,,,,,,,,,,0.7851,0.7758,,,,,,
         
     | 
| 62 | 
         
            +
            300k,,,,,,,,,,,,,,,,,,,0.7840,0.7780,,,,,,
         
     | 
| 63 | 
         
            +
            305k,,,,,,,,,,,,,,,,,,,0.7873,0.7829,,,,,,
         
     | 
| 64 | 
         
            +
            310k,,,,,,,,,,,,,,,,,,,0.7813,0.7829,,,,,,
         
     | 
| 65 | 
         
            +
            315k,,,,,,,,,,,,,,,,,,,0.7851,0.7791,,,,,,
         
     | 
| 66 | 
         
            +
            320k,,,,,,,,,,,,,,,,,,,0.7873,0.7813,,,,,,
         
     | 
| 67 | 
         
            +
            325k,,,,,,,,,,,,,,,,,,,,,,,,,,
         
     | 
| 68 | 
         
            +
            330k,,,,,,,,,,,,,,,,,,,,,,,,,,
         
     | 
| 69 | 
         
            +
            335k,,,,,,,,,,,,,,,,,,,,,,,,,,
         
     | 
    	
        data/txt360_eval/CKPT Eval - TriviaQA.csv
    ADDED
    
    | 
         @@ -0,0 +1,68 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            5-shot,Slim-Pajama 600B (bsz=4K x 1024),,,FineWeb-1.5T,Ours-Base,Ours-Upsampling1,Ours-Upsampling2,Ours-Code-Upsampling2,All-Upsampling1,All-Upsampling1,All-Upsampling1,All-Upsampling1,DCLM-Base,Comments
         
     | 
| 2 | 
         
            +
            time: 76 min,Llama-8x8B-baseline,Llama-8x8B-seq8192,Llama-8x8B-mup,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-8x8B-seq8192,Llama-1x8B-seq8192,Llama_extend-1x8B-seq8192,Jais-1x8B-seq8192,Llama-1x8B-seq8192,1. Takes 25min to load checkpoints.
         
     | 
| 3 | 
         
            +
            5k,0.1944,0.1764,,0.1025,0.1232,0.1086,0.1260,0.1647,,0.0841,0.0762,0.0824,0.1066,2. GPU utility is only 20%
         
     | 
| 4 | 
         
            +
            10k,0.3372,0.3292,,0.2073,,0.2636,0.1150,0.2659,0.2604,0.1348,0.1343,0.1585,0.1850,
         
     | 
| 5 | 
         
            +
            15k,0.4050,0.3909,,0.3005,,0.3250,0.1872,0.3445,0.3244,0.1821,0.1930,0.1968,0.2443,
         
     | 
| 6 | 
         
            +
            20k,0.4451,0.4497,,0.3506,0.2795,,0.2719,0.3802,0.3637,0.2086,0.2196,0.2231,0.2772,
         
     | 
| 7 | 
         
            +
            25k,0.4899,0.4601,,0.3070,,0.3975,0.4093,0.4105,0.4120,0.2261,0.2375,0.2574,0.3146,
         
     | 
| 8 | 
         
            +
            30k,0.5125,0.4824,,0.2461,0.2974,0.0303,0.4195,0.4330,0.4294,0.2352,0.2484,0.2675,0.3328,
         
     | 
| 9 | 
         
            +
            35k,0.5249,0.5091,,0.3639,0.3572,0.1983,0.3587,0.4434,0.4428,0.2433,,0.2863,0.3507,
         
     | 
| 10 | 
         
            +
            40k,0.5555,0.5166,,0.3537,0.0346,0.4571,0.4434,0.4618,0.4623,0.2708,0.2828,0.3020,0.3606,
         
     | 
| 11 | 
         
            +
            45k,0.5664,0.5403,,0.3602,0.2674,0.2654,0.4366,0.4746,0.4792,0.2668,0.3018,0.3065,0.3726,
         
     | 
| 12 | 
         
            +
            50k,0.5690,0.5217,,0.2407,0.3689,0.4355,0.4051,0.4885,0.4795,0.2906,0.2952,0.3187,0.3807,
         
     | 
| 13 | 
         
            +
            55k,0.5843,0.5680,,0.2081,0.4101,0.4341,0.3230,0.4931,0.4940,0.2940,0.3117,0.3242,0.3984,
         
     | 
| 14 | 
         
            +
            60k,0.5916,0.5814,,0.4068,0.4107,0.4861,0.4469,0.4955,0.5130,0.3137,0.3090,0.3422,0.4081,
         
     | 
| 15 | 
         
            +
            65k,0.6032,0.5774,,0.3145,0.4477,0.4858,0.4907,0.5039,0.5087,0.3097,0.3184,0.3397,0.4156,
         
     | 
| 16 | 
         
            +
            70k,0.6030,0.5920,,0.4102,0.4736,0.5080,0.4920,0.5164,0.5129,0.3236,0.3360,0.3375,0.4242,
         
     | 
| 17 | 
         
            +
            75k,0.6216,0.6187,,0.2820,0.4226,0.4777,0.2245,0.5190,0.5042,0.3265,0.3341,0.3483,0.4220,
         
     | 
| 18 | 
         
            +
            80k,0.6397,0.6127,,0.0975,0.4217,0.3698,,0.5185,0.5301,0.3352,0.3412,0.3532,0.4306,
         
     | 
| 19 | 
         
            +
            85k,0.6416,0.6254,,0.0722,0.4763,0.3700,0.5029,0.5249,0.5350,0.3448,0.3423,0.3530,0.4340,
         
     | 
| 20 | 
         
            +
            90k,0.6510,0.6317,,0.3388,0.1472,0.4793,0.0317,0.5337,0.5220,0.3440,0.3559,0.3644,0.4418,
         
     | 
| 21 | 
         
            +
            95k,0.6655,0.6479,,0.5283,0.4938,0.5144,0.5180,0.5432,0.5446,0.3331,0.3393,0.3683,0.4454,
         
     | 
| 22 | 
         
            +
            100k,0.6723,0.6486,,0.4317,0.1100,0.5121,0.5358,0.5383,0.5514,0.3520,0.3544,0.3698,0.4378,
         
     | 
| 23 | 
         
            +
            105k,0.6755,0.6582,,0.1886,,0.5280,0.5153,0.5499,0.5562,0.3626,0.3642,0.3683,0.4525,
         
     | 
| 24 | 
         
            +
            110k,0.6798,0.6668,,0.3510,,0.5468,0.5182,0.5541,0.5654,0.3694,,0.3903,0.4566,
         
     | 
| 25 | 
         
            +
            115k,0.6796,0.6668,,0.3692,0.4759,0.5347,0.5132,0.5508,0.5577,0.3741,,0.3908,0.4482,
         
     | 
| 26 | 
         
            +
            120k,0.6822,0.6688,,0.3690,0.4352,0.5376,0.5483,0.5567,0.5658,0.3881,,0.3950,0.4524,
         
     | 
| 27 | 
         
            +
            125k,0.6894,0.6743,,0.3365,0.5206,0.4855,0.5211,0.5617,0.5658,0.3725,,0.3880,0.4592,
         
     | 
| 28 | 
         
            +
            130k,0.6914,0.6709,,0.3550,0.0088,0.5238,0.5245,0.5597,0.5609,0.3698,,,0.4594,
         
     | 
| 29 | 
         
            +
            135k,0.6915,0.6721,,0.3892,,0.5467,0.3977,0.5541,0.5774,0.3782,,,0.4636,
         
     | 
| 30 | 
         
            +
            140k,,0.6773,,0.3930,,0.3110,0.4991,0.5572,0.5675,0.3906,,,0.4741,
         
     | 
| 31 | 
         
            +
            145k,,,,0.4538,,0.5720,0.4872,0.5642,0.5639,,,,0.4720,
         
     | 
| 32 | 
         
            +
            150k,,,,0.2883,,0.5612,,0.5701,0.5844,0.3899,,,0.4651,
         
     | 
| 33 | 
         
            +
            155k,,,,0.4185,,0.5030,0.1586,0.5790,0.5755,0.4044,,,0.4784,
         
     | 
| 34 | 
         
            +
            160k,,,,0.2720,,0.5701,0.5630,0.5819,0.5864,0.4049,,,0.4665,
         
     | 
| 35 | 
         
            +
            165k,,,,0.4252,,0.5388,0.5642,,0.5853,0.4007,,,0.4793,
         
     | 
| 36 | 
         
            +
            170k,,,,0.1507,,0.5951,0.5739,,,0.4150,,,0.4846,
         
     | 
| 37 | 
         
            +
            175k,,,,0.3242,,0.5437,0.5640,,0.5979,0.4092,,,0.4908,
         
     | 
| 38 | 
         
            +
            180k,,,,0.2653,,0.5580,0.5912,,0.6054,0.4189,,,,
         
     | 
| 39 | 
         
            +
            185k,,,,0.2651,,0.5709,0.5852,,0.6064,,,,0.5030,
         
     | 
| 40 | 
         
            +
            190k,,,,0.2380,,0.5142,,,0.5996,0.4193,,,0.5115,
         
     | 
| 41 | 
         
            +
            195k,,,,0.4048,,0.5964,,,0.6243,0.4265,,,,
         
     | 
| 42 | 
         
            +
            200k,,,,0.5058,,0.5684,,,0.6248,0.4256,,,,
         
     | 
| 43 | 
         
            +
            205k,,,,0.0945,,0.5878,,,0.6224,0.4190,,,0.5105,
         
     | 
| 44 | 
         
            +
            210k,,,,0.1557,,0.6020,,,0.6311,0.4415,,,0.5164,
         
     | 
| 45 | 
         
            +
            215k,,,,0.2483,,0.5995,,,0.6293,0.4353,,,0.5163,
         
     | 
| 46 | 
         
            +
            220k,,,,0.1725,,0.5924,,,0.6375,,,,,
         
     | 
| 47 | 
         
            +
            225k,,,,0.2467,,0.4832,,,0.6340,0.4556,,,,
         
     | 
| 48 | 
         
            +
            230k,,,,0.1653,,,,,0.6436,0.4622,,,,
         
     | 
| 49 | 
         
            +
            235k,,,,0.1884,,,,,0.6411,0.4608,,,,
         
     | 
| 50 | 
         
            +
            240k,,,,0.0719,,,,,,0.4536,,,,
         
     | 
| 51 | 
         
            +
            245k,,,,0.3757,,,,,,0.4641,,,,
         
     | 
| 52 | 
         
            +
            250k,,,,0.5859,,,,,,,,,,
         
     | 
| 53 | 
         
            +
            255k,,,,0.4987,,,,,,0.4741,,,,
         
     | 
| 54 | 
         
            +
            260k,,,,0.3940,,,,,,0.4712,,,,
         
     | 
| 55 | 
         
            +
            265k,,,,0.3607,,,,,,0.4767,,,,
         
     | 
| 56 | 
         
            +
            270k,,,,0.3898,,,,,,0.4795,,,,
         
     | 
| 57 | 
         
            +
            275k,,,,0.4123,,,,,,,,,,
         
     | 
| 58 | 
         
            +
            280k,,,,0.2413,,,,,,0.4787,,,,
         
     | 
| 59 | 
         
            +
            285k,,,,0.3665,,,,,,0.4843,,,,
         
     | 
| 60 | 
         
            +
            290k,,,,,,,,,,0.4818,,,,
         
     | 
| 61 | 
         
            +
            300k,,,,,,,,,,0.4969,,,,
         
     | 
| 62 | 
         
            +
            305k,,,,,,,,,,0.4941,,,,
         
     | 
| 63 | 
         
            +
            310k,,,,,,,,,,0.4963,,,,
         
     | 
| 64 | 
         
            +
            315k,,,,,,,,,,,,,,
         
     | 
| 65 | 
         
            +
            320k,,,,,,,,,,,,,,
         
     | 
| 66 | 
         
            +
            325k,,,,,,,,,,,,,,
         
     | 
| 67 | 
         
            +
            330k,,,,,,,,,,,,,,
         
     | 
| 68 | 
         
            +
            335k,,,,,,,,,,,,,,
         
     | 
    	
        data/txt360_eval/CKPT Eval - WinoGrande.csv
    ADDED
    
    | 
         @@ -0,0 +1,69 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            ,Slim-Pajama 600B (bsz=4K x 1024),,,,,,FineWeb-1.5T,,Ours-Base,,Ours-Upsampling1,,Ours-Upsampling2,,Ours-Code-Upsampling2,,All-Upsampling1,,All-Upsampling1,,All-Upsampling1,,All-Upsampling1,,DCLM-Base,
         
     | 
| 2 | 
         
            +
            0-shot: 3 min,Llama-8x8B-baseline,,Llama-8x8B-seq8192,,Llama-8x8B-mup,,Llama-8x8B-seq8192,,Llama-8x8B-seq8192,,Llama-8x8B-seq8192,,Llama-8x8B-seq8192,,Llama-8x8B-seq8192,,Llama-8x8B-seq8192,,Llama-1x8B-seq8192,,Llama_extend-1x8B-seq8192,,Jais-1x8B-seq8192,,Llama-1x8B-seq8192,
         
     | 
| 3 | 
         
            +
            5-shot: 3 min,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot,0-shot,5-shot
         
     | 
| 4 | 
         
            +
            5k,0.5454,0.5367,0.5572,0.5367,,,0.5691,0.5335,0.5351,0.5233,0.5241,,0.5367,0.5391,0.5470,0.5414,0.5383,0.5320,0.5067,0.5201,0.5257,0.5122,0.5020,0.5217,0.5217,0.5312
         
     | 
| 5 | 
         
            +
            10k,0.5801,0.6054,0.5730,0.5525,,,0.5904,0.5620,0.5604,0.5738,0.5825,0.5691,0.5817,0.5809,0.5777,0.5706,0.5667,0.5620,0.5383,0.5375,0.5241,0.5430,0.5343,0.5241,0.5722,0.5517
         
     | 
| 6 | 
         
            +
            15k,0.6172,0.6038,0.5848,0.6046,,,0.5927,0.5951,0.5919,0.5699,,0.6014,0.5880,0.5856,0.5991,0.5943,0.5896,0.5841,0.5596,0.5470,0.5391,0.5414,0.5509,0.5454,0.5620,0.5612
         
     | 
| 7 | 
         
            +
            20k,0.6109,0.6267,0.5935,0.6085,,,0.6448,0.6022,0.6006,,0.6204,,0.6180,0.6235,0.6014,0.6062,0.5935,0.6014,0.5580,0.5691,0.5533,0.5328,0.5659,0.5580,0.5943,0.5714
         
     | 
| 8 | 
         
            +
            25k,0.6417,0.6369,0.5998,0.6140,,,0.6196,0.6164,0.6125,0.5998,0.6093,0.6117,0.6062,0.6117,0.6212,0.6014,0.6101,0.6188,0.5785,0.5770,0.5556,0.5572,0.5762,0.5691,0.5904,0.5864
         
     | 
| 9 | 
         
            +
            30k,0.6377,0.6456,0.6251,0.6361,,,0.6488,0.6251,,,0.6330,0.5983,0.6140,0.6022,0.6338,0.6109,0.6322,0.6188,0.5667,0.5770,0.5612,0.5493,0.5919,0.5604,0.6054,0.5935
         
     | 
| 10 | 
         
            +
            35k,0.6472,0.6456,0.6196,0.5935,,,0.6440,0.6227,0.6030,0.6172,0.6393,0.6243,0.6259,0.6093,0.6172,0.6164,0.6212,0.6188,0.5817,0.5572,0.5848,0.5612,0.5777,0.5714,0.6172,0.5975
         
     | 
| 11 | 
         
            +
            40k,0.6606,0.6630,0.6369,0.6259,,,0.6496,0.6077,0.6338,0.5951,,,0.6267,0.6101,0.6417,0.6235,0.6433,0.6361,0.5856,0.5770,0.5817,0.5777,0.5927,0.5635,0.6117,0.5959
         
     | 
| 12 | 
         
            +
            45k,0.6717,0.6496,0.6196,0.6425,,,0.6456,0.6417,0.6172,0.6109,0.6448,0.6235,0.6393,0.6417,0.6425,0.6369,0.6393,0.6346,0.5777,0.5699,0.5872,0.5809,0.5951,0.5833,0.6156,0.6085
         
     | 
| 13 | 
         
            +
            50k,0.6756,0.6725,0.6369,0.6377,,,0.6464,0.6275,0.6401,0.5975,0.6322,0.6322,0.6164,0.614,0.6425,0.6425,0.6472,0.6393,0.5833,0.5683,0.5872,0.5675,0.5967,0.5580,0.6338,0.6022
         
     | 
| 14 | 
         
            +
            55k,0.6661,0.6590,0.6496,0.6614,,,0.6567,0.6314,0.6235,0.6062,0.6519,0.6133,0.6314,0.6235,0.6377,0.648,0.6464,0.6322,0.5738,0.5612,0.5935,0.5722,0.6148,0.5533,0.6140,0.614
         
     | 
| 15 | 
         
            +
            60k,0.6622,0.6511,0.6377,0.6582,,,0.6480,0.6464,0.6251,0.6117,0.6535,0.6251,0.6219,0.6235,0.6480,0.6196,0.6369,0.6338,0.5864,0.5730,0.5746,0.5683,0.5896,0.5785,0.6062,0.6283
         
     | 
| 16 | 
         
            +
            65k,0.6669,0.6772,0.6590,0.6685,,,0.6654,0.6354,0.6283,0.6180,0.6519,0.6196,0.6401,0.6393,0.6559,0.633,0.6504,0.6275,0.5919,0.5754,0.5825,0.5793,0.6101,0.5959,0.6290,0.6125
         
     | 
| 17 | 
         
            +
            70k,0.6827,0.6811,0.6567,0.6701,,,0.6709,0.6401,0.6322,0.6235,0.6622,,0.6417,0.6409,0.6433,0.6338,0.6559,0.6361,0.5975,0.5927,0.5738,0.5588,0.5983,0.5738,0.6330,0.6101
         
     | 
| 18 | 
         
            +
            75k,0.6788,0.6819,0.6543,0.6685,,,0.6709,0.6283,0.6480,0.6172,0.6654,0.6409,0.6527,0.6267,0.6488,0.6551,0.6527,0.6638,0.5959,0.5817,0.5880,0.5517,0.6164,0.5793,0.6196,0.6164
         
     | 
| 19 | 
         
            +
            80k,0.6835,0.6882,0.6748,0.6638,,,0.6843,0.6464,0.6504,0.6275,0.6677,0.6401,0.6369,0.6298,0.6606,0.6488,0.6519,0.6440,0.5872,0.5675,0.5856,0.5564,0.6148,0.6038,0.6188,0.5983
         
     | 
| 20 | 
         
            +
            85k,0.6867,0.6882,0.6638,0.6590,,,0.6875,0.6504,0.6409,0.6188,,0.6535,0.6575,0.6283,0.6606,0.6393,0.6393,0.6543,0.6085,0.5919,0.5872,0.5738,0.6156,0.5817,0.6361,0.6196
         
     | 
| 21 | 
         
            +
            90k,0.6827,0.6803,0.6740,0.6598,,,0.6740,0.6393,0.6369,0.6306,0.6496,0.6543,0.6488,0.6409,0.6606,0.6314,0.6527,0.6409,0.5927,0.5825,0.5951,0.5888,0.6148,0.6077,0.6259,0.6164
         
     | 
| 22 | 
         
            +
            95k,0.6859,0.6859,0.6764,0.6575,,,0.6835,0.6401,0.6369,0.6361,0.6551,0.6322,0.6654,0.6338,0.6630,0.6322,0.6409,0.6582,0.6156,0.5927,0.5864,0.5919,0.6164,0.5809,0.6283,0.6014
         
     | 
| 23 | 
         
            +
            100k,0.6898,,0.6661,0.6851,,,0.6756,0.6567,,,0.6567,0.6472,0.6590,0.6488,0.6748,0.6204,0.6511,0.6519,0.6109,0.5817,0.5919,0.5746,0.6133,0.6030,0.6488,0.6338
         
     | 
| 24 | 
         
            +
            105k,0.6811,0.6772,0.6654,0.6646,,,0.6772,0.6519,,,0.6661,0.6472,0.6732,0.6369,0.6638,0.633,0.6740,0.6638,0.6140,0.5991,0.5912,0.5833,0.6046,0.5943,0.6496,0.618
         
     | 
| 25 | 
         
            +
            110k,0.7017,0.6867,0.6701,0.6654,,,0.6669,0.6480,0.6559,0.6456,0.6756,0.6551,0.6567,0.6401,0.6661,0.6456,0.6551,0.6535,0.6196,0.6006,,,0.6219,0.6069,0.6417,0.6338
         
     | 
| 26 | 
         
            +
            115k,0.6890,0.7040,0.6701,0.6654,,,0.6732,0.6511,0.6456,0.6227,0.6559,0.6456,0.6661,0.6488,0.6748,0.6527,0.6622,0.6448,0.6156,0.6014,,,0.6338,0.6069,0.6575,0.6196
         
     | 
| 27 | 
         
            +
            120k,0.6930,0.6953,0.6717,0.6701,,,0.6764,0.6464,0.6519,0.6275,0.6622,0.6480,0.6590,0.6322,0.6732,0.6377,0.6519,0.6622,0.5872,0.5612,,,0.6227,0.6077,0.6504,0.6275
         
     | 
| 28 | 
         
            +
            125k,0.6961,0.6977,0.6811,0.6819,,,0.6985,0.6433,0.6393,0.6417,0.6685,0.6433,0.6646,0.6338,0.6740,0.6559,0.6803,0.6693,0.6014,0.5888,,,0.6298,0.6243,0.6488,0.6117
         
     | 
| 29 | 
         
            +
            130k,0.6922,0.7056,0.6859,0.6717,,,0.6811,0.6330,0.6614,0.6393,0.6780,0.6322,0.6590,0.6361,0.6748,0.6456,0.6559,0.6472,0.6085,0.5880,,,,,0.6614,0.6235
         
     | 
| 30 | 
         
            +
            135k,0.6961,0.6953,0.6788,0.6756,,,0.6827,0.6614,,,0.6606,0.6575,0.6551,0.6464,0.6748,0.629,0.6677,0.6535,0.5991,0.5959,,,,,0.6433,0.6417
         
     | 
| 31 | 
         
            +
            140k,,,0.6819,0.6827,,,0.6867,0.6630,,,0.6598,0.6551,0.6567,0.6369,0.6709,0.6551,0.6638,0.6519,0.6038,0.5809,,,,,0.6472,0.6314
         
     | 
| 32 | 
         
            +
            145k,,,,,,,0.6819,0.6504,,,0.6717,0.6480,0.6669,0.6551,0.6661,0.6433,0.6725,0.6630,0.6180,0.5801,,,,,0.6606,0.644
         
     | 
| 33 | 
         
            +
            150k,,,,,,,0.6835,0.6646,,,0.6693,0.6654,,,0.6732,0.6148,0.6788,0.6409,0.6062,0.5991,,,,,0.6567,0.6361
         
     | 
| 34 | 
         
            +
            155k,,,,,,,0.6748,0.6590,,,0.6772,0.6677,0.6630,0.648,0.6851,0.6409,0.6922,0.6764,0.6204,0.6006,,,,,0.6677,0.6401
         
     | 
| 35 | 
         
            +
            160k,,,,,,,0.6875,0.6614,,,0.6709,0.6669,0.6748,0.648,0.6622,0.6638,0.6811,0.6803,0.6133,0.5864,,,,,0.6567,0.6322
         
     | 
| 36 | 
         
            +
            165k,,,,,,,0.6788,0.6661,,,0.6709,0.6717,0.6725,0.6433,,,,,0.6006,0.5856,,,,,0.6669,0.6472
         
     | 
| 37 | 
         
            +
            170k,,,,,,,0.6938,0.6709,,,0.6701,0.6598,0.6725,0.6354,,,0.6717,0.6867,0.6085,0.5833,,,,,0.6685,0.6575
         
     | 
| 38 | 
         
            +
            175k,,,,,,,0.6938,0.6693,,,0.6590,0.6622,0.6693,0.6614,,,0.6890,0.6867,0.6133,0.5754,,,,,0.6622,0.6598
         
     | 
| 39 | 
         
            +
            180k,,,,,,,0.6977,0.6646,,,0.6661,0.6646,0.6740,0.6661,,,0.6685,0.6504,0.6235,0.5967,,,,,0.6732,0.6535
         
     | 
| 40 | 
         
            +
            185k,,,,,,,0.6875,0.6519,,,0.6930,0.6535,0.6811,0.663,,,0.6851,0.6819,0.6156,0.5833,,,,,0.6661,0.648
         
     | 
| 41 | 
         
            +
            190k,,,,,,,0.6914,0.6859,,,0.6819,0.6606,,,,,0.6693,0.6638,0.6361,0.6006,,,,,0.6701,0.6488
         
     | 
| 42 | 
         
            +
            195k,,,,,,,0.6859,0.6614,,,0.6946,0.6732,,,,,0.6756,0.6638,0.6259,0.5841,,,,,0.6606,0.6543
         
     | 
| 43 | 
         
            +
            200k,,,,,,,0.6875,0.6669,,,0.6898,0.6780,,,,,0.7017,0.6701,0.6227,0.5872,,,,,0.6590,0.648
         
     | 
| 44 | 
         
            +
            205k,,,,,,,0.7072,0.6906,,,0.6969,0.6780,,,,,0.6827,0.6748,0.6306,0.5888,,,,,0.6725,0.659
         
     | 
| 45 | 
         
            +
            210k,,,,,,,0.6859,0.6661,,,0.6827,0.6748,,,,,0.6882,0.6717,0.6322,0.5919,,,,,0.6669,0.6488
         
     | 
| 46 | 
         
            +
            215k,,,,,,,0.7017,0.6780,,,0.6748,0.6772,,,,,0.6922,0.6709,0.6346,0.6006,,,,,0.6709,0.6661
         
     | 
| 47 | 
         
            +
            220k,,,,,,,0.7040,0.6788,,,0.6859,0.6732,,,,,0.6969,0.6638,0.6346,0.5983,,,,,,
         
     | 
| 48 | 
         
            +
            225k,,,,,,,0.7111,0.6717,,,0.6843,0.6685,,,,,0.6756,0.6606,0.6188,0.5935,,,,,,
         
     | 
| 49 | 
         
            +
            230k,,,,,,,0.7103,0.6811,,,,,,,,,0.7096,0.6701,0.6235,0.5935,,,,,,
         
     | 
| 50 | 
         
            +
            235k,,,,,,,0.7040,0.6772,,,,,,,,,0.7096,0.6764,0.6306,0.6062,,,,,,
         
     | 
| 51 | 
         
            +
            240k,,,,,,,0.7080,0.6851,,,,,,,,,,,0.6219,,,,,,,
         
     | 
| 52 | 
         
            +
            245k,,,,,,,0.6985,0.6938,,,,,,,,,,,0.6267,0.5888,,,,,,
         
     | 
| 53 | 
         
            +
            250k,,,,,,,0.7127,0.6938,,,,,,,,,,,0.6361,0.6006,,,,,,
         
     | 
| 54 | 
         
            +
            255k,,,,,,,0.7119,0.6827,,,,,,,,,,,0.6440,0.5998,,,,,,
         
     | 
| 55 | 
         
            +
            260k,,,,,,,0.7056,0.6867,,,,,,,,,,,0.6322,0.5975,,,,,,
         
     | 
| 56 | 
         
            +
            265k,,,,,,,0.7040,0.6756,,,,,,,,,,,0.6338,0.6069,,,,,,
         
     | 
| 57 | 
         
            +
            270k,,,,,,,0.7111,0.6819,,,,,,,,,,,0.6314,0.5991,,,,,,
         
     | 
| 58 | 
         
            +
            275k,,,,,,,0.7127,0.6811,,,,,,,,,,,0.6306,0.6148,,,,,,
         
     | 
| 59 | 
         
            +
            280k,,,,,,,0.7064,0.6914,,,,,,,,,,,0.6251,0.6054,,,,,,
         
     | 
| 60 | 
         
            +
            285k,,,,,,,0.7096,0.6977,,,,,,,,,,,0.6385,,,,,,,
         
     | 
| 61 | 
         
            +
            290k,,,,,,,,,,,,,,,,,,,0.6338,0.6077,,,,,,
         
     | 
| 62 | 
         
            +
            300k,,,,,,,,,,,,,,,,,,,0.6227,0.6093,,,,,,
         
     | 
| 63 | 
         
            +
            305k,,,,,,,,,,,,,,,,,,,0.6290,0.6069,,,,,,
         
     | 
| 64 | 
         
            +
            310k,,,,,,,,,,,,,,,,,,,0.6267,0.6156,,,,,,
         
     | 
| 65 | 
         
            +
            315k,,,,,,,,,,,,,,,,,,,0.6314,0.6101,,,,,,
         
     | 
| 66 | 
         
            +
            320k,,,,,,,,,,,,,,,,,,,0.6401,0.5991,,,,,,
         
     | 
| 67 | 
         
            +
            325k,,,,,,,,,,,,,,,,,,,,,,,,,,
         
     | 
| 68 | 
         
            +
            330k,,,,,,,,,,,,,,,,,,,,,,,,,,
         
     | 
| 69 | 
         
            +
            335k,,,,,,,,,,,,,,,,,,,,,,,,,,
         
     | 
    	
        main.py
    CHANGED
    
    | 
         @@ -150,7 +150,7 @@ def main(): 
     | 
|
| 150 | 
         
             
                return Div(
         
     | 
| 151 | 
         
             
                    D_title(
         
     | 
| 152 | 
         
             
                        H1(
         
     | 
| 153 | 
         
            -
                            "TxT360:  
     | 
| 154 | 
         
             
                            cls="l-body",
         
     | 
| 155 | 
         
             
                            style="text-align: center;",
         
     | 
| 156 | 
         
             
                        ),
         
     | 
| 
         @@ -192,7 +192,7 @@ def main(): 
     | 
|
| 192 | 
         
             
                                        ),
         
     | 
| 193 | 
         
             
                                        Li(
         
     | 
| 194 | 
         
             
                                            A(
         
     | 
| 195 | 
         
            -
                                                "Motivation Behind  
     | 
| 196 | 
         
             
                                                href="/intro#section2",
         
     | 
| 197 | 
         
             
                                                hx_get="/intro#section2",
         
     | 
| 198 | 
         
             
                                                hx_target="#inner-text",
         
     | 
| 
         @@ -298,7 +298,7 @@ def main(): 
     | 
|
| 298 | 
         
             
                                ),
         
     | 
| 299 | 
         
             
                                Div(
         
     | 
| 300 | 
         
             
                                    A(
         
     | 
| 301 | 
         
            -
                                        " 
     | 
| 302 | 
         
             
                                        href="/common#section1",
         
     | 
| 303 | 
         
             
                                        hx_get="/common#section1",
         
     | 
| 304 | 
         
             
                                        hx_target="#inner-text",
         
     | 
| 
         @@ -883,18 +883,17 @@ def intro(): 
     | 
|
| 883 | 
         
             
                return Div(
         
     | 
| 884 | 
         
             
                    Section(
         
     | 
| 885 | 
         
             
                        H2("About TxT360"),
         
     | 
| 886 | 
         
            -
                        P(
         
     | 
| 887 | 
         
            -
                            B(
         
     | 
| 888 | 
         
            -
                                "We introduce TxT360 (Trillion eXtracted Text) the first dataset to globally deduplicate 99 CommonCrawl snapshots and 14 commonly used non-web data sources (e.g. FreeLaw, PG-19, etc.) providing pretraining teams with a recipe to easily adjust data weighting and train the most performant models."
         
     | 
| 889 | 
         
             
                            )
         
     | 
| 890 | 
         
             
                        ),
         
     | 
| 891 | 
         
             
                        P(
         
     | 
| 892 | 
         
            -
                            "Building on top of the prior studies on pre-training data 
     | 
| 893 | 
         
             
                            D_cite(bibtex_key="refinedweb"),
         
     | 
| 894 | 
         
             
                            D_cite(bibtex_key="fineweb"),
         
     | 
| 895 | 
         
             
                            D_cite(bibtex_key="c4"),
         
     | 
| 896 | 
         
             
                            D_cite(bibtex_key="muennighoff2023scaling"),
         
     | 
| 897 | 
         
            -
                            "TxT360 carefully implements data processing steps including extraction, filtering, deduplication, personally identifiable information removal, and other steps.",
         
     | 
| 898 | 
         
             
                        ),
         
     | 
| 899 | 
         
             
                        P(
         
     | 
| 900 | 
         
             
                            "Metadata is stored to recover the raw distribution for each dataset, enabling fine-grained control to create data distributions and corpus of desired size. As an example, we present one simple upsampling scheme that takes into account the duplication counts, resulting in a 15~16 trillion token corpus, outperforming FineWeb and our non-upsampling baselines, on diverse evaluations. Unlike DCLM",
         
     | 
| 
         @@ -909,7 +908,7 @@ def intro(): 
     | 
|
| 909 | 
         
             
                        id="section1",
         
     | 
| 910 | 
         
             
                    ),
         
     | 
| 911 | 
         
             
                    Section(
         
     | 
| 912 | 
         
            -
                        H2("Motivation Behind  
     | 
| 913 | 
         
             
                        H3(
         
     | 
| 914 | 
         
             
                            "TxT360 is the first dataset to combine both web and curated data sources commonly used in pretraining."
         
     | 
| 915 | 
         
             
                        ),
         
     | 
| 
         @@ -939,7 +938,7 @@ def intro(): 
     | 
|
| 939 | 
         
             
                        ),
         
     | 
| 940 | 
         
             
                        table_div_data,
         
     | 
| 941 | 
         
             
                        P(
         
     | 
| 942 | 
         
            -
                            "We provide details and context for the choices behind TxT360 in the respective Web Data Processing and Curated Source Processing section. A deep dive describing the deduplication process can be found in the  
     | 
| 943 | 
         
             
                        ),
         
     | 
| 944 | 
         
             
                        # Img(src="images/pipeline.png", height="300", width="600"),
         
     | 
| 945 | 
         
             
                        # P(
         
     | 
| 
         | 
|
| 150 | 
         
             
                return Div(
         
     | 
| 151 | 
         
             
                    D_title(
         
     | 
| 152 | 
         
             
                        H1(
         
     | 
| 153 | 
         
            +
                            "TxT360: A Top-Quality LLM Pre-training Dataset Requires the Perfect Blend",
         
     | 
| 154 | 
         
             
                            cls="l-body",
         
     | 
| 155 | 
         
             
                            style="text-align: center;",
         
     | 
| 156 | 
         
             
                        ),
         
     | 
| 
         | 
|
| 192 | 
         
             
                                        ),
         
     | 
| 193 | 
         
             
                                        Li(
         
     | 
| 194 | 
         
             
                                            A(
         
     | 
| 195 | 
         
            +
                                                "Motivation Behind TxT360",
         
     | 
| 196 | 
         
             
                                                href="/intro#section2",
         
     | 
| 197 | 
         
             
                                                hx_get="/intro#section2",
         
     | 
| 198 | 
         
             
                                                hx_target="#inner-text",
         
     | 
| 
         | 
|
| 298 | 
         
             
                                ),
         
     | 
| 299 | 
         
             
                                Div(
         
     | 
| 300 | 
         
             
                                    A(
         
     | 
| 301 | 
         
            +
                                        "Shared Processing Steps",
         
     | 
| 302 | 
         
             
                                        href="/common#section1",
         
     | 
| 303 | 
         
             
                                        hx_get="/common#section1",
         
     | 
| 304 | 
         
             
                                        hx_target="#inner-text",
         
     | 
| 
         | 
|
| 883 | 
         
             
                return Div(
         
     | 
| 884 | 
         
             
                    Section(
         
     | 
| 885 | 
         
             
                        H2("About TxT360"),
         
     | 
| 886 | 
         
            +
                        P(  "TL;DR ", 
         
     | 
| 887 | 
         
            +
                            B("We introduce TxT360 (Trillion eXtracted Text), the first dataset to globally deduplicate 99 CommonCrawl snapshots and 14 high-quality data sources from diverse domains (e.g., FreeLaw, PG-19, etc.). Our large-scale deduplication process enables precise control over data weighting. In addition to document selection, TxT360, along with its rich metadata, allows for the assignment of optimal data weights. We demonstrate a simple but effective upsampling recipe that creates a 15+ trillion-token corpus, outperforming FineWeb 15T. Furthermore, TxT360 empowers pre-trainers to explore more advanced weighting techniques, a capability not commonly available in previous pre-training datasets."
         
     | 
| 
         | 
|
| 888 | 
         
             
                            )
         
     | 
| 889 | 
         
             
                        ),
         
     | 
| 890 | 
         
             
                        P(
         
     | 
| 891 | 
         
            +
                            "Building on top of the prior studies on pre-training data",
         
     | 
| 892 | 
         
             
                            D_cite(bibtex_key="refinedweb"),
         
     | 
| 893 | 
         
             
                            D_cite(bibtex_key="fineweb"),
         
     | 
| 894 | 
         
             
                            D_cite(bibtex_key="c4"),
         
     | 
| 895 | 
         
             
                            D_cite(bibtex_key="muennighoff2023scaling"),
         
     | 
| 896 | 
         
            +
                            ", TxT360 carefully implements data processing steps including extraction, filtering, deduplication, personally identifiable information removal, and other steps.",
         
     | 
| 897 | 
         
             
                        ),
         
     | 
| 898 | 
         
             
                        P(
         
     | 
| 899 | 
         
             
                            "Metadata is stored to recover the raw distribution for each dataset, enabling fine-grained control to create data distributions and corpus of desired size. As an example, we present one simple upsampling scheme that takes into account the duplication counts, resulting in a 15~16 trillion token corpus, outperforming FineWeb and our non-upsampling baselines, on diverse evaluations. Unlike DCLM",
         
     | 
| 
         | 
|
| 908 | 
         
             
                        id="section1",
         
     | 
| 909 | 
         
             
                    ),
         
     | 
| 910 | 
         
             
                    Section(
         
     | 
| 911 | 
         
            +
                        H2("Motivation Behind TxT360"),
         
     | 
| 912 | 
         
             
                        H3(
         
     | 
| 913 | 
         
             
                            "TxT360 is the first dataset to combine both web and curated data sources commonly used in pretraining."
         
     | 
| 914 | 
         
             
                        ),
         
     | 
| 
         | 
|
| 938 | 
         
             
                        ),
         
     | 
| 939 | 
         
             
                        table_div_data,
         
     | 
| 940 | 
         
             
                        P(
         
     | 
| 941 | 
         
            +
                            "We provide details and context for the choices behind TxT360 in the respective Web Data Processing and Curated Source Processing section. A deep dive describing the deduplication process can be found in the Shared Processing Steps section."
         
     | 
| 942 | 
         
             
                        ),
         
     | 
| 943 | 
         
             
                        # Img(src="images/pipeline.png", height="300", width="600"),
         
     | 
| 944 | 
         
             
                        # P(
         
     | 
    	
        overview.py
    CHANGED
    
    | 
         @@ -276,7 +276,7 @@ overview_div = Div( 
     | 
|
| 276 | 
         
             
                            Li("The Highlevel Data Process Approach", style = "margin-bottom: 5px"),
         
     | 
| 277 | 
         
             
                            Li("Introduction to Global Deduplication", style = "margin-bottom: 5px"),
         
     | 
| 278 | 
         
             
                        ),
         
     | 
| 279 | 
         
            -
                        H2("Motivation Behind  
     | 
| 280 | 
         
             
                        H3("TxT360 is the first dataset to combine both web and curated data sources commonly used in pretraining."),
         
     | 
| 281 | 
         
             
                        P("The quality and size of a pre-training dataset play a crucial role in the performance of large language models (LLMs). Data is often referred as low quality if it has not been filtered to review unwanted text. The community has introduced a variety of filtered datasets including purely web-based datasets. Commonly used pretraining datasets are:"),
         
     | 
| 282 | 
         
             
                        Ul(
         
     | 
| 
         | 
|
| 276 | 
         
             
                            Li("The Highlevel Data Process Approach", style = "margin-bottom: 5px"),
         
     | 
| 277 | 
         
             
                            Li("Introduction to Global Deduplication", style = "margin-bottom: 5px"),
         
     | 
| 278 | 
         
             
                        ),
         
     | 
| 279 | 
         
            +
                        H2("Motivation Behind TxT360"),
         
     | 
| 280 | 
         
             
                        H3("TxT360 is the first dataset to combine both web and curated data sources commonly used in pretraining."),
         
     | 
| 281 | 
         
             
                        P("The quality and size of a pre-training dataset play a crucial role in the performance of large language models (LLMs). Data is often referred as low quality if it has not been filtered to review unwanted text. The community has introduced a variety of filtered datasets including purely web-based datasets. Commonly used pretraining datasets are:"),
         
     | 
| 282 | 
         
             
                        Ul(
         
     | 
    	
        results.py
    CHANGED
    
    | 
         @@ -1,5 +1,10 @@ 
     | 
|
| 1 | 
         
             
            from fasthtml.common import *
         
     | 
| 2 | 
         
             
            from fasthtml.components import *
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 3 | 
         
             
            import json
         
     | 
| 4 | 
         
             
            from fh_plotly import plotly2fasthtml
         
     | 
| 5 | 
         
             
            from plotly import graph_objects as go
         
     | 
| 
         @@ -7,6 +12,74 @@ import pandas as pd 
     | 
|
| 7 | 
         
             
            import plotly.express as px
         
     | 
| 8 | 
         | 
| 9 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 10 | 
         
             
            ##upsampling validation loss graph
         
     | 
| 11 | 
         | 
| 12 | 
         
             
            # Data
         
     | 
| 
         @@ -45,7 +118,7 @@ txt360 = [2.589649677, 2.438303471, 2.383416414, 2.337049007, 2.300292492, 
     | 
|
| 45 | 
         
             
            fig_val = go.Figure()
         
     | 
| 46 | 
         | 
| 47 | 
         
             
            # Add lines
         
     | 
| 48 | 
         
            -
            fig_val.add_trace(go.Scatter(x=steps, y=fineweb, mode='lines', name='FineWeb 
     | 
| 49 | 
         
             
            fig_val.add_trace(go.Scatter(x=steps, y=txt360, mode='lines', name='TxT360'))
         
     | 
| 50 | 
         | 
| 51 | 
         
             
            # Update layout
         
     | 
| 
         @@ -56,8 +129,6 @@ fig_val.update_layout( 
     | 
|
| 56 | 
         
             
                legend_title='Models'
         
     | 
| 57 | 
         
             
            )
         
     | 
| 58 | 
         | 
| 59 | 
         
            -
            # Show plot
         
     | 
| 60 | 
         
            -
             
     | 
| 61 | 
         
             
            # Show the plot
         
     | 
| 62 | 
         
             
            validation_loss_graph = fig_val
         
     | 
| 63 | 
         | 
| 
         @@ -716,7 +787,7 @@ dataset_comparison = pd.DataFrame( 
     | 
|
| 716 | 
         
             
                            "28.04",
         
     | 
| 717 | 
         
             
                            "25.61",
         
     | 
| 718 | 
         
             
                        ],
         
     | 
| 719 | 
         
            -
                        "FineWeb 
     | 
| 720 | 
         
             
                            "71.5",
         
     | 
| 721 | 
         
             
                            "82.1",
         
     | 
| 722 | 
         
             
                            "79.46",
         
     | 
| 
         @@ -763,24 +834,61 @@ table_div_1 = Div(NotStr(table_html), 
     | 
|
| 763 | 
         
             
            intro_div = Div(
         
     | 
| 764 | 
         
             
                H2("TxT360 Studies"),
         
     | 
| 765 | 
         
             
                H3("What This Section Contains"),
         
     | 
| 766 | 
         
            -
                P("This section  
     | 
| 
         | 
|
| 767 | 
         
             
                Ul(
         
     | 
| 768 | 
         
            -
                    Li(" 
     | 
| 769 | 
         
            -
                    Li("Perplexity  
     | 
| 770 | 
         
             
                ),
         
     | 
| 771 | 
         
             
            )
         
     | 
| 772 | 
         | 
| 773 | 
         | 
| 774 | 
         
             
            upsampling_exp = Div(
         
     | 
| 775 | 
         
            -
                    H2(" 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 776 | 
         
             
                    H3("Experiment Setup"),
         
     | 
| 777 | 
         
            -
                    P( 
     | 
| 778 | 
         
            -
             
     | 
| 779 | 
         
            -
                     
     | 
| 780 | 
         
            -
                     
     | 
| 781 | 
         
            -
             
     | 
| 782 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 783 | 
         
             
                    plotly2fasthtml(lm_loss_graph),
         
     | 
| 
         | 
|
| 784 | 
         
             
            )
         
     | 
| 785 | 
         | 
| 786 | 
         
             
            preplexity_intro_div = Div(
         
     | 
| 
         | 
|
| 1 | 
         
             
            from fasthtml.common import *
         
     | 
| 2 | 
         
             
            from fasthtml.components import *
         
     | 
| 3 | 
         
            +
            from fasthtml.components import (
         
     | 
| 4 | 
         
            +
                D_cite,
         
     | 
| 5 | 
         
            +
            )
         
     | 
| 6 | 
         
            +
             
     | 
| 7 | 
         
            +
            import os
         
     | 
| 8 | 
         
             
            import json
         
     | 
| 9 | 
         
             
            from fh_plotly import plotly2fasthtml
         
     | 
| 10 | 
         
             
            from plotly import graph_objects as go
         
     | 
| 
         | 
|
| 12 | 
         
             
            import plotly.express as px
         
     | 
| 13 | 
         | 
| 14 | 
         | 
| 15 | 
         
            +
            ## Evaluation Graphs
         
     | 
| 16 | 
         
            +
             
     | 
| 17 | 
         
            +
            # Load the data
         
     | 
| 18 | 
         
            +
            all_eval_results = {}

            # Directory holding one "CKPT Eval - <metric>.csv" export per benchmark.
            EVAL_DATA_DIR = "data/txt360_eval"

            # Result key -> positional column index in each CSV.
            # Column 2 holds the SlimPajama run, which is currently not loaded.
            EVAL_COLUMNS = {
                "fineweb": 4,
                "txt360-dedup-only": 5,
                "txt360-web-only-upsampled": 7,
                "txt360-all-upsampled + stackv2": 9,
            }

            for fname in os.listdir(EVAL_DATA_DIR):
                if not fname.endswith(".csv"):
                    continue

                metric_name = fname.replace("CKPT Eval - ", "").replace(".csv", "")
                df = pd.read_csv(os.path.join(EVAL_DATA_DIR, fname))

                # The first two rows are skipped (presumably sub-header rows of the
                # sheet export -- TODO confirm). Gaps are back-filled from the next
                # available value. Series.bfill() replaces the deprecated
                # fillna(method="bfill") spelling with identical results.
                curves = {
                    key: df.iloc[2:, col].astype(float).bfill()
                    for key, col in EVAL_COLUMNS.items()
                }

                # Each row is 20B tokens, so the x axis is 0, 20, 40, ... (billions).
                curves["token"] = [20 * i for i in range(len(curves["fineweb"]))]

                all_eval_results[metric_name] = curves
         
     | 
| 40 | 
         
            +
             
     | 
| 41 | 
         
            +
             
     | 
| 42 | 
         
            +
            # Eval Result Plots
         
     | 
| 43 | 
         
            +
            all_eval_res_figs = {}

            # (result key, legend label) pairs, in the order the traces are drawn.
            EVAL_TRACES = (
                ("fineweb", "FineWeb"),
                ("txt360-web-only-upsampled", "TxT360 - CC Data Upsampled"),
                ("txt360-dedup-only", "TxT360 - CC Data Dedup"),
                ("txt360-all-upsampled + stackv2", "TxT360 - Full Upsampled + Stack V2"),
            )

            # Build one line chart per benchmark, keyed by metric name.
            for metric_name, res in all_eval_results.items():
                fig_res = go.Figure()

                # Every curve shares the same x axis (billions of training tokens).
                for result_key, legend_label in EVAL_TRACES:
                    fig_res.add_trace(go.Scatter(
                        x=res["token"],
                        y=res[result_key],
                        mode='lines', name=legend_label
                    ))

                # The debug print() calls were removed: they dumped every result
                # list to stdout each time this module was imported.

                fig_res.update_layout(
                    title=f"{metric_name} Performance",
                    title_x=0.5,  # Centers the title
                    xaxis_title="Billion Tokens",
                    yaxis_title=metric_name,
                    legend_title="Dataset",
                )
                all_eval_res_figs[metric_name] = fig_res
         
     | 
| 82 | 
         
            +
             
     | 
| 83 | 
         
             
            ##upsampling validation loss graph
         
     | 
| 84 | 
         | 
| 85 | 
         
             
            # Data
         
     | 
| 
         | 
|
| 118 | 
         
             
            fig_val = go.Figure()
         
     | 
| 119 | 
         | 
| 120 | 
         
             
            # Add lines
         
     | 
| 121 | 
         
            +
            fig_val.add_trace(go.Scatter(x=steps, y=fineweb, mode='lines', name='FineWeb'))
         
     | 
| 122 | 
         
             
            fig_val.add_trace(go.Scatter(x=steps, y=txt360, mode='lines', name='TxT360'))
         
     | 
| 123 | 
         | 
| 124 | 
         
             
            # Update layout
         
     | 
| 
         | 
|
| 129 | 
         
             
                legend_title='Models'
         
     | 
| 130 | 
         
             
            )
         
     | 
| 131 | 
         | 
| 
         | 
|
| 
         | 
|
| 132 | 
         
             
            # Show the plot
         
     | 
| 133 | 
         
             
            validation_loss_graph = fig_val
         
     | 
| 134 | 
         | 
| 
         | 
|
| 787 | 
         
             
                            "28.04",
         
     | 
| 788 | 
         
             
                            "25.61",
         
     | 
| 789 | 
         
             
                        ],
         
     | 
| 790 | 
         
            +
                        "FineWeb": [
         
     | 
| 791 | 
         
             
                            "71.5",
         
     | 
| 792 | 
         
             
                            "82.1",
         
     | 
| 793 | 
         
             
                            "79.46",
         
     | 
| 
         | 
|
| 834 | 
         
             
            # Landing block of the studies page: heading, two summary paragraphs,
            # and a bullet list naming the topic areas covered.
            intro_div = Div(
                H2("TxT360 Studies"),
                H3("What This Section Contains"),
                P("This section shows the learning curve when pre-training on TxT360, with a proper upsampling approach. We compare several simple strategies and demonstrate that one particular upsampling method, inspired by the natural data distribution, performs exceptionally well. In our preliminary experiments, the model learns significantly faster on TxT360 compared to a similarly scaled dataset, FineWeb. We believe that a more carefully designed upsampling strategy could further enhance the use of our data."),
                P("In addition to the training results, we also provide an analysis of the dataset, including perplexity trends over time across the CommonCrawl snapshots. This section is organized into the following topic areas:"),
                Ul(
                    *[
                        Li(topic, style="margin-bottom: 5px")
                        for topic in (
                            "The Learning Curve of TxT360 with an Upsampling Recipe",
                            "Perplexity Analysis across time",
                        )
                    ]
                ),
            )
         
     | 
| 844 | 
         | 
| 845 | 
         | 
| 846 | 
         
             
            # Upsampling study section: motivation, experiment setup, the
            # per-benchmark learning curves, and the training/validation loss plots.
            upsampling_exp = Div(
                H2("A Simple Data Mix Creates a Good Learning Curve"),
                P(
                    "As discussed in prior sections, duplicated documents can significantly reduce training efficiency (i.e., the ratio of model performance to the number of pre-trained tokens). Previous work, such as RefinedWeb",
                    D_cite(bibtex_key="refinedweb"),
                    ", emphasizes the importance of deduplication. Recently, the FineWeb study conducted an interesting analysis, comparing LLM performance when pre-trained on globally deduplicated versus locally deduplicated datasets. They found that training efficiency with a globally deduplicated dataset can be worse",
                    D_cite(bibtex_key="fineweb"),
                    # Text fix: was "Fineweb hypothesize" (wrong casing and agreement).
                    ". The FineWeb authors hypothesize that global deduplication may remove a higher proportion of high-quality documents."
                ),
                P(
                    "This finding led us to consider that a pre-training corpus based on crawled websites is naturally upsampled for a variety of reasons. For example, commonly used templates or boilerplates may appear millions of times; a well-regarded article reposted by different users may surface across multiple sites; and the same web pages, crawled by CommonCrawl at different times, will duplicate each other. The reasons behind these duplications vary: some may serve as indirect indicators of high-quality content, while others may not. Therefore, curating a pre-training dataset should involve leveraging these signals and considering data weighting schemes — or at the very least, provide users with the necessary information to control it effectively."
                ),
                P(
                    "To this end, we store rich metadata for each document source, including features like user votes from StackExchange. One crucial piece of metadata is the number of duplicates detected for a document. This information allows users to reconstruct the natural web distribution, but more importantly, we will demonstrate that a simple upsampling recipe based on this metadata can create a high-quality data mix."
                ),
                H3("Experiment Setup"),
                P(
                    # NOTE(review): the "2 to 5" and "5 to 100" duplicate buckets overlap
                    # at 5 -- confirm the intended boundary with the authors.
                    "Motivated by the FineWeb study, we opted to upsample documents based on their natural distribution. However, since duplication is only an indirect indicator of quality, we upsample documents to a few predefined levels rather than using their exact count. Specifically, we set the upsampling weight to 3 for documents with 2 to 5 duplicates, 5 for those with 5 to 100 duplicates, 8 for 101 to 1000 duplicates, and 10 for documents with over 1000 duplicates. These values were selected heuristically and informed by preliminary small-scale experiments. For non-CommonCrawl data sources, we assign a weight of 2 if the document appears more than once. This straightforward approach results in a corpus exceeding 15 trillion tokens, making it one of the largest open-access pre-training datasets available."
                ),
                P(
                    "To evaluate the training efficiency of our dataset, we sampled 1.5T tokens from both FineWeb and TxT360 (using the aforementioned weighting) and conducted a training ablation on an 8x8B Mixture-of-Experts architecture, similar to Mixtral. We compared the learning curves by tracking training loss, validation scores, and performance across a wide array of diverse evaluation benchmarks. The validation set was sampled independently from SlimPajama",
                    D_cite(bibtex_key="cerebras2023slimpajama"),
                    ". Note that this experiment is done on a slightly earlier version of the dataset."
                ),
                H3("Learning Curves on the Evaluation Metrics"),
                P(
                    "Evaluation results are the most direct indicator of model quality. We assess the intermediate results of the models across multiple metrics and plot the learning curves. Our findings indicate that the model learns significantly faster with TxT360. For a fair comparison, we evaluate TxT360 against FineWeb using only the CommonCrawl data sources, and we also show the curves after incorporating the 14 curated sources and coding data (Stack V2), demonstrating the full potential of the dataset. Due to computation resource constraints, we stop running experiments when we can observe clear trends."
                ),
                P(
                    "Based on the metrics, we find that TxT360’s CommonCrawl portion consistently outperforms FineWeb after upsampling, particularly on challenging tasks like MMLU and generation tasks such as NQ. Similar to the findings in DCLM, adding non-CommonCrawl data sources produces mixed results, especially when testing with that specific version of the data. We have since updated the non-CC data to further reduce noise."
                ),
                # One chart per benchmark; each key must match a CSV in data/txt360_eval.
                plotly2fasthtml(all_eval_res_figs["MMLU"]),
                plotly2fasthtml(all_eval_res_figs["NQ"]),
                # plotly2fasthtml(all_eval_res_figs["GSM8K"]),
                plotly2fasthtml(all_eval_res_figs["HellaSwag"]),
                plotly2fasthtml(all_eval_res_figs["MedQA"]),
                plotly2fasthtml(all_eval_res_figs["PIQA"]),
                plotly2fasthtml(all_eval_res_figs["TriviaQA"]),
                plotly2fasthtml(all_eval_res_figs["WinoGrande"]),

                H3("Comparing the Loss Curves"),
                P(
                    "We also plot the training and validation loss curves for each dataset, showing that TxT360 achieves both lower training and validation losses compared to FineWeb. Although training loss may not correlate directly with final model performance, we observe that the loss curve for TxT360 exhibits fewer spikes compared to FineWeb, indicating more stable training dynamics."
                ),
                plotly2fasthtml(lm_loss_graph),
                plotly2fasthtml(validation_loss_graph),
            )
         
     | 
| 893 | 
         | 
| 894 | 
         
             
            preplexity_intro_div = Div(
         
     |