diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..3f8513ed7f971a747ef150a5993a81447bc4fae3 Binary files /dev/null and b/.DS_Store differ diff --git a/cache_dir/.DS_Store b/cache_dir/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..54996a31d16648a1c7008898f28ac71cc86e6d8f --- /dev/null +++ b/cache_dir/.DS_Store @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f54406681cc569a9d56248a3be247afb7952fa5272ab20bb039e584f80d26c6 +size 14340 diff --git a/cache_dir/HuggingFaceM4/.DS_Store b/cache_dir/HuggingFaceM4/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..3a72a4324613d683776604e1f63d07b94a4e5225 --- /dev/null +++ b/cache_dir/HuggingFaceM4/.DS_Store @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eda935b3db4b738eb991f2ad964f1f4f075f9161bf3df13fb43c6a7a0fda6e1 +size 6148 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/.DS_Store b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..96f2b617a406a2edba0977b91fc17cc3d0b5d8d4 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/.DS_Store @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f40ba1ae0df6f456333e7b4708a4b420f29a8910a9f93dea56797202bb4b8d2 +size 6148 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/associations/identity_terms.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/associations/identity_terms.json new file mode 100644 index 0000000000000000000000000000000000000000..c4db3147854fbe5ed5a19267fd5f57e7f07f8a91 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/associations/identity_terms.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de47c409dc0e40d49e94dddcf96a566efb6385c3dcf4ef2f380e603c3808a620 +size 170 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/data-00000-of-00002.arrow b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/data-00000-of-00002.arrow new file mode 100644 index 0000000000000000000000000000000000000000..c7a2cf2c23ee4e383e6d3b7f8b8c48cbbb58ae14 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/data-00000-of-00002.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33f1296731fb15954bceb2aae92ea57bd8351aa21017eb5032eea1be391b32b3 +size 259553152 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/data-00001-of-00002.arrow b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/data-00001-of-00002.arrow new file mode 100644 index 0000000000000000000000000000000000000000..50d50a8a16faf562f861b3f4d57f530b80f1369a --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/data-00001-of-00002.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89173d78ae611fdfdd580dc00877e2022df059141619d44b43f8bc1f9856c2ca +size 259689664 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/dataset_info.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..1134a9c734aeebc9451cac850878cbcdae0882b8 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/dataset_info.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2356c6d8590c44752fc0a5f934b4196eb98a6c8e02b10072d1e238692f95c5f0 +size 1020 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/state.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/state.json new file mode 100644 index 0000000000000000000000000000000000000000..dff9c02261f89ba05a706f76541f298617837cdc --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca13786924575ed7686a30fe3821066a007fc76afe7515b19996f3bba0257cdb +size 309 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/general_stats_dict.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/general_stats_dict.json new file mode 100644 index 0000000000000000000000000000000000000000..48fe1b481eabdda03cf1e8ff897f20dadee57adf --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/general_stats_dict.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:110346fed8c658846abc5d16843a68bf67df80dbee84cd8e443901719cc3fd7c +size 117 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/lengths/length_measurements.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/lengths/length_measurements.json new file mode 100644 index 0000000000000000000000000000000000000000..2c253e5de097ab59964db3eb44b23944abab0d6d --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/lengths/length_measurements.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:042be16be1e61bd4e4dd7e1d47d95a67a74d5c95153aa9b3439248f8043c3e58 +size 128 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/lengths/lengths_fig.png b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/lengths/lengths_fig.png new file mode 100644 index 0000000000000000000000000000000000000000..4a528860c18aa9051b8d900337de31363d62e153 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/lengths/lengths_fig.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeca495c3ed1624319041b1822e5a679f6776c4c3478feead84eecd06046159b +size 61436 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/lengths/lengths_table.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/lengths/lengths_table.json new file mode 100644 index 0000000000000000000000000000000000000000..561a180c7530ddbf02a0ee8b28920fa799a26617 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/lengths/lengths_table.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8687bd444d4f8d7a0aaa35bd51b51f25136e2b2078e8646df187f6a0caa4bf6b +size 348823286 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/sorted_top_vocab.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/sorted_top_vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..69b9dfed960ce13e72aad6c144efabeea3b293c5 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/sorted_top_vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ed278c2455f28e88ebea8593eecc44f7e04bf6dd3a851afac8c60a408efcc3 +size 8105 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_dset/data-00000-of-00001.arrow b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_dset/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..b7229e29cb432b99da935feaa753dd3216156d22 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_dset/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b9c7e98ea4cc4b01f89eee3a3a7ef47fa6d9a30fcebb22d7273ba7aef52392f +size 334497112 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_dset/dataset_info.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_dset/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..98b48931fdb0293076d881845378c367f209d6e8 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_dset/dataset_info.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d63925f8275f48044b2e9c7ff53d823e69d43e90ddd928fdb255acd3ae066b7 +size 678 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_dset/state.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_dset/state.json new file mode 100644 index 0000000000000000000000000000000000000000..98f7da4f82edfc1afd4c47f6e275d7ecedeb7c6f --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_dset/state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c029143f6689e25c7b1e2bdf0c8e81146f404512811d81254985bbddb61dcd9 +size 250 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_duplicates/text_duplicates.html b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_duplicates/text_duplicates.html new file mode 100644 index 0000000000000000000000000000000000000000..04ea9b436eeeed38bfb7d5e61104d8cbc88d0c42 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_duplicates/text_duplicates.html @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac4603f73676cda96b313d6c941100bf3939adbf04aa18d932fa87b7fc7e8cd7 +size 23807 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_duplicates/text_duplicates.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_duplicates/text_duplicates.json new file mode 100644 index 0000000000000000000000000000000000000000..7c2f12e6a51b7b0b28735291a38fab4b5569c3ae --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_duplicates/text_duplicates.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d24a6f6eae405332231d9fc790f2023cba7e476ebf920ce21791ad5d9338787c +size 22135 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/tokenized_df.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/tokenized_df.json new file mode 100644 index 0000000000000000000000000000000000000000..4587ca9fa5b02b2da40db672ab51f168e7de159f --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/tokenized_df.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55f75e007c2895c3141121690edf9d4028ae64b77bd83ca7ecb6f2ece27b51c2 +size 844434693 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/vocab_counts.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/vocab_counts.json new file mode 100644 index 0000000000000000000000000000000000000000..2d627c7a346f358846ccddaca8d042a92b7c70d7 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/vocab_counts.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38ff40d8372ff15bead1766d2f6b14e349a64d057a6f298ed52726e31b1873c5 +size 39026204 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/base_dset/data-00000-of-00001.arrow b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/base_dset/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..e6dbd1cc73f6f70eaf2b121354cb8808a3c6980c --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/base_dset/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f23eb11877f1bf934ba12d0ce910ccb71b9ce3865798dc6651d12425244b529 +size 489144 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/base_dset/dataset_info.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/base_dset/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..1dfdb32df891cb4bec1107d5edee9c07d2905009 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/base_dset/dataset_info.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbbef344f84ed9a7f11d0778f52ff0e2decf2ef7aacd1fbf5f059d069d2c6ba6 +size 946 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/base_dset/state.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/base_dset/state.json new file mode 100644 index 0000000000000000000000000000000000000000..a7925f467949e4a6e94ff489f13667b228614944 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/base_dset/state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab2c18f9e7e23bc6b501df499e16c721198a5452b993cbc3921612254438a7c3 +size 250 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/dset_peek.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/dset_peek.json new file mode 100644 index 0000000000000000000000000000000000000000..1751c969329e31c30a86204214e2d70b8a547aab --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/dset_peek.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8be5416e6a1bf0c3d02709b3d5c13b7b9af543ea902301e5b268b4d16b026c0 +size 502887 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/general_stats_dict.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/general_stats_dict.json new file mode 100644 index 0000000000000000000000000000000000000000..da74a2136afa60dc095884a520f0802ca5699f46 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/general_stats_dict.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7a45931e78871dcb39b814081ef2c49939709c3feebcddaf7dc9e221a557b24 +size 95 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/lengths/length_measurements.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/lengths/length_measurements.json new file mode 100644 index 0000000000000000000000000000000000000000..7224ed2aacba79d349c91858251a64a913508a4d --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/lengths/length_measurements.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:500e2c7bd48b76556389af8630b5093b147c32affc0528305696531841df7582 +size 113 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/lengths/lengths_fig.png b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/lengths/lengths_fig.png new file mode 100644 index 0000000000000000000000000000000000000000..95b17353049c9f321098b3e0a4c9f18c58011499 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/lengths/lengths_fig.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4136b1d8653ecb9b425b162624c8adc8ca2c53ae44af90a34b65b0aaa946fb1 +size 69909 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/lengths/lengths_table.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/lengths/lengths_table.json new file mode 100644 index 0000000000000000000000000000000000000000..e6b794afc1a944aadf035ded539af64eeb8d6817 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/lengths/lengths_table.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d585449245038b71c834729d34b61b023b9866eec20f39fe33430a97a84be38 +size 33277 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/sorted_top_vocab.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/sorted_top_vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..bed85900a45808e27b8ea6519d858ea9b2525553 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/sorted_top_vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a64574c2dd6522190927d73577c84e293e11f179f37482e2305a159d27e7e80 +size 7785 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/cache-f6aa4a70e38b4a04.arrow b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/cache-f6aa4a70e38b4a04.arrow new file mode 100644 index 0000000000000000000000000000000000000000..fe7c2cbb80d16f477b51f15dc207e177c09d7a07 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/cache-f6aa4a70e38b4a04.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e5fe498e11eeb301bd8430d2f2c408c696d36bbc21b1d18f31c4cf957a8d47 +size 67576 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/data-00000-of-00001.arrow b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..acb327aa45186b4ba9beb2e595e0c51e81df9bc5 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d04dc98da752f00f252635eed55717c8a12dee20edb3c8347bd34e2228b16bb3 +size 29512 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/dataset_info.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..1b56504c71ed317b6e9bad4d42cc4c0949e4c35b --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/dataset_info.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:362e98087a1b5bad2dc4e4d4b04dde74ed38ab64be0c12722e1f328518055d3b +size 604 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/state.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/state.json new file mode 100644 index 0000000000000000000000000000000000000000..525a1826aa510bdbe54761f9230e6febc5af4659 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5589b05d616fd17ead2fd469ad921b88ee2c464cb5bd21afca6a4876403c1ba7 +size 250 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_duplicates/text_duplicates.html b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_duplicates/text_duplicates.html new file mode 100644 index 0000000000000000000000000000000000000000..9b69961eedaf6813a142361e28d635099138ed1a --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_duplicates/text_duplicates.html @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1690a778ad39a5363a94de88f59dda4fcf5ffbf4c3cc5abfbddbbc3be927fc0 +size 116 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_duplicates/text_duplicates.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_duplicates/text_duplicates.json new file mode 100644 index 0000000000000000000000000000000000000000..e228df0d5145a7052b71fa08c77f53a5a7851041 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_duplicates/text_duplicates.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c40970be1f0326d8ef1b689eda4ac2695d0c536d95fc69cfcd569cb361a77ad6 +size 50 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/tokenized_df.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/tokenized_df.json new file mode 100644 index 0000000000000000000000000000000000000000..438a4f46b294c972d10264f18f133b5c63bbb2e3 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/tokenized_df.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d68e3413893f2173dc0abf1c0f818d51b14c5c1618622b5ba31d903423df3a21 +size 71186 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/vocab_counts.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/vocab_counts.json new file mode 100644 index 0000000000000000000000000000000000000000..fd0a3520f43810820603aa8920d683492b216c12 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/vocab_counts.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc2dbb4848f61dcc440ce6a940617092309b10c5dd589517772509102dc4b88f +size 121825 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/zipf/zipf_basic_stats.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/zipf/zipf_basic_stats.json new file mode 100644 index 0000000000000000000000000000000000000000..aac369d32ecce8762df463e967eca746eef343e8 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/zipf/zipf_basic_stats.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b56322c6815bf2be43a1d03dc8dd20c76c8ddf96e68a719e95c92d07917b06e3 +size 329 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/zipf/zipf_fig.html b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/zipf/zipf_fig.html new file mode 100644 index 0000000000000000000000000000000000000000..4f0088b93eb7cbbf2882934356d8e6fde038137f --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/zipf/zipf_fig.html @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f188fe76c2135208a74b9c5bfb7e107d3473f513205489ade28cb0e727adc42 +size 3606637 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/zipf/zipf_fig.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/zipf/zipf_fig.json new file mode 100644 index 0000000000000000000000000000000000000000..0abf49eba915d4cbfc27476f2c6db97708cda2ce --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/zipf/zipf_fig.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a49d21ab8319d8a2d9177770b511b2b31b735c70a28f89afeb140b5c6f938ff +size 20320 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/base_dset/data-00000-of-00001.arrow b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/base_dset/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..e6dbd1cc73f6f70eaf2b121354cb8808a3c6980c --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/base_dset/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f23eb11877f1bf934ba12d0ce910ccb71b9ce3865798dc6651d12425244b529 +size 489144 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/base_dset/dataset_info.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/base_dset/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..1dfdb32df891cb4bec1107d5edee9c07d2905009 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/base_dset/dataset_info.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbbef344f84ed9a7f11d0778f52ff0e2decf2ef7aacd1fbf5f059d069d2c6ba6 +size 946 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/base_dset/state.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/base_dset/state.json new file mode 100644 index 0000000000000000000000000000000000000000..a7925f467949e4a6e94ff489f13667b228614944 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/base_dset/state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab2c18f9e7e23bc6b501df499e16c721198a5452b993cbc3921612254438a7c3 +size 250 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/dset_peek.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/dset_peek.json new file mode 100644 index 0000000000000000000000000000000000000000..1751c969329e31c30a86204214e2d70b8a547aab --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/dset_peek.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8be5416e6a1bf0c3d02709b3d5c13b7b9af543ea902301e5b268b4d16b026c0 +size 502887 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/general_stats_dict.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/general_stats_dict.json new file mode 100644 index 0000000000000000000000000000000000000000..e791015a9d3d1def46573f3d2c25f9ca31597245 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/general_stats_dict.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51c5e8fe33ca5024e519e7d550f46e3917c9ba8dc63f43ecc107c3c31430cb00 +size 93 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/lengths/length_measurements.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/lengths/length_measurements.json new file mode 100644 index 0000000000000000000000000000000000000000..3c45595d465c8658d2590b2fb1353734a3561b10 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/lengths/length_measurements.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0889a390641bd052cbf295478822db4509a1731d20f7fda753198c64f9970c88 +size 125 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/lengths/lengths_fig.png b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/lengths/lengths_fig.png new file mode 100644 index 0000000000000000000000000000000000000000..281d61173e342afd86075945dc5303e8ba7f762e --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/lengths/lengths_fig.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87e075d701bdeacb58f17fab26d2e4a98e861be2a5e67282b9fa0fabb589181e +size 66291 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/lengths/lengths_table.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/lengths/lengths_table.json new file mode 100644 index 0000000000000000000000000000000000000000..962855171dcdcb070bbc148adbbadf0055d6d1f7 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/lengths/lengths_table.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8704412454924107067ae6b3d20534e8cc9917a3ae22ec8b105e7506af867cd +size 31341 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/sorted_top_vocab.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/sorted_top_vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..3e1d8f996b521986a347beddec9e404da98582d2 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/sorted_top_vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bc5cfc0aa78c0984d45847ffb88a666149f8ce364a66a447a08a7f445d83671 +size 7907 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/cache-630eedbf17afb681.arrow b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/cache-630eedbf17afb681.arrow new file mode 100644 index 0000000000000000000000000000000000000000..b040f2d20cc27ec0cbd6f8cc31ff4aadaf5f0fc1 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/cache-630eedbf17afb681.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b553cbdbee049fad4ebd08fddd52db9145c77ad3846008557226e1c0157c3b93 +size 57920 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/data-00000-of-00001.arrow b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..e676e4a964f3ed452053b995f1bab0ccbc9322a9 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19726400fd8623d601b7cfbb85e9cf1aa063c8f5c00b7baa0f1de46eacff0d80 +size 24208 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/dataset_info.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..1b56504c71ed317b6e9bad4d42cc4c0949e4c35b --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/dataset_info.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:362e98087a1b5bad2dc4e4d4b04dde74ed38ab64be0c12722e1f328518055d3b +size 604 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/state.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/state.json new file mode 100644 index 0000000000000000000000000000000000000000..f3de28d8dc1042eea52d147862261de0be686acb --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e52676a9496280a7002faad9facf941ac9ca075b8bb0fed7aa655e546e4e9d6 +size 250 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_duplicates/text_duplicates.html b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_duplicates/text_duplicates.html new file mode 100644 index 0000000000000000000000000000000000000000..9b69961eedaf6813a142361e28d635099138ed1a --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_duplicates/text_duplicates.html @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1690a778ad39a5363a94de88f59dda4fcf5ffbf4c3cc5abfbddbbc3be927fc0 +size 116 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_duplicates/text_duplicates.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_duplicates/text_duplicates.json new file mode 100644 index 0000000000000000000000000000000000000000..e228df0d5145a7052b71fa08c77f53a5a7851041 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_duplicates/text_duplicates.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c40970be1f0326d8ef1b689eda4ac2695d0c536d95fc69cfcd569cb361a77ad6 +size 50 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/tokenized_df.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/tokenized_df.json new file mode 100644 index 0000000000000000000000000000000000000000..2ca5d7c5f1ccf663f5eaa3a0572a1d09ee07f84f --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/tokenized_df.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c247100203577651e9438449aa807aefa89e6f3f435fa86a72e1962a34620e3 +size 65220 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/vocab_counts.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/vocab_counts.json new file mode 100644 index 0000000000000000000000000000000000000000..99b1f399e9e28d3dc620b9ab338f887d546721de --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/vocab_counts.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:432408d6694ad4a306b8cfc88b7c4bbe81b9520ff569d6bd40a6c4f976283b8a +size 85002 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/zipf/zipf_basic_stats.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/zipf/zipf_basic_stats.json new file mode 100644 index 0000000000000000000000000000000000000000..e6649f3cf69d596cb42c2a12b911e5626fee7426 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/zipf/zipf_basic_stats.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f08d9b8167bd637b20b42930ab92d77aa3e654ba3b9adfd38511925db033737 +size 450 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/zipf/zipf_fig.html b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/zipf/zipf_fig.html new file mode 100644 index 0000000000000000000000000000000000000000..67d8c970987f9fdf3b221edff0f4d13ca9dfa104 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/zipf/zipf_fig.html @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a40f7cd29896713f3d18e0b7f5eb94f940145960917a221e0826826ea602193d +size 3603675 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/zipf/zipf_fig.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/zipf/zipf_fig.json new file mode 100644 index 0000000000000000000000000000000000000000..dd37cc120961848e8be8e2542f31d35b38cc0889 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/zipf/zipf_fig.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf29753dd02d79f51129508dbe3b95d6b7f00020b8ca5a0a8a5e23d98582b47a +size 17420 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/identity_terms.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/identity_terms.json new file mode 100644 index 0000000000000000000000000000000000000000..3aad02b4402f43554bcace344e426bcb34cf369c --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/identity_terms.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f63abf99273843d82dfb120f6b5424186b36192a77131b993f920ffa8f69d495 +size 66 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-her.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-her.json new file mode 100644 index 0000000000000000000000000000000000000000..6d20e8227c0de9100629646a63d87aa2798ced59 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-her.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74810d05acea27865e51072a54bbeccfeeaf25fb43f916c1e000142e848ab5aa +size 206518 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-him.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-him.json new file mode 100644 index 0000000000000000000000000000000000000000..850d2ae940fec999c026c8f586b30ec6b145d0e9 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-him.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eed3cdbfb67411b61c50ed8fc7ff8671535b5093993adc725af99a2121627779 +size 336288 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-his.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-his.json new file mode 100644 index 0000000000000000000000000000000000000000..0846a7602fa3249124a7bf60db90b675d247ade5 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-his.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eadc8cbd0178d1a8ac567aeb29f460e46048b260e2e5f0e3314eafddd9d1d8d6 +size 555707 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-man.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-man.json new file mode 100644 index 0000000000000000000000000000000000000000..d4f8425389c39bee4de0a8ce67860529d76ae3ab --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-man.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae639cd319d22742bb019e0c37616ef7abe6342f43cb23f8829738cc72178803 +size 327867 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-she.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-she.json new file mode 100644 index 0000000000000000000000000000000000000000..18a99715f5ed1c4179dd7f4335ef058974da0bae --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-she.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e91383fc3bf336adbc3b6350b1f5c37c312b8bbd4821432f61a4647884402ea1 +size 187297 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-their.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-their.json new file mode 100644 index 0000000000000000000000000000000000000000..56ed89234b80e1fcf420748adf280eb6e05bd6e6 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-their.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f52da51e47fddc9655c0a98bc5bc4e72a954f90b25019b1cec2030d33d0dcd44 +size 494217 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-them.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-them.json new file mode 100644 index 0000000000000000000000000000000000000000..161e7538a6b56cff65d0ca8f09f18ec9966b71fe --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-them.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d058e8b1d68e4beb2349a00a37875c15b80abe3e5ae68603111c11c04bd3b93 +size 387062 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-they.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-they.json new file mode 100644 index 0000000000000000000000000000000000000000..e331bf3d64f647d095059ba0415aa1d8c2f875eb --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-they.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1800a112cda7eddc6e2dc5369c274ba7f25ff69277928dbdf55c506c541046bc +size 492544 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-him.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-him.json new file mode 100644 index 0000000000000000000000000000000000000000..efea2659663171c7a5bfddb08ac66ccca89028fd --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-him.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f300687ddb8b0288dfbd2fab15b1e055159317877a9719400a6b99b7495894fb +size 173867 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-his.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-his.json new file mode 100644 index 0000000000000000000000000000000000000000..f19c6c2460c677a35d107b0313ab6db4192c6498 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-his.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e48e0b376be3b9913c173841c2297cefe24d30236b2118801884a4c1cf6cf2de +size 231600 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-man.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-man.json new file mode 100644 index 0000000000000000000000000000000000000000..853c8aeb728fb614bdaf1877293edeac73409d1c --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-man.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67495752ef7cc424172f541728bd467b5a5f9c3949035becb8620560cb617ea9 +size 146058 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-she.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-she.json new file mode 100644 index 0000000000000000000000000000000000000000..48ecb51ee20b6c340907b4b8bf55e1aff39d6249 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-she.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ce6cfa0a718838bbef5e19618c6cb0df3036af3fde4f5b7f1bd26e17cdf481c +size 208749 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-their.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-their.json new file mode 100644 index 0000000000000000000000000000000000000000..e1fc291c18c17a48139c57e12c94f6ce47512878 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-their.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a79d6f4af19e958d6cac923afb85913fd9ebfd519903c2247d62475d1ee38f0a +size 239132 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-them.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-them.json new file mode 100644 index 0000000000000000000000000000000000000000..103bdb67896d5a3bc9d59ba27f2cb342c2765dc9 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-them.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:041e3197b2ee659930e23a4de987be064e0fcac6b6143ececa96dbd0777e1f6f +size 209045 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-they.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-they.json new file mode 100644 index 0000000000000000000000000000000000000000..dc43b26660544f648a7363ab34af01fef1a2a1ee --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-they.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4010a4090b0a141c00ab8ed769145b9bb48264318230f509af4ef8e232474a3e +size 226944 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-his.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-his.json new file mode 100644 index 0000000000000000000000000000000000000000..f22169f4e36c6adb107d6ac4280900093eb85155 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-his.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49a7a33d6c4972d184e7bd7f0b8938801f1a5b35bc28abf6642ddbc6d9ab57e7 +size 348370 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-man.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-man.json new file mode 100644 index 0000000000000000000000000000000000000000..d76abfc78d5aeed3cc7c18bec1708a566dc7e8d1 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-man.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcfa94884fed8e7d095c2428e3d07991119c7d6e1b96ca7bfcc46bca46d76252 +size 237802 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-she.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-she.json new file mode 100644 index 0000000000000000000000000000000000000000..23a8b1c88c1a019608a4ee54d72c9a803b6069b7 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-she.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39cf30e9852fec52c2d74d29a96aef444c9a446d139f21f33d0686a081561761 +size 169186 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-their.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-their.json new file mode 100644 index 0000000000000000000000000000000000000000..9c2a0a433835eaf41666c795047020d097afc899 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-their.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dee47111d0ab159b8e4b417900aa31c56ebe3753df77435210b53235dd8c2808 +size 319010 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-them.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-them.json new file mode 100644 index 0000000000000000000000000000000000000000..c0007346aaa87b92b9927d29567bb1d1e8317be1 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-them.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abd082fcc2b35a9d59804c43d420437e08d756a30ed680e1b924e467e50a07db +size 276215 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-they.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-they.json new file mode 100644 index 0000000000000000000000000000000000000000..6827845cfa4e978f5244a79fd742e713d32932b3 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-they.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e94f078b1e532fe1cac49a2266d650054a1bd4499fd8832ca9861da0efa23eda +size 306427 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-man.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-man.json new file mode 100644 index 0000000000000000000000000000000000000000..63e2df694384542255414fc5d5836ae3c83d8676 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-man.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3369908c36ebf52efd4d404dcd94abb3511e726f1b69a5de8da7022f27ef9285 +size 346447 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-she.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-she.json new file mode 100644 index 0000000000000000000000000000000000000000..6a9ce777fc49256acc070c8a69e71842cd3f9eca --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-she.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d57652a80447511077634120337aa92884516c7026f04c46ce158fae6c7c51a1 +size 206318 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-their.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-their.json new file mode 100644 index 0000000000000000000000000000000000000000..f110c55d711a7e7bac7c71a33a198b381ff17eb3 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-their.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24cefd39e27af2b8606dfe4607f9dc20b088cfc24e2abbd918754813b83fac28 +size 533027 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-them.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-them.json new file mode 100644 index 0000000000000000000000000000000000000000..fe50138f79b72df296248a40d35a6fe2990ec807 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-them.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a929834c3c602e7789051476649d068e6dba1b9b0b37a74824482e760ef34423 +size 409163 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-they.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-they.json new file mode 100644 index 0000000000000000000000000000000000000000..c7477393d4ee1160555411fbf48b1681e1db2f95 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-they.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:289807e00e862a7d06a124a7a4f52730c90d9276f48d719c9e9365c0791e3761 +size 495612 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-she.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-she.json new file mode 100644 index 0000000000000000000000000000000000000000..d7e46e65d25a5805e66b8af95a221f3e76aa9cc4 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-she.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a682be4c7c874628ca76d3013a0ef43b5f973c127292ba7a0ecd7729d21006a +size 140597 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-their.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-their.json new file mode 100644 index 0000000000000000000000000000000000000000..f0c6faa6ab5e48ca0c730ec4c5c3d86670433749 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-their.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f55698bbcb75adb9c230e27c11200f499c221ce2abe7bcd0d878519502f608f1 +size 316781 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-them.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-them.json new file mode 100644 index 0000000000000000000000000000000000000000..9b65f7956183adf17727f51c17c1cc022ab1ce03 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-them.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09c931b2629be08259fe3ae8e9e21afa10726172fcf0c922545ff7cf658d6616 +size 244569 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-they.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-they.json new file mode 100644 index 0000000000000000000000000000000000000000..c6208c507668c2e6f32ec26298198c6162a8c522 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-they.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff38625a0f342625440fdc25090a7a3d91319c9dd665c86a625f2c14c00c8924 +size 299200 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-she-their.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-she-their.json new file mode 100644 index 0000000000000000000000000000000000000000..ed8d624cf18be894eb61729a122a702372724d67 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-she-their.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cffea61c52510ad5e8435103cf9aa1694d0e879fb29d73ceb20a367c55407263 +size 230036 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-she-them.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-she-them.json new file mode 100644 index 0000000000000000000000000000000000000000..69b45b8aa85648bc868aa1948ded27b70cec8614 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-she-them.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:850b46812e7067fb03cc5cfa1942a5a57d9df7901cbe2d5b19b4c258a8d96595 +size 215824 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-she-they.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-she-they.json new file mode 100644 index 0000000000000000000000000000000000000000..0bd67f6c55da123d93f0a9ce21d204f6e7074393 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-she-they.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67e80998caa6bf4ea98bd4f3a220a54ca149daf820ca23c74b58db1f01e5caae +size 228986 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-their-them.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-their-them.json new file mode 100644 index 0000000000000000000000000000000000000000..9462716b227f268782439c68b382257bc576297a --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-their-them.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4a58606bd342585ddf74582d5deb267562466dde26935fda8f831608747b74a +size 476151 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-their-they.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-their-they.json new file mode 100644 index 0000000000000000000000000000000000000000..ffb39e38929a3bfe404d35f8d9233adeed88cfdf --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-their-they.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a65ff26d962ced6bdbc7c28b4560c2a2983661bc217cec8662525fd4b9a75fb2 +size 595929 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-them-they.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-them-they.json new file mode 100644 index 0000000000000000000000000000000000000000..7f7112afca122e31b93799730627c6d0f50a56e3 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-them-they.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed3b8f4a8554467ac921a38cd52ca793a489fbabfca149dea8da1a28cc4c37ad +size 456036 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/base_dset/data-00000-of-00001.arrow b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/base_dset/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..e6dbd1cc73f6f70eaf2b121354cb8808a3c6980c --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/base_dset/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f23eb11877f1bf934ba12d0ce910ccb71b9ce3865798dc6651d12425244b529 +size 489144 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/base_dset/dataset_info.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/base_dset/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..1dfdb32df891cb4bec1107d5edee9c07d2905009 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/base_dset/dataset_info.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbbef344f84ed9a7f11d0778f52ff0e2decf2ef7aacd1fbf5f059d069d2c6ba6 +size 946 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/base_dset/state.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/base_dset/state.json new file mode 100644 index 0000000000000000000000000000000000000000..a7925f467949e4a6e94ff489f13667b228614944 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/base_dset/state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab2c18f9e7e23bc6b501df499e16c721198a5452b993cbc3921612254438a7c3 +size 250 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/dset_peek.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/dset_peek.json new file mode 100644 index 0000000000000000000000000000000000000000..1751c969329e31c30a86204214e2d70b8a547aab --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/dset_peek.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8be5416e6a1bf0c3d02709b3d5c13b7b9af543ea902301e5b268b4d16b026c0 +size 502887 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/general_stats_dict.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/general_stats_dict.json new file mode 100644 index 0000000000000000000000000000000000000000..bf1146bc7790a7068abf3241a5b5909a51ad32cc --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/general_stats_dict.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dddd2be628746feeccc04859d54c4133e7f1b3e1964ffa98d533ad318d6d0eb2 +size 95 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/lengths/length_measurements.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/lengths/length_measurements.json new file mode 100644 index 0000000000000000000000000000000000000000..6531be1d81048cb2559b2b85c579f0bde88d9cf8 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/lengths/length_measurements.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf446327b541d83f38aff236b89cf65d85bf69bfe03b6df7605132398a2afd1b +size 127 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/lengths/lengths_fig.png b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/lengths/lengths_fig.png new file mode 100644 index 0000000000000000000000000000000000000000..e7d3d5e2dd1f61399cbf37a21bbc674e66828764 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/lengths/lengths_fig.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebeb8e7a35dfc3b84bd148b60335cefbbbd1f74db88c95e1854749d6ac4b8e23 +size 63234 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/lengths/lengths_table.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/lengths/lengths_table.json new file mode 100644 index 0000000000000000000000000000000000000000..a5d8ee97ff8de47dd88457bfe4466d9a7d30c37f --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/lengths/lengths_table.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0cf4f5f1fb12118cfac5d1e8a5cf4a4e2eb9584800b3e32c9277eb11b544f59 +size 326143 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/sorted_top_vocab.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/sorted_top_vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..fc18bbb0c06aaa025f6ecacf78b672a55191c0dd --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/sorted_top_vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c7533a246364b3b3563ed57985cf7458c1129b6a24ccaae28d81c37bb9065ac +size 7798 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/cache-44c5b4fd795439b5.arrow b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/cache-44c5b4fd795439b5.arrow new file mode 100644 index 0000000000000000000000000000000000000000..be6649ca3a733747b34d5f764b21053f719ee649 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/cache-44c5b4fd795439b5.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb4db59fda64595da71953d940ebbb945bd66ba4b078ec81cd8682d4370761eb +size 778488 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/data-00000-of-00001.arrow b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..71b5d57556865c53e787cc065886701739e53202 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84f617cce2defaf83255893aa927877b864273b53dfd553a67d7f68a24a803af +size 314048 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/dataset_info.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..1b56504c71ed317b6e9bad4d42cc4c0949e4c35b --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/dataset_info.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:362e98087a1b5bad2dc4e4d4b04dde74ed38ab64be0c12722e1f328518055d3b +size 604 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/state.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/state.json new file mode 100644 index 0000000000000000000000000000000000000000..2e04d37fb0a4a8e9cfaf820d96cac1c5b53a8535 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75d1f2ee6f0854a390ac6a6af4958a257144af7fbebcdffca28b3ae70eab713c +size 250 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_duplicates/text_duplicates.html b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_duplicates/text_duplicates.html new file mode 100644 index 0000000000000000000000000000000000000000..9b69961eedaf6813a142361e28d635099138ed1a --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_duplicates/text_duplicates.html @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1690a778ad39a5363a94de88f59dda4fcf5ffbf4c3cc5abfbddbbc3be927fc0 +size 116 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_duplicates/text_duplicates.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_duplicates/text_duplicates.json new file mode 100644 index 0000000000000000000000000000000000000000..e228df0d5145a7052b71fa08c77f53a5a7851041 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_duplicates/text_duplicates.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c40970be1f0326d8ef1b689eda4ac2695d0c536d95fc69cfcd569cb361a77ad6 +size 50 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/tokenized_df.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/tokenized_df.json new file mode 100644 index 0000000000000000000000000000000000000000..21ed7c556e0e8c50f85ab7ede1f2d516664bddec --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/tokenized_df.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddb2eeb446ee8a05a3c37498d571550950b3849bebc612e32bda31303fd4320f +size 790903 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/vocab_counts.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/vocab_counts.json new file mode 100644 index 0000000000000000000000000000000000000000..e3a53e59ff0378f6ef65fae23b12fee61b2af2ce --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/vocab_counts.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73615bc7ec4fb0388b01047bc947a1249694c87e5f452e365a12d327d833396b +size 757585 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/zipf/zipf_basic_stats.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/zipf/zipf_basic_stats.json new file mode 100644 index 0000000000000000000000000000000000000000..2cac616cf58d047563fb01cf07596ec3c1587ea3 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/zipf/zipf_basic_stats.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b58db6c3bd0c6ccdcc60e0d726244e0632d0f14a0d639597a28d78de059328fc +size 1294 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/zipf/zipf_fig.html b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/zipf/zipf_fig.html new file mode 100644 index 0000000000000000000000000000000000000000..ee447c1d28d6e143e8412f71371f2165d0ec38ef --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/zipf/zipf_fig.html @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e2a534847f537dd917a528ebd172e2d955eef3d543771a757d51c80e0f96147 +size 3693133 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/zipf/zipf_fig.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/zipf/zipf_fig.json new file mode 100644 index 0000000000000000000000000000000000000000..b344a3323ace3dac3cbad30446614a7aac25ca0f --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/zipf/zipf_fig.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d4cbf90b3bf6b84aa83603b2c42797b53b7567553e66e2e5aaec7dbbb322327 +size 107430