diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..590edb6bd09aee3ddc204183bef0215deb677a88 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +*.jsonl filter=lfs diff=lfs merge=lfs -text +*.json filter=lfs diff=lfs merge=lfs -text +*.csv filter=lfs diff=lfs merge=lfs -text +*.txt filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/NL2TL-dataset/NLTLsummary.json b/NL2TL-dataset/NLTLsummary.json new file mode 100644 index 0000000000000000000000000000000000000000..5d87bb2e9952565bc2302ee62f7d9c05caf323cb --- /dev/null +++ b/NL2TL-dataset/NLTLsummary.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6291896067a809119e01b43734779333c2d6b7baa36376afe8aa6579b4ba77ee +size 23025 diff --git a/NL2TL-dataset/collect/Cleaned_ENG.txt b/NL2TL-dataset/collect/Cleaned_ENG.txt new file mode 100644 index 0000000000000000000000000000000000000000..583bb81503ef6ab39746f2de154eaa3bc9075355 --- /dev/null +++ b/NL2TL-dataset/collect/Cleaned_ENG.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77f8e6b696147098eb16be8ec7a2891a493c008e7d997f994d1b77fea4255559 +size 447181 diff --git a/NL2TL-dataset/collect/Cleaned_LTL.txt b/NL2TL-dataset/collect/Cleaned_LTL.txt new file mode 100644 index 0000000000000000000000000000000000000000..29d5eece04300dea13e6e87630484ec2e1c17d77 --- /dev/null +++ b/NL2TL-dataset/collect/Cleaned_LTL.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ced5c8ee114bc50e480c44999e991337318749dd7f5988379c0f2cd3c5940d2 +size 252706 diff --git a/NL2TL-dataset/collect/UNCleaned_ENG.txt b/NL2TL-dataset/collect/UNCleaned_ENG.txt new file mode 100644 index 0000000000000000000000000000000000000000..f0cf71ef6bc0813e6043b85b2d22fa0801a40bfc --- /dev/null +++ b/NL2TL-dataset/collect/UNCleaned_ENG.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29fe1e57c68d9096705eae98f6456aa24eb86ecc2a43fa7896785a30eba1c3d0 +size 79381 diff --git a/NL2TL-dataset/collect/UNCleaned_LTL.txt b/NL2TL-dataset/collect/UNCleaned_LTL.txt new file mode 100644 index 0000000000000000000000000000000000000000..d5a27791f616775b8856d7b5f1a615105e1c94ce --- /dev/null +++ b/NL2TL-dataset/collect/UNCleaned_LTL.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f903103fcdc5b86bf1aca3568fd5483bfab90412fb1581006c0433c4ac0feecc +size 34258 diff --git a/NL2TL-dataset/collect/UNCleaned_num.txt b/NL2TL-dataset/collect/UNCleaned_num.txt new file mode 100644 index 0000000000000000000000000000000000000000..b8a53864f624b5b400b98abe0d2bf6c441443f8a --- /dev/null +++ b/NL2TL-dataset/collect/UNCleaned_num.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50067192dcabdc789ff68998a4b67b44d5ea71ad870b2f3029f06a62e93cd1cc +size 7639 diff --git a/NL2TL-dataset/collect/eng.txt b/NL2TL-dataset/collect/eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..9e8460db6c584325c53ea7125010814a31de9a8f --- /dev/null +++ b/NL2TL-dataset/collect/eng.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:834f288fc7c4bf3c27e829db10d72f673b55551f5e50591e9ff05461b78417e7 +size 384436 diff --git a/NL2TL-dataset/collect/eng_gpt_auged.txt b/NL2TL-dataset/collect/eng_gpt_auged.txt new file mode 100644 index 
0000000000000000000000000000000000000000..2d053ca173aaaf74ae9a325b5e55e37beb9e1a93 --- /dev/null +++ b/NL2TL-dataset/collect/eng_gpt_auged.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5d039a94cfde70475d1c935e80286d0c3bb9c578f01c86cf8f65aed015fdf14 +size 46038 diff --git a/NL2TL-dataset/collect/eng_gpt_auged2.txt b/NL2TL-dataset/collect/eng_gpt_auged2.txt new file mode 100644 index 0000000000000000000000000000000000000000..5d7860287b1483743b4ad87caf1cbd78fa185e5c --- /dev/null +++ b/NL2TL-dataset/collect/eng_gpt_auged2.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6484a6ace4d0bdd6e00b337aa2e734d4cd3cfdbcd03f90a960ea3859381ffaad +size 96837 diff --git "a/NL2TL-dataset/collect/eng\346\224\271\350\277\207\344\272\206\344\270\215\345\244\252\345\245\275.txt" "b/NL2TL-dataset/collect/eng\346\224\271\350\277\207\344\272\206\344\270\215\345\244\252\345\245\275.txt" new file mode 100644 index 0000000000000000000000000000000000000000..4119f2a4468b1239684915faf511c744fe39e836 --- /dev/null +++ "b/NL2TL-dataset/collect/eng\346\224\271\350\277\207\344\272\206\344\270\215\345\244\252\345\245\275.txt" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e74a45639f6d71263ea70768e40a4d299137fca66de36da7657a15cbba60beb9 +size 379834 diff --git a/NL2TL-dataset/collect/idxsrc_gpt_auged.txt b/NL2TL-dataset/collect/idxsrc_gpt_auged.txt new file mode 100644 index 0000000000000000000000000000000000000000..f1750947466989be039e7b3dc6a721b8b0c15acd --- /dev/null +++ b/NL2TL-dataset/collect/idxsrc_gpt_auged.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce789edf73241091f48e1e3a2b4eddf06ae0244fc20d4f050cbed474afd79ab6 +size 4425 diff --git a/NL2TL-dataset/collect/idxsrc_gpt_auged2.txt b/NL2TL-dataset/collect/idxsrc_gpt_auged2.txt new file mode 100644 index 0000000000000000000000000000000000000000..3a72b023e0be446c4a18b890997f631ad1a6a903 --- /dev/null +++ b/NL2TL-dataset/collect/idxsrc_gpt_auged2.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a71b50098910d35585cddb30b9fd0187898db7e0ea1ae5cc94a5bf7f100e81a4 +size 9273 diff --git a/NL2TL-dataset/collect/log.jsonl b/NL2TL-dataset/collect/log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a9e970c6df4c5f05f5eca58b1b3ba937724dc64d --- /dev/null +++ b/NL2TL-dataset/collect/log.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3a3e9ba897654fe39da61e0fc20687714fb1cd6da68d98eb604731b20d14fce +size 561367 diff --git a/NL2TL-dataset/collect/ltl copy.txt b/NL2TL-dataset/collect/ltl copy.txt new file mode 100644 index 0000000000000000000000000000000000000000..93bffa2415f7c3f074cbe5e4777313eef08f1420 --- /dev/null +++ b/NL2TL-dataset/collect/ltl copy.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:161b2a0b67725db0aa2e803c4576fc47cfb0698f66469c660941a0ddbcaba76c +size 192556 diff --git a/NL2TL-dataset/collect/ltl.txt b/NL2TL-dataset/collect/ltl.txt new file mode 100644 index 0000000000000000000000000000000000000000..93bffa2415f7c3f074cbe5e4777313eef08f1420 --- /dev/null +++ b/NL2TL-dataset/collect/ltl.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:161b2a0b67725db0aa2e803c4576fc47cfb0698f66469c660941a0ddbcaba76c +size 192556 diff --git a/NL2TL-dataset/collect/ltl_eng_1.jsonl b/NL2TL-dataset/collect/ltl_eng_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..16e9200f22ffc30f032c8107d8ccba60e45700b5 --- /dev/null 
+++ b/NL2TL-dataset/collect/ltl_eng_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55879344ea92bf9073b1b605dda98cfe551b9169186dde31592d15c9a24d47e4 +size 1043402 diff --git a/NL2TL-dataset/collect/ltl_eng_mid_order_1.jsonl b/NL2TL-dataset/collect/ltl_eng_mid_order_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..66944cc559d56be12a04ec020debe1c1b631e0ca --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_mid_order_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e61fbb4657c7b6d81d26c6a8fa596f03156dba6308b17b54125f79cd3734c058 +size 1069183 diff --git a/NL2TL-dataset/collect/ltl_eng_test-aug.jsonl b/NL2TL-dataset/collect/ltl_eng_test-aug.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a0f04ea97c9f872e07a9ed28cb3a5af031eb7fb9 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_test-aug.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0982bf8999ef200b1d7fa5c867b70e41caf136b7a4422e73eb303f7ee77b02dd +size 131838 diff --git a/NL2TL-dataset/collect/ltl_eng_test.jsonl b/NL2TL-dataset/collect/ltl_eng_test.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a0f04ea97c9f872e07a9ed28cb3a5af031eb7fb9 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_test.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0982bf8999ef200b1d7fa5c867b70e41caf136b7a4422e73eb303f7ee77b02dd +size 131838 diff --git a/NL2TL-dataset/collect/ltl_eng_test_mid.jsonl b/NL2TL-dataset/collect/ltl_eng_test_mid.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..54ab5b2681bad0d32d6420669f8057573a751437 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_test_mid.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd4e75373318179d3cd3a2bbb330fdfe75376dabe77f475f31d95cf27f4d100c +size 137907 diff --git a/NL2TL-dataset/collect/ltl_eng_test_mid_ascii.jsonl b/NL2TL-dataset/collect/ltl_eng_test_mid_ascii.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b461e2c6ee10aa40028ce6c0e5d86320b2ce683a --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_test_mid_ascii.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9185fd98062c160c363f35116a943a0950fdd2151bd7ff61cb9df39921ca73f3 +size 137931 diff --git a/NL2TL-dataset/collect/ltl_eng_test_mid_ascii_gptAuged.jsonl b/NL2TL-dataset/collect/ltl_eng_test_mid_ascii_gptAuged.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a5e8262fc36f6db074d742b36d551722f1929077 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_test_mid_ascii_gptAuged.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ca41a9c7ea7dd2151e861ac7754f36eee0e386f7a0dbbf7d2d5cabf927ccb7b +size 125920 diff --git a/NL2TL-dataset/collect/ltl_eng_train-aug.jsonl b/NL2TL-dataset/collect/ltl_eng_train-aug.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2cbcfe7989d7b231d254158c1fa27ac3a6d36e87 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_train-aug.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4817c9d380816056e05c0bec1f985ac90dbc1068417db5d033d15bfe4273d279 +size 911564 diff --git a/NL2TL-dataset/collect/ltl_eng_train.jsonl b/NL2TL-dataset/collect/ltl_eng_train.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2cbcfe7989d7b231d254158c1fa27ac3a6d36e87 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_train.jsonl @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4817c9d380816056e05c0bec1f985ac90dbc1068417db5d033d15bfe4273d279 +size 911564 diff --git a/NL2TL-dataset/collect/ltl_eng_train_mid.jsonl b/NL2TL-dataset/collect/ltl_eng_train_mid.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..178754ffb1bcf0ce59c0025793c5356931dd5428 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_train_mid.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0daf22f37a11d38edaacfd837ccabf59fc7395431166bc50b0336e030faf77d1 +size 937958 diff --git a/NL2TL-dataset/collect/ltl_eng_train_mid_ascii.jsonl b/NL2TL-dataset/collect/ltl_eng_train_mid_ascii.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..571f8a195bc5ee59855ca0a5f8e70c0ae318be35 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_train_mid_ascii.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3674e9aabc015509efadbe1f8374627d5aaaacfb8fe3c717fa1ff852dcb8c4f +size 937958 diff --git a/NL2TL-dataset/collect/ltl_eng_train_mid_ascii_gptAuged.jsonl b/NL2TL-dataset/collect/ltl_eng_train_mid_ascii_gptAuged.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7130227a4bd581220025cfa9b0e131f0af43cb50 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_train_mid_ascii_gptAuged.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cea13abc2204906d232ea88b78830ad6be09e01dbd5b7ad7ec9da9dff2f6c777 +size 1129386 diff --git a/NL2TL-dataset/collect/ltl_mid_order.txt b/NL2TL-dataset/collect/ltl_mid_order.txt new file mode 100644 index 0000000000000000000000000000000000000000..1201f43dc64624b6851b169ade596ce2a21291f1 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_mid_order.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:646500a7231278e0dd4ca0ed6fc2cc71ba4f3b7284a13a720a689a8e634db78e +size 225055 diff --git a/NL2TL-dataset/collect/ltl_mid_order_ascii.txt b/NL2TL-dataset/collect/ltl_mid_order_ascii.txt new file mode 100644 index 0000000000000000000000000000000000000000..1f99d173ae876b56448d63f504deeb0d2cc9d192 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_mid_order_ascii.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f56781a159d3a52b58101038f0d15e1ef054b5f0290dd73228e51f4004243dd6 +size 225073 diff --git a/NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged.txt b/NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged.txt new file mode 100644 index 0000000000000000000000000000000000000000..c36d06dcf99ff87ef245c8163186e0d0ef53efcd --- /dev/null +++ b/NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f332db20802ecce5a8bd2879316e66061c97d438d18c7cbc6d4899b3bfbcaa44 +size 19575 diff --git a/NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged2.txt b/NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged2.txt new file mode 100644 index 0000000000000000000000000000000000000000..709fa5ff35af84230c647383272c28460a358d51 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged2.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfca7f2b1129e28f89e4dd68d76406ad26404514d08864aaf1c76f3fa01eb132 +size 40805 diff --git a/NL2TL-dataset/collect/note.txt b/NL2TL-dataset/collect/note.txt new file mode 100644 index 0000000000000000000000000000000000000000..e019bc077819de4a67bb77581911674f1b6d6664 --- /dev/null +++ b/NL2TL-dataset/collect/note.txt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:7798d0398769800f3cfc50fc902cec4cd1aa1eff05e3cb41790094c620f61039 +size 59 diff --git a/NL2TL-dataset/collect2/CW_total_3382_for_transfer_word_midfix.jsonl b/NL2TL-dataset/collect2/CW_total_3382_for_transfer_word_midfix.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..17cca6da44c7ebc99a22dbbb4850f18e70a32b64 --- /dev/null +++ b/NL2TL-dataset/collect2/CW_total_3382_for_transfer_word_midfix.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c131c3486f4debdf0ed416248de2892d41627abbf2fcd83c69051b7ba69bfb2b +size 449106 diff --git a/NL2TL-dataset/collect2/GLTL_train_8923_for_transfer_word_midfix.jsonl b/NL2TL-dataset/collect2/GLTL_train_8923_for_transfer_word_midfix.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..aef9cd294a1c4685e353a7e74e0b3e4b3c432361 --- /dev/null +++ b/NL2TL-dataset/collect2/GLTL_train_8923_for_transfer_word_midfix.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc7aff94ba621fb7def1b174b2f5ee0e1336b572ed65bb7c15f03f1295156930 +size 1839113 diff --git a/NL2TL-dataset/collect2/LTLexplain_0.json b/NL2TL-dataset/collect2/LTLexplain_0.json new file mode 100644 index 0000000000000000000000000000000000000000..74b05c4d764aad5d3038b7256b9568842bdc3170 --- /dev/null +++ b/NL2TL-dataset/collect2/LTLexplain_0.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08175568f9105c8dd5e8e5413e8a90834ab48b0cd2c8396ca1a71ab97f69ef68 +size 149375 diff --git a/NL2TL-dataset/collect2/LTLexplain_1.json b/NL2TL-dataset/collect2/LTLexplain_1.json new file mode 100644 index 0000000000000000000000000000000000000000..f0fed24fe8d11d073037020ee045d6ebfc620efc --- /dev/null +++ b/NL2TL-dataset/collect2/LTLexplain_1.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2a0f1e1bfe11526ca4ad549cf263fcfd60d18d9ee27cf77b889a3d0b8728d37 +size 149123 diff --git a/NL2TL-dataset/collect2/LTLexplain_2.json b/NL2TL-dataset/collect2/LTLexplain_2.json new file mode 100644 index 0000000000000000000000000000000000000000..c4d9b737bc94de11eb8f874735a2bd3a73dcab69 --- /dev/null +++ b/NL2TL-dataset/collect2/LTLexplain_2.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64ee6dd1b5f76edc89b57cc4384a7dc1ea1af31940016f844c8a22ee0b98c0e4 +size 82749 diff --git a/NL2TL-dataset/collect2/LTLexplain_3.json b/NL2TL-dataset/collect2/LTLexplain_3.json new file mode 100644 index 0000000000000000000000000000000000000000..8deb947c5e4ab2177c5b4c569cff116f389b9b14 --- /dev/null +++ b/NL2TL-dataset/collect2/LTLexplain_3.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:052761b9c005136e13576c0bb4f279ad5c81e9e15abfd736e344028b32e8af89 +size 73448 diff --git a/NL2TL-dataset/collect2/LTLexplain_4.json b/NL2TL-dataset/collect2/LTLexplain_4.json new file mode 100644 index 0000000000000000000000000000000000000000..834b3ed504b974cc90b60ed2680d25967c4b5727 --- /dev/null +++ b/NL2TL-dataset/collect2/LTLexplain_4.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3fa922f8b8402b2f950c18de7e2360a0aa7158297251e0ea1fa2d6c26337ccf +size 147624 diff --git a/NL2TL-dataset/collect2/LTLsummary.json b/NL2TL-dataset/collect2/LTLsummary.json new file mode 100644 index 0000000000000000000000000000000000000000..b0b9914651b91c5367d20d85b47157e57727bc66 --- /dev/null +++ b/NL2TL-dataset/collect2/LTLsummary.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:11079ebb03aa8d76763c690f7c59431f32c25ce385657b3e68daa1b15c7ff734 +size 22393
diff --git a/NL2TL-dataset/collect2/getUniqueLTL.py b/NL2TL-dataset/collect2/getUniqueLTL.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e93b0c1966a344ade2484a1fda66221eef9809a
--- /dev/null
+++ b/NL2TL-dataset/collect2/getUniqueLTL.py
@@ -0,0 +1,19 @@
+import json
+import os
+
+
+def findUniqueLTL(paths: list):
+    # Collect the set of distinct raw LTL formulas across the given .jsonl files.
+    ret = {}
+    for path in paths:
+        with open(path, 'r') as f:
+            for line in f:
+                ret[json.loads(line)['raw_ltl']] = 1
+    return ret
+
+
+if __name__ == '__main__':
+    paths = ['/home/user/xsj/NL2TL-dataset/collect2/ltl_eng_test_mid_ascii_gptAuged.jsonl', '/home/user/xsj/NL2TL-dataset/collect2/ltl_eng_train_mid_ascii_gptAuged.jsonl']
+    LTLs = findUniqueLTL(paths=paths)
+    with open(os.path.join('/home/user/xsj/NL2TL-dataset/collect2', 'NLTLsummary.json'), 'w') as f:
+        f.write(json.dumps(LTLs, sort_keys=False, indent=4, separators=(',', ':')))
diff --git a/NL2TL-dataset/collect2/lifted_data.jsonl b/NL2TL-dataset/collect2/lifted_data.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..105beb79da430fbc3d835d1206c2961fda1dcf6e
--- /dev/null
+++ b/NL2TL-dataset/collect2/lifted_data.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c68ab33843fe0485380f33b6e49cb7c9230eae66252d869083584dd7ef048afb
+size 12458149
diff --git a/NL2TL-dataset/collect2/ltl_eng_test_mid_ascii_gptAuged.jsonl b/NL2TL-dataset/collect2/ltl_eng_test_mid_ascii_gptAuged.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..9c784d20c3c5e0f66a030dac769086144d333c7a
--- /dev/null
+++ b/NL2TL-dataset/collect2/ltl_eng_test_mid_ascii_gptAuged.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41061064f1591c833a29ea73ce1888aaa831b488b0f4f0f2a04994c871a42873
+size 140979
diff --git a/NL2TL-dataset/collect2/ltl_eng_train_mid_ascii_gptAuged.jsonl b/NL2TL-dataset/collect2/ltl_eng_train_mid_ascii_gptAuged.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..08ec7b3a4a8772ddde8b7f2c87ee0f0e3a437b07
--- /dev/null
+++ b/NL2TL-dataset/collect2/ltl_eng_train_mid_ascii_gptAuged.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24f9856d3e7f3d882de35caf5e3d79e70b783630178e1fcbad197b31431b2bfa
+size 1264107
diff --git a/NL2TL-dataset/collect2/navi_total_refined.jsonl b/NL2TL-dataset/collect2/navi_total_refined.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..ae1e41389d8202ce107919bd0c651d33a63416f1
--- /dev/null
+++ b/NL2TL-dataset/collect2/navi_total_refined.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11499148f1f053c86476f08ec39f9cb0cb724eb9451d0153e2f34c77a04855b8
+size 3825939
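Every data file in this commit is stored as a Git LFS pointer rather than as raw content: the three "+" lines of each hunk record only the LFS spec version, the SHA-256 of the real payload, and its size in bytes, and the payload itself is fetched by git lfs pull. A minimal sketch of inspecting such a stub from Python follows; read_lfs_pointer is a hypothetical helper for illustration, not part of this repository:

import sys

def read_lfs_pointer(path):
    # A Git LFS pointer is a tiny text stub with three "key value" lines:
    # "version <spec-url>", "oid sha256:<hex digest>", "size <bytes>".
    fields = {}
    with open(path, 'r') as f:
        for line in f:
            key, _, value = line.strip().partition(' ')
            fields[key] = value
    return {
        'version': fields['version'],
        'sha256': fields['oid'].removeprefix('sha256:'),  # Python >= 3.9
        'size_bytes': int(fields['size']),
    }

if __name__ == '__main__':
    # e.g. python read_pointer.py NL2TL-dataset/collect2/lifted_data.jsonl
    print(read_lfs_pointer(sys.argv[1]))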
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "DOMAIN = \"drone-planning/\"\n", + "# DOMAIN = \"clean-up/\"\n", + "# DOMAIN = \"pick-and-place/\"\n", + "with open(DOMAIN + \"train_seed.jsonl\") as f:\n", + " train_seed = [json.loads(line) for line in f]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "eng_seeds = {\n", + " seed['natural']: [] for seed in train_seed\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Augmentation Code\n", + "prompting GPT-3 seems to work the best in this case" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# You need to set your OPENAI API key here\n", + "# https://beta.openai.com/account/api-keys\n", + "openai.api_key = \"TO_BE_SET\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def normalize(sentence):\n", + " # captialize first letter and add period at the end if not present\n", + " if sentence[0].islower():\n", + " sentence = sentence[0].upper() + sentence[1:]\n", + " if sentence[-1] != '.':\n", + " sentence = sentence + '.'\n", + " return sentence\n", + "\n", + "def parse_sentences_from_response(response):\n", + " lines = response.split('\\n')\n", + " # assert len(lines) == 5\n", + " assert len(lines) == 10\n", + " lines[0] = \"1.\" + lines[0]\n", + " paraphrases = []\n", + " for idx, line in enumerate(lines):\n", + " assert line.startswith(str(idx+1) + '. ')\n", + " sentence_start_idx = len(str(idx+1) + '. ')\n", + " paraphrases.append(line[sentence_start_idx:])\n", + " for paraphrase in paraphrases:\n", + " if paraphrase[-1] == ' ':\n", + " if paraphrase[-2] == '.':\n", + " paraphrase = paraphrase[:-1]\n", + " else:\n", + " paraphrase = paraphrase[:-2] + '.'\n", + " return paraphrases\n", + "\n", + "\n", + "PROMPT = \"\"\"Rephrase the source sentence in 10 different ways. 
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "O = rephrase_a_sentence(\"Go to the red room or go to the green room to finally go to the blue room.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "O"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Run Augmentation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "len(eng_seeds)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "list(eng_seeds.keys())[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def paraphrase_done(eng_seeds):\n",
+ "    for eng_seed, extended in tqdm(eng_seeds.items()):\n",
+ "        if len(extended) == 0:\n",
+ "            return False\n",
+ "    return True\n",
+ "\n",
+ "while not paraphrase_done(eng_seeds):\n",
+ "    for eng_seed, extended in tqdm(eng_seeds.items()):\n",
+ "        if len(extended) == 0:\n",
+ "            extended += rephrase_a_sentence(eng_seed)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eng_seeds"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Dump as Training Data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "train_seed[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with open(DOMAIN + \"syn-aug.train.jsonl\", 'w') as f:\n",
+ "    for seed in train_seed:\n",
+ "        f.write(json.dumps(seed) + '\\n')\n",
+ "        for aug_eng in eng_seeds[seed['natural']]:\n",
+ "            f.write(json.dumps({\n",
+ "                'natural': aug_eng,\n",
+ "                'canonical': seed['canonical'],\n",
+ "                'formula': seed['formula']\n",
+ "            }) + '\\n')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with open(DOMAIN + \"syn.train.jsonl\", 'w') as f:\n",
+ "    for seed in train_seed:\n",
+ "        f.write(json.dumps(seed) + '\\n')"
+ ]
+ },
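Each line written to syn-aug.train.jsonl above pairs one English sentence (a seed or one of its GPT-3 paraphrases) with the seed's canonical form and formula, which the paraphrases inherit unchanged. An illustrative record, assuming the probe sentence used earlier in this notebook and a canonical string in the lifted format built by the clean-up preprocessing notebook later in this diff:

import json

# Illustrative training record in the syn-aug.train.jsonl format; the values
# are examples taken from elsewhere in this commit, not a real dumped line.
record = {
    "natural": "Go to the red room or go to the green room to finally go to the blue room.",
    "canonical": "finally ( and ( or ( Prop_1 , Prop_2 ) , finally ( Prop_3 ) ) )",
    "formula": "finally ( and ( or ( Prop_1 , Prop_2 ) , finally ( Prop_3 ) ) )",
}
print(json.dumps(record))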
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Normalize the natural language form"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if DOMAIN == \"clean-up/\":\n",
+ "    # in clean up, golden natural language data comes without period at the end, no capitalization in the beginning\n",
+ "    def clean_up_normalize(sentence):\n",
+ "        if sentence[0].isupper():\n",
+ "            sentence = sentence[0].lower() + sentence[1:]\n",
+ "        if sentence[-1] == '.':\n",
+ "            sentence = sentence[:-1]\n",
+ "        return sentence\n",
+ "\n",
+ "    buffer = []\n",
+ "    with open(DOMAIN + \"syn-aug.train.jsonl\", 'r') as f:\n",
+ "        for l in f.readlines():\n",
+ "            buffer.append(json.loads(l))\n",
+ "\n",
+ "    with open(DOMAIN + \"syn-aug.train.jsonl\", 'w') as f:\n",
+ "        for dp in buffer:\n",
+ "            f.write(json.dumps({\n",
+ "                'natural': clean_up_normalize(dp['natural']),\n",
+ "                'canonical': dp['canonical'],\n",
+ "                'formula': dp['formula']\n",
+ "            }) + '\\n')\n",
+ "\n",
+ "if DOMAIN == \"pick-and-place/\":\n",
+ "    # in pick and place, golden natural language data comes without period at the end, no capitalization in the beginning\n",
+ "    def clean_up_normalize(sentence):\n",
+ "        if sentence[0].isupper():\n",
+ "            sentence = sentence[0].lower() + sentence[1:]\n",
+ "        if sentence[-1] == '.':\n",
+ "            sentence = sentence[:-1]\n",
+ "        return sentence\n",
+ "\n",
+ "    buffer = []\n",
+ "    with open(DOMAIN + \"syn-aug.train.jsonl\", 'r') as f:\n",
+ "        for l in f.readlines():\n",
+ "            buffer.append(json.loads(l))\n",
+ "\n",
+ "    with open(DOMAIN + \"syn-aug.train.jsonl\", 'w') as f:\n",
+ "        for dp in buffer:\n",
+ "            f.write(json.dumps({\n",
+ "                'natural': clean_up_normalize(dp['natural']),\n",
+ "                'canonical': dp['canonical'],\n",
+ "                'formula': dp['formula']\n",
+ "            }) + '\\n')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if DOMAIN == \"drone-planning/\":\n",
+ "    # in drone planning, golden natural language data comes with a \"space + period\" at the end, no capitalization in the beginning\n",
+ "    def clean_up_normalize(sentence):\n",
+ "        if sentence[0].isupper():\n",
+ "            sentence = sentence[0].lower() + sentence[1:]\n",
+ "        while sentence[-1] == ' ' or sentence[-1] == '.' or sentence[-1] == '!':\n",
+ "            sentence = sentence[:-1]\n",
+ "        sentence = sentence + '.'\n",
+ "        sentence = sentence.replace('.', ' .')\n",
+ "        sentence = sentence.replace(',', ' ,')\n",
+ "        return sentence\n",
+ "\n",
+ "    buffer = []\n",
+ "    # with open(DOMAIN + \"syn-aug.train.jsonl\", 'r') as f:\n",
+ "    #     for l in f.readlines():\n",
+ "    #         buffer.append(json.loads(l))\n",
+ "\n",
+ "    # with open(DOMAIN + \"syn-aug.train.jsonl\", 'w') as f:\n",
+ "    #     for dp in buffer:\n",
+ "    #         f.write(json.dumps({\n",
+ "    #             'natural': clean_up_normalize(dp['natural']),\n",
+ "    #             'canonical': dp['canonical'],\n",
+ "    #             'formula': dp['formula']\n",
+ "    #         }) + '\\n')\n",
+ "    with open(DOMAIN + \"syn.train.jsonl\", 'r') as f:\n",
+ "        for l in f.readlines():\n",
+ "            buffer.append(json.loads(l))\n",
+ "\n",
+ "    with open(DOMAIN + \"syn.train.jsonl\", 'w') as f:\n",
+ "        for dp in buffer:\n",
+ "            f.write(json.dumps({\n",
+ "                'natural': clean_up_normalize(dp['natural']),\n",
+ "                'canonical': dp['canonical'],\n",
+ "                'formula': dp['formula']\n",
+ "            }) + '\\n')"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "GPML",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.13"
+ },
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "75567ad983eac98a78c1e40a895e8d82557b42cf9969286235abec07ddbf9e7d"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical-lifted.json 
b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical-lifted.json new file mode 100644 index 0000000000000000000000000000000000000000..10b02e56e0fe39919f62150cf1214f3a07791447 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical-lifted.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dfa917383f4e67d55a159050ffa60c83dea987236ef516edf7d2e45e16689d3 +size 7890 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical.json b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical.json new file mode 100644 index 0000000000000000000000000000000000000000..85aa28666ff922102299444a0e2a1bfbc04591d1 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7554f76c648d84596c8a77458bd615550be66d0903143146a8497797205315f4 +size 12442 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/golden-lifted.jsonl b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/golden-lifted.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..57e0f01e18ba7e2df9437748bf855882516fd7d2 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/golden-lifted.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24cc3906e8d32f3a1bad606ac8d262608065aa6ffbe3065fdf98b4b1fb754bc7 +size 599574 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/golden.jsonl b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/golden.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..522f04596a0442e01ecbaeecb971bf7ac539bbcc --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/golden.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d865ee0706e420cc35295a9c237e2ea3eceb0e0f87c7b1739013dd9448c73de1 +size 866586 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/preprocess.ipynb b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/preprocess.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..20d7897135a49e09eac8314096c764515dbf1c81 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/preprocess.ipynb @@ -0,0 +1,561 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"raw/ltl-lifted.txt\", \"r\") as f:\n", + " raw_ltls = f.read().splitlines()\n", + " unique_ltls = set(raw_ltls)\n", + "with open(\"raw/eng-lifted.txt\", 'r') as f:\n", + " raw_engs = f.read().splitlines()\n", + "\n", + "DPs = []\n", + "for ltl, eng in zip(raw_ltls, raw_engs):\n", + " DPs.append({'ltl': ltl, 'eng': eng})" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "F P04\n", + "F P02\n", + "F P01\n", + "F P03\n", + "F & P02 F Z\n", + "F & P02 F X\n", + "F & P04 F P01\n", + "F & P03 F P01\n", + "F & P04 F P02\n", + "F & P04 F P03\n", + "F & P01 F P02\n", + "F & P01 F P04\n", + "F & P02 F P03\n", + "F & P02 F P01\n", + "F & P03 F P04\n", + "F & P02 F P04\n", + "F & P01 F P03\n", + "& F P04 G ! P01\n", + "& F P04 G ! P03\n", + "& F P04 G ! P02\n", + "& F P01 G ! P02\n", + "& F P03 G ! 
P04\n", + "& F P01 G ! P04\n", + "& F P02 G ! P01\n", + "& F P01 G ! P03\n", + "& F P03 G ! P02\n", + "& F P03 G ! P01\n", + "& F P02 G ! P04\n", + "& F P02 G ! P03\n", + "F & | P02 P03 F P04\n", + "F & | P02 P04 F P01\n", + "F & | P01 P03 F P04\n", + "F & | P03 P01 F P04\n", + "F & | P02 P01 F P03\n", + "F & | P02 P01 F P04\n", + "F & | P04 P02 F P01\n", + "F & | P04 P03 F P01\n", + "F & | P04 P03 F P02\n", + "F & | P02 P03 F P01\n" + ] + } + ], + "source": [ + "for ltl in sorted(unique_ltls, key=lambda x: len(x)):\n", + " print(ltl)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Clean-up Domain, with augmentation\n", + "Number of Data Points 3382\n", + "Number of unique LTL expressions: 39\n", + "Number of unique LTL structures: 4\n" + ] + } + ], + "source": [ + "print(\"Clean-up Domain, with augmentation\")\n", + "print(\"Number of Data Points\", len(DPs))\n", + "print(\"Number of unique LTL expressions:\", len(unique_ltls))\n", + "print(\"Number of unique LTL structures:\", 4)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "F & P01 F P04\n", + "Prop_1 keep going and stop when you reach Prop_4\n", + "\n", + "F P02\n", + "move to Prop_2\n", + "\n", + "& F P04 G ! P03\n", + "go only through rooms that are not Prop_3 to get to Prop_4\n", + "\n", + "F & P02 F X\n", + "move into Prop_2 and push the chair back into the purple room\n", + "\n", + "F & | P02 P03 F P01\n", + "go through the Prop_3 or Prop_2 to reach the Prop_1\n", + "\n" + ] + } + ], + "source": [ + "seen_length = set()\n", + "for dp in DPs:\n", + " if len(dp['ltl']) not in seen_length:\n", + " seen_length.add(len(dp['ltl']))\n", + " print(dp['ltl'])\n", + " print(dp['eng'])\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean up LTL expressions " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "Room_Types = [('P01', 'Prop_1'), ('P02', 'Prop_2'), ('P03', 'Prop_3'), ('P04', 'Prop_4')]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "APs = [\n", + " {'ap': 'P01', 'eng': 'Prop_1'},\n", + " {'ap': 'P02', 'eng': 'Prop_2'},\n", + " {'ap': 'P03', 'eng': 'Prop_3'},\n", + " {'ap': 'P04', 'eng': 'Prop_4'},\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def build_type_A(ap1):\n", + " return {\n", + " 'raw_ltl': f\"F {ap1['ap']}\",\n", + " 'canonical_ltl': f\"finally {ap1['eng']}\",\n", + " 'eng': f\"{ap1['eng']}\"}\n", + "\n", + "def build_type_B(room_1, room_2):\n", + " return {\n", + " \"raw_ltl\": f\"F & {room_1['ap']} F {room_2['ap']}\",\n", + " \"canonical_ltl\": f\"finally ( and ( {room_1['eng']} , finally ( {room_2['eng']} ) ) )\",\n", + " \"eng\": f\"{room_1['eng']} first, and then {room_2['eng']}\"}\n", + "\n", + "def build_type_C(room_1, room_2):\n", + " return {\n", + " \"raw_ltl\": f\"& F {room_1['ap']} G ! 
{room_2['ap']}\",\n", + " \"canonical_ltl\": f\"and ( finally ( {room_1['eng']} ) , globally ( not ( {room_2['eng']} ) ) )\",\n", + " \"eng\": f\"{room_1['eng']}, and do not ever {room_2['eng']}\"\n", + " }\n", + "\n", + "def build_type_D(room_1, room_2, room_3):\n", + " return {\n", + " \"raw_ltl\": f\"F & | {room_1['ap']} {room_2['ap']} F {room_3['ap']}\",\n", + " \"canonical_ltl\": f\"finally ( and ( or ( {room_1['eng']} , {room_2['eng']} ) , finally ( {room_3['eng']} ) ) )\",\n", + " \"eng\": f\"{room_1['eng']} or {room_2['eng']} to finally {room_3['eng']}\"\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "translation_seeds = []\n", + "for r1 in APs:\n", + " translation_seeds.append(build_type_A(r1))\n", + " for r2 in APs:\n", + " if r1 == r2:\n", + " continue\n", + " translation_seeds.append(build_type_B(r1, r2))\n", + " translation_seeds.append(build_type_C(r1, r2))\n", + " for r3 in APs:\n", + " if r1 == r3 or r2 == r3:\n", + " continue\n", + " translation_seeds.append(build_type_D(r1, r2, r3))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "52\n" + ] + } + ], + "source": [ + "print(len(translation_seeds))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'F & P02 F X', 'F & P02 F Z'}\n" + ] + } + ], + "source": [ + "seed_ltls = set([t['raw_ltl'] for t in translation_seeds])\n", + "print(unique_ltls - seed_ltls)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "additional_seed_1 = {\n", + " \"raw_ltl\": \"F & P02 F X\",\n", + " \"canonical_ltl\": \"finally ( and ( Prop_2 , finally ( Prop_1 ) ) )\",\n", + " \"eng\": \"Prop_2 and Prop_1\"\n", + "}\n", + "\n", + "additional_seed_2 = {\n", + " \"raw_ltl\": \"F & P02 F Z\",\n", + " \"canonical_ltl\": \"finally ( and ( Prop_2 , finally ( Prop_4 ) ) )\",\n", + " \"eng\": \"Prop_2 and Prop_4\"\n", + "}\n", + "\n", + "translation_seeds.append(additional_seed_1)\n", + "translation_seeds.append(additional_seed_2)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "54" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(translation_seeds)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "set()\n" + ] + } + ], + "source": [ + "seed_ltls = set([t['raw_ltl'] for t in translation_seeds])\n", + "print(unique_ltls - seed_ltls)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save the canonical decoding list" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "possible_decodings = {}\n", + "for seed in translation_seeds:\n", + " canonical = seed['canonical_ltl']\n", + " possible_decodings[canonical] = {\n", + " 'formula': canonical,\n", + " 'raw': seed['raw_ltl'],\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"canonical-lifted.json\", 'w') as f:\n", + " f.write(json.dumps(possible_decodings, indent=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "### Save translation seed for zero shot" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"train_seed-lifted.jsonl\", \"w\") as f:\n", + " for dp in translation_seeds:\n", + " better_ltl = dp['canonical_ltl']\n", + " entry = {'canonical': better_ltl, 'formula': better_ltl, 'natural': dp['eng']}\n", + " json.dump(entry, f)\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save golden data for evaluation " + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "raw_canonical_mapping = {\n", + " seed['raw_ltl']: seed['canonical_ltl'] for seed in translation_seeds\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'F P01': 'finally Prop_1',\n", + " 'F & P01 F P02': 'finally ( and ( Prop_1 , finally ( Prop_2 ) ) )',\n", + " '& F P01 G ! P02': 'and ( finally ( Prop_1 ) , globally ( not ( Prop_2 ) ) )',\n", + " 'F & | P01 P02 F P03': 'finally ( and ( or ( Prop_1 , Prop_2 ) , finally ( Prop_3 ) ) )',\n", + " 'F & | P01 P02 F P04': 'finally ( and ( or ( Prop_1 , Prop_2 ) , finally ( Prop_4 ) ) )',\n", + " 'F & P01 F P03': 'finally ( and ( Prop_1 , finally ( Prop_3 ) ) )',\n", + " '& F P01 G ! P03': 'and ( finally ( Prop_1 ) , globally ( not ( Prop_3 ) ) )',\n", + " 'F & | P01 P03 F P02': 'finally ( and ( or ( Prop_1 , Prop_3 ) , finally ( Prop_2 ) ) )',\n", + " 'F & | P01 P03 F P04': 'finally ( and ( or ( Prop_1 , Prop_3 ) , finally ( Prop_4 ) ) )',\n", + " 'F & P01 F P04': 'finally ( and ( Prop_1 , finally ( Prop_4 ) ) )',\n", + " '& F P01 G ! P04': 'and ( finally ( Prop_1 ) , globally ( not ( Prop_4 ) ) )',\n", + " 'F & | P01 P04 F P02': 'finally ( and ( or ( Prop_1 , Prop_4 ) , finally ( Prop_2 ) ) )',\n", + " 'F & | P01 P04 F P03': 'finally ( and ( or ( Prop_1 , Prop_4 ) , finally ( Prop_3 ) ) )',\n", + " 'F P02': 'finally Prop_2',\n", + " 'F & P02 F P01': 'finally ( and ( Prop_2 , finally ( Prop_1 ) ) )',\n", + " '& F P02 G ! P01': 'and ( finally ( Prop_2 ) , globally ( not ( Prop_1 ) ) )',\n", + " 'F & | P02 P01 F P03': 'finally ( and ( or ( Prop_2 , Prop_1 ) , finally ( Prop_3 ) ) )',\n", + " 'F & | P02 P01 F P04': 'finally ( and ( or ( Prop_2 , Prop_1 ) , finally ( Prop_4 ) ) )',\n", + " 'F & P02 F P03': 'finally ( and ( Prop_2 , finally ( Prop_3 ) ) )',\n", + " '& F P02 G ! P03': 'and ( finally ( Prop_2 ) , globally ( not ( Prop_3 ) ) )',\n", + " 'F & | P02 P03 F P01': 'finally ( and ( or ( Prop_2 , Prop_3 ) , finally ( Prop_1 ) ) )',\n", + " 'F & | P02 P03 F P04': 'finally ( and ( or ( Prop_2 , Prop_3 ) , finally ( Prop_4 ) ) )',\n", + " 'F & P02 F P04': 'finally ( and ( Prop_2 , finally ( Prop_4 ) ) )',\n", + " '& F P02 G ! P04': 'and ( finally ( Prop_2 ) , globally ( not ( Prop_4 ) ) )',\n", + " 'F & | P02 P04 F P01': 'finally ( and ( or ( Prop_2 , Prop_4 ) , finally ( Prop_1 ) ) )',\n", + " 'F & | P02 P04 F P03': 'finally ( and ( or ( Prop_2 , Prop_4 ) , finally ( Prop_3 ) ) )',\n", + " 'F P03': 'finally Prop_3',\n", + " 'F & P03 F P01': 'finally ( and ( Prop_3 , finally ( Prop_1 ) ) )',\n", + " '& F P03 G ! 
P01': 'and ( finally ( Prop_3 ) , globally ( not ( Prop_1 ) ) )',\n", + " 'F & | P03 P01 F P02': 'finally ( and ( or ( Prop_3 , Prop_1 ) , finally ( Prop_2 ) ) )',\n", + " 'F & | P03 P01 F P04': 'finally ( and ( or ( Prop_3 , Prop_1 ) , finally ( Prop_4 ) ) )',\n", + " 'F & P03 F P02': 'finally ( and ( Prop_3 , finally ( Prop_2 ) ) )',\n", + " '& F P03 G ! P02': 'and ( finally ( Prop_3 ) , globally ( not ( Prop_2 ) ) )',\n", + " 'F & | P03 P02 F P01': 'finally ( and ( or ( Prop_3 , Prop_2 ) , finally ( Prop_1 ) ) )',\n", + " 'F & | P03 P02 F P04': 'finally ( and ( or ( Prop_3 , Prop_2 ) , finally ( Prop_4 ) ) )',\n", + " 'F & P03 F P04': 'finally ( and ( Prop_3 , finally ( Prop_4 ) ) )',\n", + " '& F P03 G ! P04': 'and ( finally ( Prop_3 ) , globally ( not ( Prop_4 ) ) )',\n", + " 'F & | P03 P04 F P01': 'finally ( and ( or ( Prop_3 , Prop_4 ) , finally ( Prop_1 ) ) )',\n", + " 'F & | P03 P04 F P02': 'finally ( and ( or ( Prop_3 , Prop_4 ) , finally ( Prop_2 ) ) )',\n", + " 'F P04': 'finally Prop_4',\n", + " 'F & P04 F P01': 'finally ( and ( Prop_4 , finally ( Prop_1 ) ) )',\n", + " '& F P04 G ! P01': 'and ( finally ( Prop_4 ) , globally ( not ( Prop_1 ) ) )',\n", + " 'F & | P04 P01 F P02': 'finally ( and ( or ( Prop_4 , Prop_1 ) , finally ( Prop_2 ) ) )',\n", + " 'F & | P04 P01 F P03': 'finally ( and ( or ( Prop_4 , Prop_1 ) , finally ( Prop_3 ) ) )',\n", + " 'F & P04 F P02': 'finally ( and ( Prop_4 , finally ( Prop_2 ) ) )',\n", + " '& F P04 G ! P02': 'and ( finally ( Prop_4 ) , globally ( not ( Prop_2 ) ) )',\n", + " 'F & | P04 P02 F P01': 'finally ( and ( or ( Prop_4 , Prop_2 ) , finally ( Prop_1 ) ) )',\n", + " 'F & | P04 P02 F P03': 'finally ( and ( or ( Prop_4 , Prop_2 ) , finally ( Prop_3 ) ) )',\n", + " 'F & P04 F P03': 'finally ( and ( Prop_4 , finally ( Prop_3 ) ) )',\n", + " '& F P04 G ! 
P03': 'and ( finally ( Prop_4 ) , globally ( not ( Prop_3 ) ) )',\n", + " 'F & | P04 P03 F P01': 'finally ( and ( or ( Prop_4 , Prop_3 ) , finally ( Prop_1 ) ) )',\n", + " 'F & | P04 P03 F P02': 'finally ( and ( or ( Prop_4 , Prop_3 ) , finally ( Prop_2 ) ) )',\n", + " 'F & P02 F X': 'finally ( and ( Prop_2 , finally ( Prop_1 ) ) )',\n", + " 'F & P02 F Z': 'finally ( and ( Prop_2 , finally ( Prop_4 ) ) )'}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_canonical_mapping" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3382" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(DPs)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ltl': 'F & P01 F P04',\n", + " 'eng': 'Prop_1 keep going and stop when you reach Prop_4'}" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DPs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"golden-lifted.jsonl\", \"w\") as f:\n", + " for dp in DPs:\n", + " entry = {\n", + " 'canonical': raw_canonical_mapping[dp['ltl']],\n", + " 'formula': raw_canonical_mapping[dp['ltl']],\n", + " 'natural': dp['eng'],\n", + " 'raw_ltl': dp['ltl'],\n", + " }\n", + " json.dump(entry, f)\n", + " f.write('\\n')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.12 ('GPML')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.17" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "92a84f77637d1d47b588cbbaac9b07f8c628b67f58e672e955ed4902878afbbe" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/raw/eng.txt b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/raw/eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..908f836237fc46a2ff5822e68a4c90eda125c2a2 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/raw/eng.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1397a0bea0699ad7464d48c1ae20497389dca87d8a31f82463e81249c02f5a53 +size 123277 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/raw/ltl.txt b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/raw/ltl.txt new file mode 100644 index 0000000000000000000000000000000000000000..4e61184a8d2f34e8e7cdda36a959ff3892ad3be2 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/raw/ltl.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:422d08898216ade2b1f301f07871a6c7ebd267de43d9bb5c57529799bb3ae048 +size 44583 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/train_seed-lifted.jsonl b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/train_seed-lifted.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..9936b7564a8f538ae238080e2ab50b53524dcace --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/train_seed-lifted.jsonl @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:aa23e9cdf5c8e4e363ddd63cd09d62bb0b8064335d224cdb6d3f0f428756d764 +size 8518 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/train_seed.jsonl b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/train_seed.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..83248c03dd298606a9c15e14c7f04f12ef9cc872 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/train_seed.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e45f3476cc3c731fae04bb82ae8496ba1bb9c5b88eeff9e28307fc58372c012a +size 15301 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/canonical.json b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/canonical.json new file mode 100644 index 0000000000000000000000000000000000000000..36e46a293620fda68c6c3c2d91098156085da0c8 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/canonical.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c09ca4c086b268eae4539e446b10d22a887805fc5295b0b9dae0bab0e82ee47 +size 73886 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/golden.jsonl b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/golden.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5a96c1d0f217c2a2c3cea7a033f4dc0830ecec77 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/golden.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e0b3ed2687a4a4f0a42898a522d4e682fcd2fcfbef98c1554517e72e880720f +size 1187504 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/preprocess.ipynb b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/preprocess.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..f4cf258de2c382cf339c37ed4de6e513652541e6 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/preprocess.ipynb @@ -0,0 +1,817 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"raw/ltl.txt\", \"r\") as f:\n", + " raw_ltls = f.read().splitlines()\n", + " unique_ltls = set(raw_ltls)\n", + "with open(\"raw/eng.txt\", 'r') as f:\n", + " raw_engs = f.read().splitlines()\n", + "\n", + "DPs = []\n", + "for ltl, eng in zip(raw_ltls, raw_engs):\n", + " DPs.append({'ltl': ltl, 'eng': eng})" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "F ( P04 )\n", + "F ( P01 )\n", + "F ( P10 )\n", + "F ( P11 )\n", + "F ( P12 )\n", + "F ( P07 )\n", + "F ( P05 )\n", + "F ( P08 )\n", + "F ( P02 )\n", + "F ( P09 )\n", + "F ( P06 )\n", + "F ( P03 )\n", + "F ( P09 & ~ P01 )\n", + "F ( P12 & ~ P04 )\n", + "F ( P06 & ~ P05 )\n", + "F ( P03 & ~ P02 )\n", + "F ( P06 & ~ P04 )\n", + "F ( P08 & ~ P12 )\n", + "F ( P02 & ~ P11 )\n", + "F ( P04 & ~ P05 )\n", + "F ( P11 & ~ P02 )\n", + "F ( P12 & ~ P03 )\n", + "F ( P07 & ~ P04 )\n", + "F ( P11 & ~ P12 )\n", + "F ( P07 & ~ P06 )\n", + "F ( P04 & ~ P10 )\n", + "F ( P08 & ~ P10 )\n", + "F ( P09 & ~ P05 )\n", + "F ( P04 & ~ P03 )\n", + "F ( P04 & ~ P11 )\n", + "F ( P02 & ~ P05 )\n", + "F ( P01 & ~ P10 )\n", + "F ( P12 
& ~ P05 )\n", + "F ( P11 & ~ P06 )\n", + "F ( P03 & ~ P11 )\n", + "F ( P06 & ~ P11 )\n", + "F ( P04 & ~ P06 )\n", + "F ( P08 & ~ P11 )\n", + "F ( P11 & ~ P05 )\n", + "F ( P10 & ~ P05 )\n", + "F ( P12 & ~ P10 )\n", + "F ( P07 & ~ P10 )\n", + "F ( P08 & ~ P04 )\n", + "F ( P08 & ~ P03 )\n", + "F ( P07 & ~ P02 )\n", + "F ( P04 & ~ P12 )\n", + "F ( P07 & ~ P01 )\n", + "F ( P10 & ~ P01 )\n", + "F ( P06 & ~ P12 )\n", + "F ( P07 & ~ P03 )\n", + "F ( P11 & ~ P04 )\n", + "F ( P08 & ~ P02 )\n", + "F ( P04 & ~ P02 )\n", + "F ( P12 & ~ P02 )\n", + "F ( P11 & ~ P03 )\n", + "F ( P02 & ~ P01 )\n", + "F ( P07 & ~ P11 )\n", + "F ( P06 & ~ P10 )\n", + "F ( P09 & ~ P10 )\n", + "F ( P10 & ~ P06 )\n", + "F ( P02 & ~ P12 )\n", + "F ( P09 & ~ P11 )\n", + "F ( P07 & ~ P05 )\n", + "F ( P04 & ~ P01 )\n", + "F ( P09 & ~ P03 )\n", + "F ( P03 & ~ P06 )\n", + "F ( P08 & ~ P01 )\n", + "F ( P09 & ~ P06 )\n", + "F ( P03 & ~ P01 )\n", + "F ( P05 & ~ P03 )\n", + "F ( P03 & ~ P05 )\n", + "F ( P06 & ~ P01 )\n", + "F ( P05 & ~ P10 )\n", + "F ( P12 & ~ P01 )\n", + "F ( P11 & ~ P10 )\n", + "F ( P03 & ~ P12 )\n", + "F ( P03 & ~ P04 )\n", + "F ( P01 & ~ P11 )\n", + "F ( P05 & ~ P02 )\n", + "F ( P02 & ~ P03 )\n", + "F ( P02 & ~ P06 )\n", + "F ( P07 & ~ P12 )\n", + "F ( P05 & ~ P11 )\n", + "F ( P05 & ~ P06 )\n", + "F ( P05 & ~ P12 )\n", + "F ( P05 & ~ P01 )\n", + "F ( P02 & ~ P04 )\n", + "F ( P02 & ~ P10 )\n", + "F ( P08 & ~ P05 )\n", + "F ( P06 & ~ P03 )\n", + "F ( P10 & ~ P12 )\n", + "F ( P08 & ~ P06 )\n", + "F ( P10 & ~ P03 )\n", + "F ( P12 & ~ P06 )\n", + "F ( P09 & ~ P04 )\n", + "F ( P10 & ~ P02 )\n", + "F ( P05 & ~ P04 )\n", + "F ( P11 & ~ P01 )\n", + "F ( P09 & ~ P02 )\n", + "F ( P10 & ~ P04 )\n", + "F ( P06 & ~ P02 )\n", + "F ( P01 & ~ P12 )\n", + "F ( P03 & ~ P10 )\n", + "F ( P09 & ~ P12 )\n", + "~ ( P04 ) U ( P12 )\n", + "~ ( P03 ) U ( P07 )\n", + "~ ( P12 ) U ( P02 )\n", + "~ ( P05 ) U ( P02 )\n", + "~ ( P12 ) U ( P05 )\n", + "~ ( P06 ) U ( P11 )\n", + "~ ( P06 ) U ( P07 )\n", + "~ ( P12 ) U ( P06 )\n", + "~ ( P03 ) U ( P12 )\n", + "~ ( P01 ) U ( P07 )\n", + "~ ( P04 ) U ( P02 )\n", + "~ ( P02 ) U ( P09 )\n", + "~ ( P05 ) U ( P10 )\n", + "~ ( P04 ) U ( P05 )\n", + "~ ( P06 ) U ( P08 )\n", + "~ ( P06 ) U ( P05 )\n", + "~ ( P04 ) U ( P03 )\n", + "~ ( P03 ) U ( P09 )\n", + "~ ( P10 ) U ( P01 )\n", + "~ ( P03 ) U ( P10 )\n", + "~ ( P11 ) U ( P04 )\n", + "~ ( P06 ) U ( P12 )\n", + "~ ( P06 ) U ( P10 )\n", + "~ ( P02 ) U ( P06 )\n", + "~ ( P05 ) U ( P07 )\n", + "~ ( P04 ) U ( P01 )\n", + "~ ( P02 ) U ( P11 )\n", + "~ ( P02 ) U ( P10 )\n", + "~ ( P02 ) U ( P08 )\n", + "~ ( P05 ) U ( P04 )\n", + "~ ( P10 ) U ( P11 )\n", + "~ ( P12 ) U ( P11 )\n", + "~ ( P10 ) U ( P04 )\n", + "~ ( P01 ) U ( P10 )\n", + "~ ( P03 ) U ( P08 )\n", + "~ ( P06 ) U ( P09 )\n", + "~ ( P02 ) U ( P07 )\n", + "~ ( P01 ) U ( P11 )\n", + "~ ( P05 ) U ( P01 )\n", + "~ ( P03 ) U ( P04 )\n", + "~ ( P02 ) U ( P03 )\n", + "~ ( P12 ) U ( P04 )\n", + "~ ( P04 ) U ( P10 )\n", + "~ ( P10 ) U ( P12 )\n", + "~ ( P11 ) U ( P06 )\n", + "~ ( P06 ) U ( P02 )\n", + "~ ( P02 ) U ( P05 )\n", + "~ ( P05 ) U ( P12 )\n", + "~ ( P03 ) U ( P06 )\n", + "~ ( P04 ) U ( P07 )\n", + "~ ( P11 ) U ( P05 )\n", + "~ ( P04 ) U ( P08 )\n", + "~ ( P12 ) U ( P03 )\n", + "~ ( P04 ) U ( P06 )\n", + "~ ( P02 ) U ( P04 )\n", + "~ ( P06 ) U ( P04 )\n", + "~ ( P03 ) U ( P02 )\n", + "~ ( P12 ) U ( P01 )\n", + "~ ( P02 ) U ( P12 )\n", + "~ ( P11 ) U ( P03 )\n", + "~ ( P11 ) U ( P01 )\n", + "~ ( P03 ) U ( P11 )\n", + "~ ( P04 ) U ( P09 )\n", + "~ ( P11 ) U ( P02 )\n", + "~ ( P10 ) U ( P05 )\n", + "~ ( P03 
) U ( P05 )\n", + "~ ( P12 ) U ( P10 )\n", + "~ ( P06 ) U ( P03 )\n", + "~ ( P05 ) U ( P06 )\n", + "~ ( P05 ) U ( P11 )\n", + "~ ( P04 ) U ( P11 )\n", + "~ ( P10 ) U ( P03 )\n", + "~ ( P01 ) U ( P12 )\n", + "~ ( P05 ) U ( P08 )\n", + "~ ( P10 ) U ( P06 )\n", + "~ ( P01 ) U ( P09 )\n", + "~ ( P05 ) U ( P03 )\n", + "~ ( P05 ) U ( P09 )\n", + "~ ( P06 ) U ( P01 )\n", + "~ ( P03 ) U ( P01 )\n", + "~ ( P02 ) U ( P01 )\n", + "~ ( P10 ) U ( P02 )\n", + "~ ( P01 ) U ( P08 )\n", + "F ( P10 & F ( P04 ) )\n", + "F ( P12 ) & G ( P04 )\n", + "F ( P03 & F ( P02 ) )\n", + "F ( P10 ) & G ( P07 )\n", + "F ( P10 ) & G ( P01 )\n", + "F ( P10 & F ( P02 ) )\n", + "F ( P07 & F ( P06 ) )\n", + "F ( P04 & F ( P07 ) )\n", + "F ( P12 ) & G ( P02 )\n", + "F ( P02 & F ( P03 ) )\n", + "F ( P03 ) & G ( P08 )\n", + "F ( P08 & F ( P11 ) )\n", + "F ( P07 & F ( P01 ) )\n", + "F ( P12 & F ( P02 ) )\n", + "F ( P01 ) & G ( P09 )\n", + "F ( P12 & F ( P11 ) )\n", + "F ( P10 & F ( P03 ) )\n", + "F ( P06 ) & G ( P07 )\n", + "F ( P12 ) & G ( P08 )\n", + "F ( P11 & F ( P02 ) )\n", + "F ( P10 & F ( P08 ) )\n", + "F ( P03 & F ( P10 ) )\n", + "F ( P10 & F ( P05 ) )\n", + "F ( P06 ) & G ( P09 )\n", + "F ( P02 & F ( P04 ) )\n", + "F ( P07 & F ( P03 ) )\n", + "F ( P12 ) & G ( P05 )\n", + "F ( P06 & F ( P04 ) )\n", + "F ( P04 ) & G ( P08 )\n", + "F ( P12 ) & G ( P03 )\n", + "F ( P01 & F ( P07 ) )\n", + "F ( P05 & F ( P09 ) )\n", + "F ( P06 ) & G ( P08 )\n", + "F ( P09 & F ( P02 ) )\n", + "F ( P12 & F ( P06 ) )\n", + "F ( P01 & F ( P03 ) )\n", + "F ( P08 & F ( P12 ) )\n", + "F ( P03 & F ( P07 ) )\n", + "F ( P11 & F ( P03 ) )\n", + "F ( P10 ) & G ( P04 )\n", + "F ( P12 ) & G ( P07 )\n", + "F ( P01 & F ( P11 ) )\n", + "F ( P03 & F ( P09 ) )\n", + "F ( P10 ) & G ( P02 )\n", + "F ( P04 ) & G ( P07 )\n", + "F ( P09 & F ( P10 ) )\n", + "F ( P01 & F ( P08 ) )\n", + "F ( P10 & F ( P01 ) )\n", + "F ( P01 & F ( P10 ) )\n", + "F ( P08 & F ( P01 ) )\n", + "F ( P03 & F ( P06 ) )\n", + "F ( P07 & F ( P10 ) )\n", + "F ( P09 & F ( P07 ) )\n", + "F ( P10 & F ( P12 ) )\n", + "F ( P12 ) & G ( P01 )\n", + "F ( P10 & F ( P09 ) )\n", + "F ( P08 & F ( P06 ) )\n", + "F ( P07 & F ( P04 ) )\n", + "F ( P02 & F ( P10 ) )\n", + "F ( P11 ) & G ( P03 )\n", + "F ( P01 & F ( P02 ) )\n", + "F ( P09 & F ( P11 ) )\n", + "F ( P11 ) & G ( P08 )\n", + "F ( P10 ) & G ( P06 )\n", + "F ( P07 & F ( P02 ) )\n", + "F ( P04 & F ( P02 ) )\n", + "F ( P12 & F ( P01 ) )\n", + "F ( P12 ) & G ( P09 )\n", + "F ( P03 & F ( P08 ) )\n", + "F ( P11 ) & G ( P02 )\n", + "F ( P02 & F ( P05 ) )\n", + "F ( P01 & F ( P04 ) )\n", + "F ( P03 & F ( P05 ) )\n", + "F ( P06 & F ( P05 ) )\n", + "F ( P02 ) & G ( P09 )\n", + "F ( P09 & F ( P05 ) )\n", + "F ( P06 & F ( P09 ) )\n", + "F ( P01 & F ( P12 ) )\n", + "F ( P05 & F ( P10 ) )\n", + "F ( P05 ) & G ( P09 )\n", + "F ( P04 & F ( P10 ) )\n", + "F ( P05 & F ( P03 ) )\n", + "F ( P09 & F ( P06 ) )\n", + "F ( P11 ) & G ( P09 )\n", + "F ( P12 & F ( P08 ) )\n", + "F ( P10 & F ( P11 ) )\n", + "F ( P06 & F ( P07 ) )\n", + "F ( P06 & F ( P12 ) )\n", + "F ( P05 & F ( P11 ) )\n", + "F ( P08 & F ( P03 ) )\n", + "F ( P08 & F ( P10 ) )\n", + "F ( P10 & F ( P07 ) )\n", + "F ( P03 ) & G ( P07 )\n", + "F ( P01 ) & G ( P08 )\n", + "F ( P08 & F ( P02 ) )\n", + "F ( P06 & F ( P10 ) )\n", + "F ( P10 & F ( P06 ) )\n", + "F ( P11 ) & G ( P01 )\n", + "F ( P09 & F ( P03 ) )\n", + "F ( P08 & F ( P04 ) )\n", + "F ( P02 ) & G ( P08 )\n", + "F ( P12 & F ( P05 ) )\n", + "F ( P03 & F ( P12 ) )\n", + "F ( P03 ) & G ( P09 )\n", + "F ( P05 & F ( P04 ) )\n", + "F ( P01 ) & G ( P07 )\n", + "F 
( P02 & F ( P12 ) )\n", + "F ( P11 & F ( P09 ) )\n", + "F ( P04 & F ( P05 ) )\n", + "F ( P07 & F ( P12 ) )\n", + "F ( P09 & F ( P08 ) )\n", + "F ( P09 & F ( P04 ) )\n", + "F ( P05 & F ( P02 ) )\n", + "F ( P12 ) & G ( P06 )\n", + "F ( P11 & F ( P05 ) )\n", + "F ( P10 ) & G ( P09 )\n", + "F ( P07 & F ( P05 ) )\n", + "F ( P06 & F ( P02 ) )\n", + "F ( P11 ) & G ( P05 )\n", + "F ( P04 & F ( P11 ) )\n", + "F ( P04 & F ( P12 ) )\n", + "F ( P10 ) & G ( P08 )\n", + "F ( P02 ) & G ( P07 )\n", + "F ( P03 & F ( P11 ) )\n", + "F ( P06 & F ( P03 ) )\n", + "F ( P08 & F ( P05 ) )\n", + "F ( P11 ) & G ( P07 )\n", + "F ( P04 ) & G ( P09 )\n", + "F ( P05 & F ( P08 ) )\n", + "F ( P07 & F ( P08 ) )\n", + "F ( P11 & F ( P01 ) )\n", + "F ( P04 & F ( P03 ) )\n", + "F ( P09 & F ( P01 ) )\n", + "F ( P12 & F ( P10 ) )\n", + "F ( P05 ) & G ( P08 )\n", + "F ( P11 & F ( P08 ) )\n", + "F ( P11 ) & G ( P04 )\n", + "F ( P04 & F ( P09 ) )\n", + "F ( P11 & F ( P07 ) )\n", + "F ( P05 ) & G ( P07 )\n", + "F ( P02 & F ( P09 ) )\n", + "F ( P08 & F ( P07 ) )\n", + "F ( P10 ) & G ( P03 )\n", + "F ( P12 & F ( P09 ) )\n", + "F ( P11 & F ( P04 ) )\n", + "F ( P11 ) & G ( P06 )\n", + "F ( P07 & F ( P11 ) )\n", + "F ( P02 & F ( P11 ) )\n", + "F ( P02 & F ( P06 ) )\n", + "F ( P05 & F ( P06 ) )\n", + "F ( P06 & F ( P08 ) )\n", + "F ( P05 & F ( P12 ) )\n", + "F ( P12 & F ( P03 ) )\n", + "F ( P04 & F ( P08 ) )\n", + "F ( P04 & F ( P06 ) )\n", + "F ( P02 & F ( P08 ) )\n", + "F ( P03 & F ( P04 ) )\n", + "F ( P12 & F ( P04 ) )\n", + "F ( P10 ) & G ( P05 )\n", + "F ( P09 & F ( P12 ) )\n", + "F ( P01 & F ( P09 ) )\n", + "F ( P01 & F ( P05 ) )\n", + "F ( P05 & F ( P07 ) )\n", + "F ( P12 & F ( P07 ) )\n", + "F ( P01 & F ( P06 ) )\n", + "F ( P06 & F ( P11 ) )\n", + "F ( P02 & F ( P07 ) )\n", + "F ( P11 & F ( P06 ) )\n" + ] + } + ], + "source": [ + "for ltl in sorted(unique_ltls, key=lambda x: len(x)):\n", + " print(ltl)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Drone-planning Domain, with augmentation\n", + "Number of Data Points 6185\n", + "Number of unique LTL expressions: 355\n", + "Number of unique LTL structures: 5\n" + ] + } + ], + "source": [ + "print(\"Drone-planning Domain, with augmentation\")\n", + "print(\"Number of Data Points\", len(DPs))\n", + "print(\"Number of unique LTL expressions:\", len(unique_ltls))\n", + "print(\"Number of unique LTL structures:\", 5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "F ( AP )\n", + "- Go to AP\n", + "\n", + "F ( AP1 & F ( AP2 ) )\n", + "- First go to AP1 and then to AP2\n", + "\n", + "\n", + "F ( AP1 ) & G ( AP2 )\n", + "- Go to AP1 and Always in AP2\n", + "\n", + "\n", + "F ( AP1 ) & ~ ( AP2 )\n", + "- Go to AP1 and avoid AP2 \n", + "\n", + "~ ( AP1 ) U ( AP2 )\n", + "- avoid going through AP1 until go through AP2\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean up LTL expressions " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "ori_aps = set()\n", + "for unique_ltl in unique_ltls:\n", + " ori_aps.update(unique_ltl.split())" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'&',\n", + " '(',\n", + " ')',\n", + " 'F',\n", + " 'G',\n", + " 'P01',\n", + " 'P02',\n", + " 'P03',\n", + " 'P04',\n", + " 'P05',\n", + " 'P06',\n", + " 'P07',\n", + " 'P08',\n", + " 'P09',\n", + 
" 'P10',\n", + " 'P11',\n", + " 'P12',\n", + " 'U',\n", + " '~'}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ori_aps" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "APs = [\n", + " {'ap': 'blue_room', 'eng': 'the blue room'}, \n", + " {'ap': 'green_room', 'eng': 'the green room'},\n", + " {'ap': 'orange_room', 'eng': 'the orange room'},\n", + " {'ap': 'purple_room', 'eng': 'the purple room'},\n", + " {'ap': 'red_room', 'eng': 'the red room'},\n", + " {'ap': 'yellow_room', 'eng': 'the yellow room'},\n", + " {'ap': 'first_floor', 'eng': 'the first floor'},\n", + " {'ap': 'second_floor', 'eng': 'the second floor'},\n", + " {'ap': 'third_floor', 'eng': 'the third floor'},\n", + " {'ap': 'landmark_1', 'eng': 'landmark 1'},\n", + " {'ap': 'landmark_2', 'eng': 'landmark 2'},\n", + " {'ap': 'landmark_3', 'eng': 'landmark 3'},\n", + "]\n", + "APs = [\n", + " {'ap': 'P01', 'eng': 'Prop_01'},\n", + " {'ap': 'P02', 'eng': 'Prop_02'},\n", + " {'ap': 'P03', 'eng': 'Prop_03'},\n", + " {'ap': 'P04', 'eng': 'Prop_04'},\n", + " {'ap': 'P05', 'eng': 'Prop_05'},\n", + " {'ap': 'P06', 'eng': 'Prop_06'},\n", + " {'ap': 'P07', 'eng': 'Prop_07'},\n", + " {'ap': 'P08', 'eng': 'Prop_08'},\n", + " {'ap': 'P09', 'eng': 'Prop_09'},\n", + " {'ap': 'P10', 'eng': 'Prop_10'},\n", + " {'ap': 'P11', 'eng': 'Prop_11'},\n", + " {'ap': 'P12', 'eng': 'Prop_12'},\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# from the translator\n", + "def build_type_A(ap1):\n", + " raw_ltl = f\"F ( {ap1['ap']} )\"\n", + " raw_ltl = f\"F ( {ap1['ap']} )\"\n", + " canonical_ltl = f\"finally ( {ap1['eng']} )\"\n", + " translated = f\"go to {ap1['eng']}\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}\n", + "\n", + "def build_type_B(room_1, room_2):\n", + " raw_ltl = f\"F ( {room_1['ap']} & F ( {room_2['ap']} ) )\"\n", + " canonical_ltl = f\"finally ( and ( {room_1['eng']} , finally ( {room_2['eng']} ) ) )\"\n", + " translated = f\"go to {room_1['eng']}, and then {room_2['eng']} at last\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}\n", + "\n", + "def build_type_C(room_1, room_2):\n", + " raw_ltl = f\"F ( {room_1['ap']} ) & G ( {room_2['ap']} )\"\n", + " canonical_ltl = f\"and ( finally ( {room_1['eng']} ) , always ( {room_2['eng']} ) )\"\n", + " translated = f\"go to {room_1['eng']} at last, and always stay in {room_2['eng']}\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}\n", + "\n", + "def build_type_D(room_1, room_2):\n", + " raw_ltl = f\"F ( {room_1['ap']} & ~ {room_2['ap']} )\"\n", + " canonical_ltl = f\"finally ( and ( {room_1['eng']} , not ( {room_2['eng']} ) ) )\"\n", + " translated = f\"go to {room_1['eng']} at last, and avoid {room_2['eng']}\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}\n", + "\n", + "def build_type_E(room_1, room_2):\n", + " raw_ltl = f\"~ ( {room_1['ap']} ) U ( {room_2['ap']} )\"\n", + " canonical_ltl = f\"until ( not ( {room_1['eng']} ) , {room_2['eng']} )\"\n", + " translated = f\"avoid go to {room_1['eng']} until go to {room_2['eng']}\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}" + 
] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "translation_seeds = []\n", + "for r1 in APs:\n", + " translation_seeds.append(build_type_A(r1))\n", + " for r2 in APs:\n", + " if r1 == r2:\n", + " continue\n", + " translation_seeds.append(build_type_B(r1, r2))\n", + " translation_seeds.append(build_type_C(r1, r2))\n", + " translation_seeds.append(build_type_D(r1, r2))\n", + " translation_seeds.append(build_type_E(r1, r2))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "540\n" + ] + } + ], + "source": [ + "print(len(translation_seeds))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "set()\n" + ] + } + ], + "source": [ + "seed_ltls = set([t['raw_ltl'] for t in translation_seeds])\n", + "print(unique_ltls - seed_ltls)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save the canonical decoding list" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "possible_decodings = {}\n", + "for seed in translation_seeds:\n", + " canonical = seed['canonical_ltl']\n", + " possible_decodings[canonical] = {\n", + " 'formula': canonical,\n", + " 'raw': seed['raw_ltl'],\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"canonical.json\", 'w') as f:\n", + " f.write(json.dumps(possible_decodings, indent=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save translation seed for zero shot" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"train_seed.jsonl\", \"w\") as f:\n", + " for dp in translation_seeds:\n", + " better_ltl = dp['canonical_ltl']\n", + " entry = {'canonical': better_ltl, 'formula': better_ltl, 'natural': dp['eng']}\n", + " json.dump(entry, f)\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save golden data for evaluation " + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "raw_canonical_mapping = {\n", + " seed['raw_ltl']: seed['canonical_ltl'] for seed in translation_seeds\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6185" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(DPs)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ltl': '~ ( P04 ) U ( P09 )',\n", + " 'eng': 'go to the twond floor avoiding Prop_04 . 
if you reached Prop_09 you can stay there , or head to Prop_04 .'}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DPs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"golden.jsonl\", \"w\") as f:\n", + " for dp in DPs:\n", + " entry = {\n", + " 'canonical': raw_canonical_mapping[dp['ltl']],\n", + " 'formula': raw_canonical_mapping[dp['ltl']],\n", + " 'natural': dp['eng'],\n", + " 'raw_ltl': dp['ltl'],\n", + " }\n", + " json.dump(entry, f)\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.12 ('GPML')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.17" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "92a84f77637d1d47b588cbbaac9b07f8c628b67f58e672e955ed4902878afbbe" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/raw/eng.txt b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/raw/eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..2756c0722662dee920aca0e381d2dfbaa24c07ed --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/raw/eng.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e92619236e5914a378b02590aa849b638636f8e990fed0c5871ca825a3514898 +size 279550 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/raw/ltl.txt b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/raw/ltl.txt new file mode 100644 index 0000000000000000000000000000000000000000..f298beb880f69480609f94bc2195d934578233c0 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/raw/ltl.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f94fbdd544881ff5878b8c1bac235b3e2daf9f3299534a6f4244ff7a1735b8d +size 127212 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/train_seed.jsonl b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/train_seed.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..76c69e58b4a0a682801b56d9db82065635bbaed6 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/train_seed.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f0497459f33dea5b40f948bb4669c686533b28884e1dd1de16e476629bde672 +size 82488 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/canonical.json b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/canonical.json new file mode 100644 index 0000000000000000000000000000000000000000..9e3f1838116d9cc1154aa8a300d8130454d6eedb --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/canonical.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1363d85f461329a60fc7199493899280d5d3b514c88283470a8423504021364 +size 1249 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/golden.jsonl b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/golden.jsonl 
new file mode 100644 index 0000000000000000000000000000000000000000..96dada4868f79e48eb6afa92c6c61d89563c251d --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/golden.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe1a0d48d87f6a65c82b7645e8d3dc2709f0b19adf16379eb7df066e1ca4b8a8 +size 256421 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/preprocess.ipynb b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/preprocess.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..b5c545db1abb15e296b062aa0b1cc828cc1dad5e --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/preprocess.ipynb @@ -0,0 +1,360 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"raw/ltl.txt\", \"r\") as f:\n", + " raw_ltls = f.read().splitlines()\n", + " unique_ltls = set(raw_ltls)\n", + "with open(\"raw/eng.txt\", 'r') as f:\n", + " raw_engs = f.read().splitlines()\n", + "\n", + "DPs = []\n", + "for ltl, eng in zip(raw_ltls, raw_engs):\n", + " DPs.append({'ltl': ltl, 'eng': eng})" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "G & U S ! C F C\n", + "G & U S ! A F A\n", + "G & U S ! Y F Y\n", + "G & U S ! R F R\n", + "G & U S ! B F B\n" + ] + } + ], + "source": [ + "for ltl in sorted(unique_ltls, key=lambda x: len(x)):\n", + " print(ltl)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Clean-up Domain, with augmentation\n", + "Number of Data Points 744\n", + "Number of unique LTL expressions: 5\n", + "Number of unique LTL structures: 1\n" + ] + } + ], + "source": [ + "print(\"Clean-up Domain, with augmentation\")\n", + "print(\"Number of Data Points\", len(DPs))\n", + "print(\"Number of unique LTL expressions:\", len(unique_ltls))\n", + "print(\"Number of unique LTL structures:\", 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "G & U S ! A F A\n", + "move the arm to the block and pick it up next placing it in the bin and coming back to pick up the next block\n", + "\n", + "G & U S ! R F R\n", + "watch for blocks to be set down and then move them into the basket but not any orange ones\n", + "\n", + "G & U S ! B F B\n", + "watch for blocks to be set down and then move them into the basket but not any blue ones\n", + "\n", + "G & U S ! C F C\n", + "watch for blocks to be set down and then move them into the basket but not any green ones\n", + "\n", + "G & U S ! 
Y F Y\n", + "watch for blocks to be set down and then move them into the basket but not any yellow ones\n", + "\n" + ] + } + ], + "source": [ + "seen = set()\n", + "from random import shuffle, seed\n", + "seed(0)\n", + "# shuffle(DPs)\n", + "for dp in DPs:\n", + " if dp['ltl'] not in seen:\n", + " seen.add(dp['ltl'])\n", + " print(dp['ltl'])\n", + " print(dp['eng'])\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean up LTL expressions " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "APs = [\n", + " {'ap': \"A\", 'eng': \"any cubes\"},\n", + " {'ap': \"R\", 'eng': \"any non red cubes\"},\n", + " {'ap': \"B\", 'eng': \"any non blue cubes\"},\n", + " {'ap': \"Y\", 'eng': \"any non yellow cubes\"},\n", + " {'ap': \"C\", 'eng': \"any non green cubes\"},\n", + "]\n", + "APs = [\n", + " {'ap': \"P01\", 'eng': \"any cubes\"},\n", + " {'ap': \"P02\", 'eng': \"any non red cubes\"},\n", + " {'ap': \"P03\", 'eng': \"any non blue cubes\"},\n", + " {'ap': \"P04\", 'eng': \"any non yellow cubes\"},\n", + " {'ap': \"P05\", 'eng': \"any non green cubes\"},\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def build_type_A(ap1):\n", + " return {\n", + " 'raw_ltl': f\"G & U S ! {ap1['ap']} F {ap1['ap']}\",\n", + " 'canonical_ltl': f\"globally ( and ( until ( scan , not ( {ap1['eng']} ) ) , finally ( {ap1['eng']} ) ) )\",\n", + " 'eng': f\"Look for and pick up {ap1['eng']} and put them in crate.\"}" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "translation_seeds = []\n", + "for r1 in APs:\n", + " translation_seeds.append(build_type_A(r1))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5\n" + ] + } + ], + "source": [ + "print(len(translation_seeds))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "set()\n" + ] + } + ], + "source": [ + "seed_ltls = set([t['raw_ltl'] for t in translation_seeds])\n", + "print(unique_ltls - seed_ltls)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save the canonical decoding list" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "possible_decodings = {}\n", + "for seed in translation_seeds:\n", + " canonical = seed['canonical_ltl']\n", + " possible_decodings[canonical] = {\n", + " 'formula': canonical,\n", + " 'raw': seed['raw_ltl'],\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"canonical.json\", 'w') as f:\n", + " f.write(json.dumps(possible_decodings, indent=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save translation seed for zero shot" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"train_seed.jsonl\", \"w\") as f:\n", + " for dp in translation_seeds:\n", + " better_ltl = dp['canonical_ltl']\n", + " entry = {'canonical': better_ltl, 'formula': better_ltl, 'natural': dp['eng']}\n", + " json.dump(entry, f)\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save 
golden data for evaluation " + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "raw_canonical_mapping = {\n", + " seed['raw_ltl']: seed['canonical_ltl'] for seed in translation_seeds\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "744" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(DPs)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"golden.jsonl\", \"w\") as f:\n", + " for dp in DPs:\n", + " entry = {\n", + " 'canonical': raw_canonical_mapping[dp['ltl']],\n", + " 'formula': raw_canonical_mapping[dp['ltl']],\n", + " 'natural': dp['eng'],\n", + " 'raw_ltl': dp['ltl'],\n", + " }\n", + " json.dump(entry, f)\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.12 ('GPML')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "92a84f77637d1d47b588cbbaac9b07f8c628b67f58e672e955ed4902878afbbe" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/raw/eng copy.txt b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/raw/eng copy.txt new file mode 100644 index 0000000000000000000000000000000000000000..9ae7e21ced6e1aaf90aafe45e2839fd13cd412e3 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/raw/eng copy.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56804682d199b31496377cdf96d26a55dd1d56a3c23f75fcae2b67ae74c55c6e +size 59880 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/raw/eng.txt b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/raw/eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..5cb701336ff2b9e2bd533bd5858f1d6f9c2e45a7 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/raw/eng.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e87a60993706acc9b49cd7c77af816a2da598af6b4a207c015222cbd33a88d8 +size 59540 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/raw/ltl.txt b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/raw/ltl.txt new file mode 100644 index 0000000000000000000000000000000000000000..a43774e6d72914ceab9e5f7aa76553bb9d6a7e4f --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/raw/ltl.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:824bdb89067fc97767f2239c8c930c657bbcf9ace019f8b8369ed6a12e920caa +size 14879 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/train_seed.jsonl b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/train_seed.jsonl new file mode 
100644 index 0000000000000000000000000000000000000000..cfe8cedd74fa10d259a9b398a76e3431116402b6 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/train_seed.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fff7cee728f6db130cb08340071e38939f55b1c8ca7e8b0c34faecb18a597347 +size 1485 diff --git a/NL2TL-dataset/datasets-Lang2LTL/README.md b/NL2TL-dataset/datasets-Lang2LTL/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0752c463862c24862eb1c1b24d03848f3e922ad8 --- /dev/null +++ b/NL2TL-dataset/datasets-Lang2LTL/README.md @@ -0,0 +1,9 @@ +# Lang2LTL +Translation from natural language to Linear Temporal Logic + +There are two models in this project. +- Seq2Seq +- Seq2Seq with NER + +The Comet experiments linked to these two models are: +https://www.comet.ml/suchzheng2/lang2ltl/view/new diff --git a/NL2TL-dataset/datasets-Lang2LTL/input_500_originaltext.csv b/NL2TL-dataset/datasets-Lang2LTL/input_500_originaltext.csv new file mode 100644 index 0000000000000000000000000000000000000000..d16154f7a5c3a16f4997f54cbc88d7bf0d2ce50b --- /dev/null +++ b/NL2TL-dataset/datasets-Lang2LTL/input_500_originaltext.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94dccd1079c60de4b86f3467cdd6ae157ad9484f88126c9e8a570f465f55e5a2 +size 45392 diff --git a/NL2TL-dataset/datasets-Lang2LTL/seq2seqENR_validation.txt b/NL2TL-dataset/datasets-Lang2LTL/seq2seqENR_validation.txt new file mode 100644 index 0000000000000000000000000000000000000000..691a6f16beeb373081fdd7de0741db0931c5e66e --- /dev/null +++ b/NL2TL-dataset/datasets-Lang2LTL/seq2seqENR_validation.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cf7a7de8ce5e3d0da2ed861eda7b80c998837a9314198cf5c85d0158dbc46e6 +size 6372 diff --git a/NL2TL-dataset/datasets-Lang2LTL/seq2seqENR_validation_false.txt b/NL2TL-dataset/datasets-Lang2LTL/seq2seqENR_validation_false.txt new file mode 100644 index 0000000000000000000000000000000000000000..887a529d456b8b4216ec3e84d9abcd2c82deeca4 --- /dev/null +++ b/NL2TL-dataset/datasets-Lang2LTL/seq2seqENR_validation_false.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b711d75a567a06fd2d6037c25469b7aa6b9cd4a906619dea488d3d77d6259e89 +size 1158 diff --git a/NL2TL-dataset/datasets-Lang2LTL/seq2seq_validatin_results b/NL2TL-dataset/datasets-Lang2LTL/seq2seq_validatin_results new file mode 100644 index 0000000000000000000000000000000000000000..f11337984a0601371bfe30b6c7cda294cb00db60 --- /dev/null +++ b/NL2TL-dataset/datasets-Lang2LTL/seq2seq_validatin_results @@ -0,0 +1,219 @@ +> find the bookstore and then find the nelson fitness center += f (bookstore & f (nelson fitness center)) + + +> stay on the angell st first find the bookstore and then find the chipotle += g (angell st) & f (bookstore & f (chipotle)) + + +> do not enter angell st first find the barus building and then find the marston hall += g (! angell st) & f (barus building & f (marston hall)) + + +> stay on the thayer st and find the chipotle += g (thayer st) & f (chipotle) + + +> go to the citizen's bank and then go to the bookstore += f (citizen's bank & f (bookstore)) + + +> do not enter brook st first find the cvs and then find the chipotle += g (! brook st) & f (cvs & f (chipotle)) + + +> do not enter thayer st and find the marston hall += g (! thayer st) & f (marston hall) + + +> do not enter brook st first find the bookstore and then find the chipotle += g (! 
brook st) & f (bookstore & f (chipotle)) + + +> do not enter brook st first find the starbucks and then find the bookstore += g (! brook st) & f (starbucks & f (bookstore)) + + +> stay on the thayer st first find the chipotle and then find the starbucks += g (thayer st) & f (chipotle & f (starbucks)) + + +> do not enter brook st first find the chipotle and then find the cvs += g (! brook st) & f (chipotle & f (cvs)) + + +> go to the science library and then go to the watson center += f (science library & f (watson center)) + + +> go to the cvs and then go to the bookstore += f (cvs & f (bookstore)) + + +> go to the science library and then go to the chipotle += f (science library & f (chipotle)) + + +> find the barus building and then find the fedex office += f (barus building & f (fedex office)) + + +> go to the science library and then go to the citizen's bank += f (science library & f (citizen's bank)) + + +> stay on the thayer st first find the bookstore and then find the chipotle += g (thayer st) & f (bookstore & f (chipotle)) + + +> go to the watson center and then go to the cvs += f (watson center & f (cvs)) + + +> go to the marston hall and then go to the nelson fitness center += f (marston hall & f (nelson fitness center)) + + +> go to the chipotle and then go to the kabob and curry += f (chipotle & f (kabob and curry)) + + +> stay on the thayer st first find the cvs and then find the bookstore += g (thayer st) & f (cvs & f (bookstore)) + + +> do not enter angell st and find the barus building += g (! angell st) & f (barus building) + + +> find the cvs and then find the starbucks += f (cvs & f (starbucks)) + + +> go to the cvs and then go to the fedex office += f (cvs & f (fedex office)) + + +> go to the science library and then go to the starbucks += f (science library & f (starbucks)) + + +> go to the barus building and then go to the chipotle += f (barus building & f (chipotle)) + + +> do not enter brook st first find the bookstore and then find the chipotle += g (! brook st) & f (bookstore & f (chipotle)) + + +> find the fedex office and then find the chipotle += f (fedex office & f (chipotle)) + + +> do not enter brook st first find the bookstore and then find the starbucks += g (! brook st) & f (bookstore & f (starbucks)) + + +> find the citizen's bank and then find the kabob and curry += f (citizen's bank & f (kabob and curry)) + + +> do not enter angell st first find the watson center and then find the marston hall += g (! angell st) & f (watson center & f (marston hall)) + + +> go to the barus building and then go to the marston hall += f (barus building & f (marston hall)) + + +> find the kabob and curry and then find the citizen's bank += f (kabob and curry & f (citizen's bank)) + + +> do not enter angell st first find the citizen's bank and then find the watson center += g (! angell st) & f (citizen's bank & f (watson center)) + + +> do not enter brook st and find the chipotle += g (! brook st) & f (chipotle) + + +> find the fedex office and then find the marston hall += f (fedex office & f (marston hall)) + + +> go to the citizen's bank += f (citizen's bank) + + +> stay on the thayer st first find the kabob and curry and then find the bookstore += g (thayer st) & f (kabob and curry & f (bookstore)) + + +> do not enter angell st first find the marston hall and then find the watson center += g (! 
angell st) & f (marston hall & f (watson center)) + + +> find the science library and then find the fedex office += f (science library & f (fedex office)) + + +> do not leave the thayer st and find the chipotle += g (thayer st) & f (chipotle) + + +> find the marston hall and then find the science library += f (marston hall & f (science library)) + + +> go to the watson center and then go to the nelson fitness center += f (watson center & f (nelson fitness center)) + + +> do not enter angell st first find the citizen's bank and then find the marston hall += g (! angell st) & f (citizen's bank & f (marston hall)) + + +> do not enter brook st first find the science library and then find the bookstore += g (! brook st) & f (science library & f (bookstore)) + + +> stay on the brook st and find the marston hall += g (brook st) & f (marston hall) + + +> go to the marston hall and then go to the cvs += f (marston hall & f (cvs)) + + +> stay away from thayer st and find the barus building += g (! thayer st) & f (barus building) + + +> do not enter brook st first find the bookstore and then find the chipotle += g (! brook st) & f (bookstore & f (chipotle)) + + +> do not enter brook st first find the chipotle and then find the bookstore += g (! brook st) & f (chipotle & f (bookstore)) + + +> stay away from thayer st and find the bookstore += g (! thayer st) & f (bookstore) + + +> go to the nelson fitness center and then go to the chipotle += f (nelson fitness center & f (chipotle)) + + +> find the bookstore and then find the marston hall += f (bookstore & f (marston hall)) + + +> find the kabob and curry and then find the starbucks += f (kabob and curry & f (starbucks)) + + +> stay on the thayer st first find the kabob and curry and then find the cvs += g (thayer st) & f (kabob and curry & f (cvs)) + \ No newline at end of file diff --git a/NL2TL-dataset/datasets-Lang2LTL/seq2seq_validation_false.txt b/NL2TL-dataset/datasets-Lang2LTL/seq2seq_validation_false.txt new file mode 100644 index 0000000000000000000000000000000000000000..ee7cb2d8a54a721c8c6922c5b5b88d7e05530ddf --- /dev/null +++ b/NL2TL-dataset/datasets-Lang2LTL/seq2seq_validation_false.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a579605b2bb4bfa35d2e2b6d2cb051c2dde5179c0ceabd4ab00fb37c7b2c6f1 +size 959 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/CW/CW_total_3382_for_transfer_word_midfix.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/CW/CW_total_3382_for_transfer_word_midfix.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3d31d1f55a9d923930cd06a2408464e0dee86799 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/CW/CW_total_3382_for_transfer_word_midfix.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b10c9628cb9941c8b7ea7fea71b2fa16edcd2779a6033bf473a6dcdd64f000f +size 604641 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_lifted_total39378_05_19/lifted_data.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/Data_lifted_total39378_05_19/lifted_data.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..105beb79da430fbc3d835d1206c2961fda1dcf6e --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_lifted_total39378_05_19/lifted_data.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c68ab33843fe0485380f33b6e49cb7c9230eae66252d869083584dd7ef048afb +size 12458149 diff --git 
a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/CW_total_3382_for_transfer_word_midfix.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/CW_total_3382_for_transfer_word_midfix.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3d31d1f55a9d923930cd06a2408464e0dee86799 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/CW_total_3382_for_transfer_word_midfix.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b10c9628cb9941c8b7ea7fea71b2fa16edcd2779a6033bf473a6dcdd64f000f +size 604641 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/GLTL_test_2232_for_transfer_word_midfix.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/GLTL_test_2232_for_transfer_word_midfix.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..37d273a7b8f000b9cdaad5ee816f205604eb88b7 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/GLTL_test_2232_for_transfer_word_midfix.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:929120bed687b075462988aa54206517a6a7b962392e6d5cb298a571a31ea43d +size 458174 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/GLTL_train_8923_for_transfer_word_midfix.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/GLTL_train_8923_for_transfer_word_midfix.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..aef9cd294a1c4685e353a7e74e0b3e4b3c432361 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/GLTL_train_8923_for_transfer_word_midfix.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc7aff94ba621fb7def1b174b2f5ee0e1336b572ed65bb7c15f03f1295156930 +size 1839113 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/CW/hard_pc_src_syn.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/CW/hard_pc_src_syn.txt new file mode 100644 index 0000000000000000000000000000000000000000..2adaed9e6fe8556d1aec8f74d72c77b255cb1d2e --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/CW/hard_pc_src_syn.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8de33b8c18bd4d1ba0d7cf4a49fb3ff08f908eca0942484f7f37c8c8d70cb356 +size 154268 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/CW/hard_pc_tar_syn.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/CW/hard_pc_tar_syn.txt new file mode 100644 index 0000000000000000000000000000000000000000..8d75d5ba461a87093b82d116f7b783c362a8e440 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/CW/hard_pc_tar_syn.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a639bac11e74ce17140e8812eede765580025094975e105bbdb423639f8fdea +size 32323 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/CW/readme.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/CW/readme.txt new file mode 100644 index 0000000000000000000000000000000000000000..ec4b031ca74bb00bb5c235b4b88f38147236fb59 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/CW/readme.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:5afb8bfcf7df46c5c52fd15667cf10f325c39d9b9edd195f0b880f29b6a44dcd +size 441 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/test_src.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/test_src.txt new file mode 100644 index 0000000000000000000000000000000000000000..eac5a2c6b32da7a8b0c91027183c600eeed745bc --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/test_src.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:336e8478aa231f107b1285ba3d071920a4d7cf5279d942028cc71f85031ccd16 +size 122528 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/test_tar.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/test_tar.txt new file mode 100644 index 0000000000000000000000000000000000000000..8f6cce88f36e042dc627f6d0f6c5c12cb89725ce --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/test_tar.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:910615cecd4b8af1ac735bc3111e6b76e24891cf53f82150dd4e876953d51a14 +size 24990 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/train_src.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/train_src.txt new file mode 100644 index 0000000000000000000000000000000000000000..e6396ae796327b05823f1c77e3b2a3b082dca63c --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/train_src.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a14b0d7313d27a7dd2bdfcbb128b08f155a9957ef3497bcf570c6746023f731 +size 485651 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/train_tar.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/train_tar.txt new file mode 100644 index 0000000000000000000000000000000000000000..9581e635af74d25be566710a6c2efc81d15294e4 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/train_tar.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1985262c03d599245e46c81cacd274bdaf84e0561f052fa5230e4754238718e +size 100804 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Navi/src.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Navi/src.txt new file mode 100644 index 0000000000000000000000000000000000000000..aa7b7d9f4a291bc94c95fd5e95609e9b66c4e6c2 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Navi/src.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:694127ec676506947299d57959e190393efca1a70ff259221291caf3032c9b8b +size 372558 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Navi/tar.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Navi/tar.txt new file mode 100644 index 0000000000000000000000000000000000000000..fe701928335784cf5b03787226c0b1a85057bee9 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Navi/tar.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3021aa4b921095d88d7bbba4ecf9190dad249c3eccc0b3ded6a7ee5ff3609d4f +size 453829 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Seq2seq_lifted_dataset_all_txt/src.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Seq2seq_lifted_dataset_all_txt/src.txt new file mode 100644 index 0000000000000000000000000000000000000000..ebda7126793223e9a9346a6874c0225e26a7f729 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Seq2seq_lifted_dataset_all_txt/src.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:294efdd3cc1dd1a7c980fb3360c2dd1c9864be551fadb74e0cbc8d74e8ae098d +size 849652 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Seq2seq_lifted_dataset_all_txt/tar.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Seq2seq_lifted_dataset_all_txt/tar.txt new file mode 100644 index 0000000000000000000000000000000000000000..d395a6c8b06a33eb72412a3147335f11ae7aaeec --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Seq2seq_lifted_dataset_all_txt/tar.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e00fb12bef2f9c5dcb1b1e266dc5989c60ac0783f729237e0c51e48997e4b080 +size 498925 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/circuit_total_refined.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/circuit_total_refined.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..dc97ed176f0b9a36d948ca1bdd8e802ffceea87c --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/circuit_total_refined.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29be14b96cb7519d73901f6ed1c02d85bb29ee1a3ed1f306f3ffe0af272fca81 +size 7507287 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/navi_total_refined.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/navi_total_refined.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ae1e41389d8202ce107919bd0c651d33a63416f1 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/navi_total_refined.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11499148f1f053c86476f08ec39f9cb0cb724eb9451d0153e2f34c77a04855b8 +size 3825939 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/README.md b/NL2TL-dataset/datasets-NL2TL_data_github/README.md new file mode 100644 index 0000000000000000000000000000000000000000..953ca8391973537fcd1991d15e29e1a7a510087b --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/README.md @@ -0,0 +1,121 @@ +# NL2STL +Webpage: https://yongchao98.github.io/MIT-realm-NL2TL/ + +Demo Website: http://realm-02.mit.edu:8444 + +Paper Link: https://arxiv.org/pdf/2305.07766.pdf + +Dataset Link: https://drive.google.com/drive/folders/10F-qyOhpqEi83o9ZojymqRPUtwzSOcfq?usp=sharing + +Model Link: [https://drive.google.com/drive/folders/1vSaKOunMPA3uiOdx6IDbe-gmfREXQ9uO?usp=share_link](https://drive.google.com/drive/folders/1ZfZoYovWoy5z247VXZWZBniNrCOONX4N?usp=share_link) + +To access the Demo Website, please send an email to ycchen98@mit.edu or yongchaochen@fas.harvard.edu for the **password**. + +This project transforms human natural language into Signal Temporal Logic (STL). 
To enhance generalizability, each specific atomic proposition (AP) in the natural language is represented as prop_1, prop_2, etc. In this way, the trained model can more easily be transferred to various specific domains. The APs refer to specific specifications, like grabbing the apple or going to the room. + +Also, the current work does not consider co-reference. Therefore, **each prop_i should only appear once in each sentence**. One inference example is as follows: + +Input natural language: + +``` +If ( prop_2 ) happens and continues to happen until at some point during the 176 to 415 time units that ( prop_1 ) , and also if ( prop_3 ) , then the scenario is equivalent to ( prop_4 ) . +``` + +Output Signal temporal logic: + +``` +( ( ( prop_2 until [176,415] prop_1 ) and prop_3 ) equal prop_4 ) +``` + +The operations we use are U (until), F (finally), G (globally), | (or), & (and), -> (imply), <-> (equal), and negation. We also allow time-interval definitions, like U[0,5], F[12,100], and G[30,150]. The time bounds are currently constrained to integers, and infinite can be used to express all future time, as in [5,infinite]. The following are the illustrations. More NL-TL pair examples are at https://drive.google.com/file/d/1f-wQ8AKInlTpXTYKwICRC0eZ-JKjAefh/view?usp=sharing +``` +prop_1 U[0,5] prop_2 : There exists a time point t between 0 and 5 timesteps from now such that prop_1 continues to happen until that timestep, and prop_2 happens at that timestep. +``` +``` +F[12,100] prop_2 : There exists a time point t between 12 and 100 timesteps from now such that prop_2 happens at that timestep. +``` +``` +G[30,150] prop_2 : At every time between 30 and 150 timesteps from now, prop_2 happens. +``` +``` +prop_1 -> prop_2 : If prop_1 happens, then prop_2 also happens. +``` +``` +prop_1 <-> prop_2 : prop_1 happens if and only if prop_2 happens. +``` + +## Description + +Signal Temporal Logic (STL) is a formal language for specifying properties of signals. It is used to specify properties of continuous-time signals, such as signals from sensors or control systems, in a way that is precise and easy to understand. + +STL has a syntax that is similar to the temporal logic used in computer science, but it is specialized for continuous-time signals. It includes operators for describing the values of a signal, as well as operators for combining and modifying those descriptions. + +For example, the STL formula F[0, 2] (x > 0.5) specifies the property that the signal x is greater than 0.5 at some time point between 0 and 2 seconds. This formula can be read as "the signal x eventually exceeds 0.5 within the first 2 seconds". + +STL can be used to verify that a signal satisfies a given property, or to synthesize a controller that ensures that a signal satisfies a given property. It is a powerful tool for reasoning about the behavior of continuous-time systems. + +While STL is quite powerful, humans are more familiar with defining tasks via natural language. Here we try to bridge this gap by fine-tuning large language models.
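+
+To make the bounded-operator semantics above concrete, here is a minimal, self-contained sketch (ours, for illustration only; it is not part of the NL2TL codebase) that evaluates the three bounded operators over a finite trace of boolean valuations, where trace[t]["prop_1"] says whether prop_1 holds t timesteps from now:
+
+```python
+# Illustrative sketch of the bounded-operator semantics over a finite,
+# discrete-time trace. Not part of the NL2TL repository.
+from typing import Dict, List
+
+Trace = List[Dict[str, bool]]
+
+def holds_at(trace: Trace, t: int, prop: str) -> bool:
+    # A proposition cannot hold past the end of the trace.
+    return t < len(trace) and trace[t][prop]
+
+def finally_op(trace: Trace, a: int, b: int, prop: str) -> bool:
+    # F[a,b] prop: prop holds at some timestep t with a <= t <= b.
+    return any(holds_at(trace, t, prop) for t in range(a, b + 1))
+
+def globally_op(trace: Trace, a: int, b: int, prop: str) -> bool:
+    # G[a,b] prop: prop holds at every timestep t with a <= t <= b.
+    return all(holds_at(trace, t, prop) for t in range(a, b + 1))
+
+def until_op(trace: Trace, a: int, b: int, p: str, q: str) -> bool:
+    # p U[a,b] q: q holds at some t in [a,b], and p holds at every
+    # timestep before t.
+    return any(
+        holds_at(trace, t, q) and all(holds_at(trace, k, p) for k in range(t))
+        for t in range(a, b + 1)
+    )
+
+# Tiny example: prop_1 holds for three steps; prop_2 first holds at step 2.
+trace = [{"prop_1": True, "prop_2": False},
+         {"prop_1": True, "prop_2": False},
+         {"prop_1": True, "prop_2": True}]
+print(until_op(trace, 0, 5, "prop_1", "prop_2"))  # True
+print(globally_op(trace, 0, 2, "prop_1"))         # True
+print(finally_op(trace, 0, 1, "prop_2"))          # False
+```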
+ +## Getting Started + +### Dependencies + +* The inference model should run on a GPU; you can run the notebook file Run.ipynb on Google Colab, or run_trained_model.py in your own GPU environment. +* As for setting up the environment, we install our environment via Miniconda. You can first set it up via https://docs.conda.io/projects/conda/en/latest/user-guide/install/linux.html +* Then install the packages: +``` +conda install pytorch torchvision torchaudio pytorch-cuda=11.6 -c pytorch -c nvidia +conda install pip +conda install python +conda install numpy +conda install pandas +pip install transformers +pip install SentencePiece +``` + +### Installing + +* First download the whole directory with the command +``` +git clone git@github.com:yongchao98/NL2TL.git +``` +* Then download the trained weights (e.g., checkpoint-62500) of our model at [https://drive.google.com/file/d/19uiB_2XnnnVmDInaLbQeoZq25ghUdg4D/view](https://drive.google.com/drive/folders/1ZfZoYovWoy5z247VXZWZBniNrCOONX4N?usp=sharing) +* After downloading both the code and the model weights, put the model weights checkpoint-62500 into your self-defined directory. + +### Other codes and datasets + +* As for the other code and datasets published on GitHub, please read **Illustration of Code and Dataset.pdf** for specific explanations of their utilities. + +## Authors + +Contributors' names and contact info + +Yongchao Chen (Harvard University, Massachusetts Institute of Technology, Laboratory of Information and Decision Systems) + +yongchaochen@fas.harvard.edu or ycchen98@mit.edu + +## Citation for BibTeX + +@article{chen2023nl2tl, + title={NL2TL: Transforming Natural Languages to Temporal Logics using Large Language Models}, + author={Chen, Yongchao and Gandhi, Rujul and Zhang, Yang and Fan, Chuchu}, + journal={arXiv preprint arXiv:2305.07766}, + year={2023} +} + +## Version History + +* 0.1 + * Initial Release on May 12, 2023 + +## License + +The paper corresponding to this project will be attached here in the coming months. This project may only be used commercially with our permission. 
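+
+As a quick end-to-end illustration of the Getting Started steps above, the following is a hypothetical inference sketch. It is ours, not the repository's run_trained_model.py, which remains the authoritative entry point; it assumes checkpoint-62500 loads as a standard Hugging Face seq2seq (T5-style) checkpoint, consistent with the transformers and SentencePiece dependencies listed above, and the exact input formatting the model expects may differ:
+
+```python
+# Hypothetical sketch; see run_trained_model.py for the released pipeline.
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+ckpt = "path/to/checkpoint-62500"  # your self-defined directory
+tokenizer = AutoTokenizer.from_pretrained(ckpt)
+model = AutoModelForSeq2SeqLM.from_pretrained(ckpt)
+
+nl = ("If ( prop_2 ) happens and continues to happen until at some point "
+      "during the 176 to 415 time units that ( prop_1 ) , and also if "
+      "( prop_3 ) , then the scenario is equivalent to ( prop_4 ) .")
+inputs = tokenizer(nl, return_tensors="pt")
+outputs = model.generate(**inputs, max_length=128)
+print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+# Expected output shape (per the inference example above):
+# ( ( ( prop_2 until [176,415] prop_1 ) and prop_3 ) equal prop_4 )
+```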
+ +## Recommended Work + +[AutoTAMP: Autoregressive Task and Motion Planning with LLMs as Translators and Checkers](https://arxiv.org/pdf/2306.06531.pdf) + +[Scalable Multi-Robot Collaboration with Large Language Models: Centralized or Decentralized Systems?](https://yongchao98.github.io/MIT-REALM-Multi-Robot/) + diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Readme.docx b/NL2TL-dataset/datasets-NL2TL_data_github/Readme.docx new file mode 100644 index 0000000000000000000000000000000000000000..341b509d9654aacd98cef578859db88885a5f035 Binary files /dev/null and b/NL2TL-dataset/datasets-NL2TL_data_github/Readme.docx differ diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/circuit_total.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/circuit_total.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..72ddc3104cd9c4475e2a2f225f37780bf891a303 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/circuit_total.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eee8491bd2b58b6ade2137528cd0686bea5414a6c4b3683af3079a9ead8239a4 +size 57441457 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/circuit_total_span.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/circuit_total_span.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f695e33725dca8e64bfc50f6e02828ceb937a5e7 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/circuit_total_span.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1528e3aa2e9bb2a11982acabcb652f7e0fb96a14f40137a365fdde22dda2a903 +size 85675938 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/circuit_total_span_2.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/circuit_total_span_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f695e33725dca8e64bfc50f6e02828ceb937a5e7 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/circuit_total_span_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1528e3aa2e9bb2a11982acabcb652f7e0fb96a14f40137a365fdde22dda2a903 +size 85675938 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/corpus_id.csv b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/corpus_id.csv new file mode 100644 index 0000000000000000000000000000000000000000..abf00bb38f92102151b812906cb509d32bb5bde7 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/corpus_id.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c56b5ef5ec82edfcf51926f74fddc977514e53c1e8c3cc1a6012bd9c6c3047c +size 46673129 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/navi_total_refined.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/navi_total_refined.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ae1e41389d8202ce107919bd0c651d33a63416f1 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/navi_total_refined.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11499148f1f053c86476f08ec39f9cb0cb724eb9451d0153e2f34c77a04855b8 +size 3825939 diff --git a/NL2TL-dataset/datasets-nl2spec/Readme.md b/NL2TL-dataset/datasets-nl2spec/Readme.md new file mode 100644 index 0000000000000000000000000000000000000000..8d807780fb55214df2b1ac9fb852eba520ef8965 --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/Readme.md @@ -0,0 +1,98 @@ +# Overview + +nl2spec is a framework for applying Large Language 
Models (LLMs) to derive formal specifications (in temporal logics) from unstructured natural language. It introduces a new methodology to detect and resolve the inherent ambiguity of system requirements in natural language by utilizing LLMs to map subformulas of the formalization back to the corresponding natural language fragments of the input. Users iteratively add, delete, and edit these sub-translations to amend erroneous formalizations, which is easier than manually redrafting the entire formalization. + +The tool works best when providing in-distribution few-shot examples (see [Prompting](#prompting)). It can be easily adjusted to other models by extending the backend (see [Extendability](#extendability)). + +# Install + +Fulfill dependencies: +- [flask](https://flask.palletsprojects.com/en/2.2.x/) +- [ltlf2dfa](https://github.com/whitemech/LTLf2DFA) +- [huggingface](https://huggingface.co/) +- [openai-api](https://openai.com/blog/openai-api) +- [google-cloud-aiplatform](https://cloud.google.com/python/docs/reference/aiplatform/latest/index.html) + +All model inferences run in the cloud. Therefore, access to the respective APIs is necessary. + - For bloom and bloomz, a [huggingface](huggingface.co) *User Access Token* is needed. You can create a token and use bloom/bloomz for free. + - For Codex / GPT-based models, an [OpenAI](openai.com) *API key* is needed. See [OpenAI's Pricing](https://openai.com/pricing) + - For using PaLM, access through *google cloud platform* is required. Set up a project in [google cloud platform](https://console.cloud.google.com/) with [Vertex AI enabled](https://console.cloud.google.com/vertex-ai) and [authenticate with the Google Cloud CLI](https://cloud.google.com/cli). See [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing). + - See [Extendability](#extendability) on how to add more models. + - Model inference through the APIs may be discontinued by the providers, which is beyond our control. This currently affects codex and bloomz. In the web interface, we marked these models as *obsolete*. + + +# Run frontend + +From inside the ```src``` folder: +- create the appropriate file and paste your OpenAI key, HF key, or Google project ID into it: ```keys/oai_key.txt```, ```keys/hf_key.txt```, or ```keys/google_project_id.txt``` +- ```python3 -m flask --app frontend.py run``` +- add ```--debug``` for debug mode +- Then open the web-based tool at http://127.0.0.1:5000 + +# Run from terminal + +The tool can also be run from the command line from the ```src``` folder. See ```python3 nl2ltl.py --help``` for more details. + +E.g., with gpt-3.5: + +```python3 nl2ltl.py --model gpt-3.5-turbo --keyfile PATH/TO/YOUR/OPENAIKEY --nl "Globally a and b until c." --num_tries 3 --temperature 0.2 --prompt minimal``` + +or bloom + +```python3 nl2ltl.py --model bloom --keyfile PATH/TO/YOUR/HFKEY --nl "Every request is eventually followed by a grant." --num_tries 3 --temperature 0.2 --prompt minimal``` + +or PaLM + +```python3 nl2ltl.py --model text-bison@001 --keyfile PATH/TO/YOUR/GCLOUD/PROJECTID --nl "Every request is eventually followed by a grant." --num_tries 3 --temperature 0.2 --prompt minimal``` + +# Prompting + +The prompting pattern consists of a short introduction to the specification language, followed by few-shot examples that exploit the LLM to construct subtranslations. 
+We provide four example prompts in ```/prompts```: +- minimal.txt - the generic prompt used for the user study experiments in the [paper]() +- indistribution.txt - the prompt with few-shot examples drawn from the user study +- smart.txt - a prompt obtained from examples in a recently conducted [smart home user study]() +- stl.txt - a proof of concept extension for [Signal Temporal Logic (STL)]() + +See [the paper]() and ```/prompts``` for more details. +The prompts in nl2spec must follow a specific pattern to obtain the subtranslations as follows. + +`````` + +Followed by few shot examples, where each few shot example consists of the following: + +`````` + +`````` + +`````` + +`````` + +`````` + +# Extendability + +The tool can be easily extended to other specification languages based on temporal logics (see ```/prompts/stl.txt```). The best performance is expected when drawing the few-shot examples from the distribution to translate. Adding a few well-crafted examples to the prompt and then using the tool to translate the rest of the workload is significantly easier than translating every requirement from scratch. + +Additionally, the tool can be extended to more fine-tuned or other upcoming open-source language models. To this end, ```backend.py``` must be extended with a new ```new_model```, for which a method named ```new_model``` must be implemented in ```models.py```. + +# How to cite + +``` +@inproceedings{nl2spec, + title = {nl2spec: Interactively Translating Unstructured Natural Language to Temporal Logics with Large Language Models}, + author = {Cosler, Matthias and Hahn, Christopher and Mendoza, Daniel and Schmitt, Frederik and Trippel, Caroline}, + booktitle = {Computer {Aided} {Verification}}, + series = {Lecture {Notes} in {Computer} {Science}}, + address = {Cham}, + isbn = {978-3-031-37703-7}, + shorttitle = {nl2spec}, + doi = {10.1007/978-3-031-37703-7_18}, + language = {en}, + publisher = {Springer Nature Switzerland}, + editor = {Enea, Constantin and Lal, Akash}, + year = {2023}, + pages = {383--396}, +} +``` diff --git a/NL2TL-dataset/datasets-nl2spec/canonical.json b/NL2TL-dataset/datasets-nl2spec/canonical.json new file mode 100644 index 0000000000000000000000000000000000000000..36e46a293620fda68c6c3c2d91098156085da0c8 --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/canonical.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c09ca4c086b268eae4539e446b10d22a887805fc5295b0b9dae0bab0e82ee47 +size 73886 diff --git a/NL2TL-dataset/datasets-nl2spec/experiments/codex-initial_predictions.csv b/NL2TL-dataset/datasets-nl2spec/experiments/codex-initial_predictions.csv new file mode 100644 index 0000000000000000000000000000000000000000..fe0f77f259442a7bca35798c0e5a20acab8c3ac3 --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/experiments/codex-initial_predictions.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78195edfbd5757abaa8f4cb60476e4355cdcd0319a70650c5aa91d4f59f77b81 +size 8019 diff --git a/NL2TL-dataset/datasets-nl2spec/experiments/gpt-3.5-turbo-interactive_predictions.csv b/NL2TL-dataset/datasets-nl2spec/experiments/gpt-3.5-turbo-interactive_predictions.csv new file mode 100644 index 0000000000000000000000000000000000000000..b9f2b0f53d139a9c81cb9b24910cb018b4e7a6ba --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/experiments/gpt-3.5-turbo-interactive_predictions.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daacb855ff79a51ba2a9bf49cfd561d2404207ad7a2a9730aadfb2dc4059b212 +size 
diff --git a/NL2TL-dataset/datasets-nl2spec/canonical.json b/NL2TL-dataset/datasets-nl2spec/canonical.json new file mode 100644 index 0000000000000000000000000000000000000000..36e46a293620fda68c6c3c2d91098156085da0c8 --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/canonical.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c09ca4c086b268eae4539e446b10d22a887805fc5295b0b9dae0bab0e82ee47 +size 73886 diff --git a/NL2TL-dataset/datasets-nl2spec/experiments/codex-initial_predictions.csv b/NL2TL-dataset/datasets-nl2spec/experiments/codex-initial_predictions.csv new file mode 100644 index 0000000000000000000000000000000000000000..fe0f77f259442a7bca35798c0e5a20acab8c3ac3 --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/experiments/codex-initial_predictions.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78195edfbd5757abaa8f4cb60476e4355cdcd0319a70650c5aa91d4f59f77b81 +size 8019 diff --git a/NL2TL-dataset/datasets-nl2spec/experiments/gpt-3.5-turbo-interactive_predictions.csv b/NL2TL-dataset/datasets-nl2spec/experiments/gpt-3.5-turbo-interactive_predictions.csv new file mode 100644 index 0000000000000000000000000000000000000000..b9f2b0f53d139a9c81cb9b24910cb018b4e7a6ba --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/experiments/gpt-3.5-turbo-interactive_predictions.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daacb855ff79a51ba2a9bf49cfd561d2404207ad7a2a9730aadfb2dc4059b212 +size 4544 diff --git a/NL2TL-dataset/datasets-nl2spec/experiments/nl2spec-paper_experiment_results.csv b/NL2TL-dataset/datasets-nl2spec/experiments/nl2spec-paper_experiment_results.csv new file mode 100644 index 0000000000000000000000000000000000000000..7fded986cf33b65eb2031a55aafff09002142646 --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/experiments/nl2spec-paper_experiment_results.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22e1afc28ca110d35c9cc8c9638234f6d2542a5b16367dd4a644b296a4ab556b +size 4508 diff --git a/NL2TL-dataset/datasets-nl2spec/expert_LTL_dataset-raw.txt b/NL2TL-dataset/datasets-nl2spec/expert_LTL_dataset-raw.txt new file mode 100644 index 0000000000000000000000000000000000000000..4b534ac898ff928c11c5e07c13ebd424f3c51a4f --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/expert_LTL_dataset-raw.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffd34b01f5d369dfb4db3444b808111f62917f2ddb4fd5e5fddb497471546ecb +size 70373 diff --git a/NL2TL-dataset/datasets-nl2spec/expert_LTL_dataset.txt b/NL2TL-dataset/datasets-nl2spec/expert_LTL_dataset.txt new file mode 100644 index 0000000000000000000000000000000000000000..2c919a64e03ead0e7e1c2cb5df8df0bcc6cab1ec --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/expert_LTL_dataset.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:113cbbde14d29283413e70c0462a43c81c82bf99f32baed94810f6c4629f28ca +size 70346 diff --git a/NL2TL-dataset/datasets-nl2spec/golden.jsonl b/NL2TL-dataset/datasets-nl2spec/golden.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb b/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..a22a88b0035306f78c2dbd9c7e9089d2a5ddbb3c --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb @@ -0,0 +1,547 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"raw/ltl.txt\", \"r\") as f:\n", + " raw_ltls = f.read().splitlines()\n", + " unique_ltls = set(raw_ltls)\n", + "with open(\"raw/eng.txt\", 'r') as f:\n", + " raw_engs = f.read().splitlines()\n", + "\n", + "DPs = []\n", + "for ltl, eng in zip(raw_ltls, raw_engs):\n", + " DPs.append({'ltl': ltl, 'eng': eng})" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "G (P04)\n", + "F G ! P04\n", + "(! P02) U P01\n", + "G(P03 -> P02)\n", + "F (P03 && P02)\n", + "F P01 && F P02\n", + "G !(P01 & P03)\n", + "G (P02 && P01)\n", + "G(!(P01 & P02))\n", + "G (P03 -> F P06)\n", + "G F P01 || X P02\n", + "P03 U (G (F P04))\n", + "G ( P02 -> X P04)\n", + "G( P01 -> F P02 )\n", + "G (P02 -> X F P01)\n", + "G F P01 || G F P02\n", + " G (P01 <-> X P02)\n", + "G F P03 -> G F P02\n", + "G (P01 -> X G ! P02)\n", + " G (P03 | P04 | P05)\n", + "(P03 U P02) || G P03\n", + " ! G (! (P02 && X P02))\n", + "G(P01 -> X (X (X P02)))\n", + "G P02 && G(P03 -> !P01)\n", + " ! G (! 
(P01 && X P01))\n", + "G (P03 -> (P04 | X P04))\n", + "G (!(P01 && P02) -> F P03)\n", + "G(P01 -> F P02) -> G F P03\n", + "G (P01 && X P02 -> X X P04)\n", + " (P02 U (P02 & ! P01)) | G P02\n", + "P03 -> X ((P02 U P01) || G P02)\n", + "G(!(P02 &&P04)) && G(P02 || P04)\n", + "(F P05) -> (!P05 U (P06 & !P05))\n", + "G ((P04 <-> P02) -> (P03 <-> P05))\n", + "G ( P01 -> (X !P01 | XX !P01 | XXX !P01))\n", + "P02 && G (P02 -> X ! P02 && X X ! P02 && X X X ! P02 && X X X X ! P02 && X X X X X P02)\n" + ] + } + ], + "source": [ + "for ltl in sorted(unique_ltls, key=lambda x: len(x)):\n", + " print(ltl)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Drone-planning Domain, with augmentation\n", + "Number of Data Points 756\n", + "Number of unique LTL expressions: 36\n", + "Number of unique LTL structures: 5\n" + ] + } + ], + "source": [ + "print(\"Drone-planning Domain, with augmentation\")\n", + "print(\"Number of Data Points\", len(DPs))\n", + "print(\"Number of unique LTL expressions:\", len(unique_ltls))\n", + "print(\"Number of unique LTL structures:\", 5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "F ( AP )\n", + "- Go to AP\n", + "\n", + "F ( AP1 & F ( AP2 ) )\n", + "- First go to AP1 and then to AP2\n", + "\n", + "\n", + "F ( AP1 ) & G ( AP2 )\n", + "- Go to AP1 and Always in AP2\n", + "\n", + "\n", + "F ( AP1 ) & ~ ( AP2 )\n", + "- Go to AP1 and avoid AP2 \n", + "\n", + "~ ( AP1 ) U ( AP2 )\n", + "- avoid going through AP1 until go through AP2\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean up LTL expressions " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "ori_aps = set()\n", + "for unique_ltl in unique_ltls:\n", + " ori_aps.update(unique_ltl.split())" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'!',\n", + " '!(P01',\n", + " '!P01',\n", + " '!P01)',\n", + " '!P01))',\n", + " '!P05))',\n", + " '&',\n", + " '&&',\n", + " '&&P04))',\n", + " '(',\n", + " '(!',\n", + " '(!(P01',\n", + " '(!P05',\n", + " '((P02',\n", + " '((P04',\n", + " '(F',\n", + " '(G',\n", + " '(P01',\n", + " '(P02',\n", + " '(P03',\n", + " '(P04',\n", + " '(P04)',\n", + " '(P06',\n", + " '(X',\n", + " ')',\n", + " '->',\n", + " '<->',\n", + " 'F',\n", + " 'G',\n", + " 'G(',\n", + " 'G(!(P01',\n", + " 'G(!(P02',\n", + " 'G(P01',\n", + " 'G(P02',\n", + " 'G(P03',\n", + " 'P01',\n", + " 'P01)',\n", + " 'P01))',\n", + " 'P02',\n", + " 'P02)',\n", + " 'P02))',\n", + " 'P02)))',\n", + " 'P03',\n", + " 'P03)',\n", + " 'P04',\n", + " 'P04)',\n", + " 'P04))',\n", + " 'P05)',\n", + " 'P05))',\n", + " 'P06)',\n", + " 'U',\n", + " 'X',\n", + " 'XX',\n", + " 'XXX',\n", + " '|',\n", + " '||'}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ori_aps" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "APs = [\n", + " {'ap': 'blue_room', 'eng': 'the blue room'}, \n", + " {'ap': 'green_room', 'eng': 'the green room'},\n", + " {'ap': 'orange_room', 'eng': 'the orange room'},\n", + " {'ap': 'purple_room', 'eng': 'the purple room'},\n", + " {'ap': 'red_room', 'eng': 'the red room'},\n", + " {'ap': 'yellow_room', 'eng': 'the yellow room'},\n", + " {'ap': 'first_floor', 'eng': 'the first 
floor'},\n", + " {'ap': 'second_floor', 'eng': 'the second floor'},\n", + " {'ap': 'third_floor', 'eng': 'the third floor'},\n", + " {'ap': 'landmark_1', 'eng': 'landmark 1'},\n", + " {'ap': 'landmark_2', 'eng': 'landmark 2'},\n", + " {'ap': 'landmark_3', 'eng': 'landmark 3'},\n", + "]\n", + "# the task-independent list below overwrites the room-name APs above\n", + "APs = [\n", + " {'ap': 'P01', 'eng': 'P01'},\n", + " {'ap': 'P02', 'eng': 'P02'},\n", + " {'ap': 'P03', 'eng': 'P03'},\n", + " {'ap': 'P04', 'eng': 'P04'},\n", + " {'ap': 'P05', 'eng': 'P05'},\n", + " {'ap': 'P06', 'eng': 'P06'},\n", + " {'ap': 'P07', 'eng': 'P07'},\n", + " {'ap': 'P08', 'eng': 'P08'},\n", + " {'ap': 'P09', 'eng': 'P09'},\n", + " {'ap': 'P10', 'eng': 'P10'},\n", + " {'ap': 'P11', 'eng': 'P11'},\n", + " {'ap': 'P12', 'eng': 'P12'},\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# from the translator\n", + "def build_type_A(ap1):\n", + " raw_ltl = f\"F ( {ap1['ap']} )\"\n", + " canonical_ltl = f\"finally ( {ap1['eng']} )\"\n", + " translated = f\"go to {ap1['eng']}\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}\n", + "\n", + "def build_type_B(room_1, room_2):\n", + " raw_ltl = f\"F ( {room_1['ap']} & F ( {room_2['ap']} ) )\"\n", + " canonical_ltl = f\"finally ( and ( {room_1['eng']} , finally ( {room_2['eng']} ) ) )\"\n", + " translated = f\"go to {room_1['eng']}, and then {room_2['eng']} at last\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}\n", + "\n", + "def build_type_C(room_1, room_2):\n", + " raw_ltl = f\"F ( {room_1['ap']} ) & G ( {room_2['ap']} )\"\n", + " canonical_ltl = f\"and ( finally ( {room_1['eng']} ) , always ( {room_2['eng']} ) )\"\n", + " translated = f\"go to {room_1['eng']} at last, and always stay in {room_2['eng']}\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}\n", + "\n", + "def build_type_D(room_1, room_2):\n", + " raw_ltl = f\"F ( {room_1['ap']} & ~ {room_2['ap']} )\"\n", + " canonical_ltl = f\"finally ( and ( {room_1['eng']} , not ( {room_2['eng']} ) ) )\"\n", + " translated = f\"go to {room_1['eng']} at last, and avoid {room_2['eng']}\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}\n", + "\n", + "def build_type_E(room_1, room_2):\n", + " raw_ltl = f\"~ ( {room_1['ap']} ) U ( {room_2['ap']} )\"\n", + " canonical_ltl = f\"until ( not ( {room_1['eng']} ) , {room_2['eng']} )\"\n", + " translated = f\"avoid go to {room_1['eng']} until go to {room_2['eng']}\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "translation_seeds = []\n", + "for r1 in APs:\n", + " translation_seeds.append(build_type_A(r1))\n", + " for r2 in APs:\n", + " if r1 == r2:\n", + " continue\n", + " translation_seeds.append(build_type_B(r1, r2))\n", + " translation_seeds.append(build_type_C(r1, r2))\n", + " translation_seeds.append(build_type_D(r1, r2))\n", + " translation_seeds.append(build_type_E(r1, r2))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "540\n" + ] + } + ], + "source": [ + "print(len(translation_seeds))" + ] + },
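 + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sanity check on the seed count: 540 = 12 type-A seeds (one per AP) + 12 * 11 * 4 seeds of types B through E (one per ordered pair of distinct APs and per type)." + ] + }, + { + 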
"cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'G (!(P01 && P02) -> F P03)', ' ! G (! (P02 && X P02))', 'F (P03 && P02)', 'G(P01 -> F P02) -> G F P03', 'F P01 && F P02', 'G (P02 -> X F P01)', 'P03 U (G (F P04))', 'G F P01 || G F P02', '(! P02) U P01', 'G(P01 -> X (X (X P02)))', 'F G ! P04', 'P02 && G (P02 -> X ! P02 && X X ! P02 && X X X ! P02 && X X X X ! P02 && X X X X X P02)', 'G (P01 -> X G ! P02)', 'G (P03 -> F P06)', 'G(!(P02 &&P04)) && G(P02 || P04)', 'G ( P02 -> X P04)', 'G (P03 -> (P04 | X P04))', 'G ( P01 -> (X !P01 | XX !P01 | XXX !P01))', 'P03 -> X ((P02 U P01) || G P02)', 'G(P03 -> P02)', '(F P05) -> (!P05 U (P06 & !P05))', 'G ((P04 <-> P02) -> (P03 <-> P05))', 'G F P01 || X P02', ' G (P03 | P04 | P05)', 'G(!(P01 & P02))', ' G (P01 <-> X P02)', 'G (P01 && X P02 -> X X P04)', 'G( P01 -> F P02 )', 'G (P04)', 'G !(P01 & P03)', 'G P02 && G(P03 -> !P01)', ' ! G (! (P01 && X P01))', 'G (P02 && P01)', ' (P02 U (P02 & ! P01)) | G P02', 'G F P03 -> G F P02', '(P03 U P02) || G P03'}\n" + ] + } + ], + "source": [ + "seed_ltls = set([t['raw_ltl'] for t in translation_seeds])\n", + "print(unique_ltls - seed_ltls)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save the canonical decoding list" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "possible_decodings = {}\n", + "for seed in translation_seeds:\n", + " canonical = seed['canonical_ltl']\n", + " possible_decodings[canonical] = {\n", + " 'formula': canonical,\n", + " 'raw': seed['raw_ltl'],\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"canonical.json\", 'w') as f:\n", + " f.write(json.dumps(possible_decodings, indent=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save translation seed for zero shot" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"train_seed.jsonl\", \"w\") as f:\n", + " for dp in translation_seeds:\n", + " better_ltl = dp['canonical_ltl']\n", + " entry = {'canonical': better_ltl, 'formula': better_ltl, 'natural': dp['eng']}\n", + " json.dump(entry, f)\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save golden data for evaluation " + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "raw_canonical_mapping = {\n", + " seed['raw_ltl']: seed['canonical_ltl'] for seed in translation_seeds\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "756" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(DPs)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ltl': 'G( P01 -> F P02 )',\n", + " 'eng': 'Every P01 is eventually followed by a P02.'}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DPs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'G( P01 -> F P02 )'", + "output_type": "error", + "traceback": [ + 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/home/shaojun/code/NL2TL_translaters/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb Cell 24\u001b[0m line \u001b[0;36m4\n\u001b[1;32m 1\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mgolden.jsonl\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mw\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mas\u001b[39;00m f:\n\u001b[1;32m 2\u001b[0m \u001b[39mfor\u001b[39;00m dp \u001b[39min\u001b[39;00m DPs:\n\u001b[1;32m 3\u001b[0m entry \u001b[39m=\u001b[39m {\n\u001b[0;32m----> 4\u001b[0m \u001b[39m'\u001b[39m\u001b[39mcanonical\u001b[39m\u001b[39m'\u001b[39m: raw_canonical_mapping[dp[\u001b[39m'\u001b[39;49m\u001b[39mltl\u001b[39;49m\u001b[39m'\u001b[39;49m]],\n\u001b[1;32m 5\u001b[0m \u001b[39m'\u001b[39m\u001b[39mformula\u001b[39m\u001b[39m'\u001b[39m: raw_canonical_mapping[dp[\u001b[39m'\u001b[39m\u001b[39mltl\u001b[39m\u001b[39m'\u001b[39m]],\n\u001b[1;32m 6\u001b[0m \u001b[39m'\u001b[39m\u001b[39mnatural\u001b[39m\u001b[39m'\u001b[39m: dp[\u001b[39m'\u001b[39m\u001b[39meng\u001b[39m\u001b[39m'\u001b[39m],\n\u001b[1;32m 7\u001b[0m \u001b[39m'\u001b[39m\u001b[39mraw_ltl\u001b[39m\u001b[39m'\u001b[39m: dp[\u001b[39m'\u001b[39m\u001b[39mltl\u001b[39m\u001b[39m'\u001b[39m],\n\u001b[1;32m 8\u001b[0m }\n\u001b[1;32m 9\u001b[0m json\u001b[39m.\u001b[39mdump(entry, f)\n\u001b[1;32m 10\u001b[0m f\u001b[39m.\u001b[39mwrite(\u001b[39m'\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m'\u001b[39m)\n", + "\u001b[0;31mKeyError\u001b[0m: 'G( P01 -> F P02 )'" + ] + } + ], + "source": [ + "with open(\"golden.jsonl\", \"w\") as f:\n", + " for dp in DPs:\n", + " entry = {\n", + " 'canonical': raw_canonical_mapping[dp['ltl']],\n", + " 'formula': raw_canonical_mapping[dp['ltl']],\n", + " 'natural': dp['eng'],\n", + " 'raw_ltl': dp['ltl'],\n", + " }\n", + " json.dump(entry, f)\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.12 ('GPML')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.17" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "92a84f77637d1d47b588cbbaac9b07f8c628b67f58e672e955ed4902878afbbe" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/NL2TL-dataset/datasets-nl2spec/raw/eng.txt b/NL2TL-dataset/datasets-nl2spec/raw/eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..1383efcbb4d2d3de684bc8e3cb40ed64327a3623 --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/raw/eng.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57e31d7d92d59e04c2fec3f4ed8a8b8d8320852ac38547c9aef268e87adc9204 +size 49243 diff --git a/NL2TL-dataset/datasets-nl2spec/raw/ltl.txt b/NL2TL-dataset/datasets-nl2spec/raw/ltl.txt new file mode 100644 index 0000000000000000000000000000000000000000..7b014bd2c2f6c1cfc515230928101c6002e1161e --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/raw/ltl.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1f15082d04114b3a3f7a3f1cca842a20c3723e70077f0a4439f35e7b56b48d55 +size 19739 diff --git a/NL2TL-dataset/datasets-nl2spec/train_seed.jsonl b/NL2TL-dataset/datasets-nl2spec/train_seed.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..76c69e58b4a0a682801b56d9db82065635bbaed6 --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/train_seed.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f0497459f33dea5b40f948bb4669c686533b28884e1dd1de16e476629bde672 +size 82488 diff --git a/README.md b/README.md index 7be5fc7f47d5db027d120b8024982df93db95b74..aac109ae9f99791f7b70366a04d6a4e09f840f70 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,43 @@ ---- -license: mit ---- +# Data-Efficient Learning of Natural Language to Linear Temporal Logic Translators for Robot Task Specification + +[[Homepage](https://nl2hltl.github.io/)] [[Paper](https://arxiv.org/abs/2408.08188)] [[Video](https://youtu.be/o8CRrVK9g9Q)] [[Poster](https://nl2hltl.github.io/media/figures/overview.png)] + +> The associated repository for the paper "Data-Efficient Learning of Natural Language to Linear Temporal Logic Translators for Robot Task Specification". + +## Introduction + +The dataset is adapted from the projects listed below: we replace their task-related descriptions with task-independent ones. + +Task-related NL2TL example (from [Efficient-Eng-2-LTL](https://github.com/UM-ARM-Lab/Efficient-Eng-2-LTL)): + +```json +"globally ( and ( until ( scan , not ( any cubes ) ) , finally ( any cubes ) ) )": { + "formula": "globally ( and ( until ( scan , not ( any cubes ) ) , finally ( any cubes ) ) )", + "raw": "G & U S ! A F A" + }, +``` + +Task-independent NL2TL example: +```json +{"natural": "go through the P01 until you get to the P04", "raw_ltl": "F ( P01 A ( F P04 ) )"} +``` + +Source task-related NL2TL datasets: + - [Efficient-Eng-2-LTL](https://github.com/UM-ARM-Lab/Efficient-Eng-2-LTL) + - [Lang2LTL](https://github.com/h2r/Lang2LTL) + - [nl2spec](https://github.com/realChrisHahn2/nl2spec) + - [NL2TL](https://github.com/yongchao98/NL2TL) + +## Cite +```bibtex +@misc{xu2024scalingnaturallanguageunderstanding, + title={Scaling Up Natural Language Understanding for Multi-Robots Through the Lens of Hierarchy}, + author={Shaojun Xu and Xusheng Luo and Yutong Huang and Letian Leng and Ruixuan Liu and Changliu Liu}, + year={2024}, + eprint={2408.08188}, + archivePrefix={arXiv}, + primaryClass={cs.RO}, + url={https://arxiv.org/abs/2408.08188}, +} +```
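To illustrate the conversion described in the Introduction, here is a minimal sketch of an AP-anonymization step. The helper, the task-related source pair, and the phrase-to-placeholder mapping are our own hypothetical reconstruction for illustration, not code or data shipped with this repo:

```python
# Hypothetical sketch: turn a task-related (natural, raw_ltl) pair into a
# task-independent one by renaming each task-specific AP to P01, P02, ...
def anonymize(example: dict, ap_map: dict) -> dict:
    out = dict(example)
    for phrase, placeholder in ap_map.items():
        out["natural"] = out["natural"].replace(phrase, placeholder)
        out["raw_ltl"] = out["raw_ltl"].replace(phrase, placeholder)
    return out

# Invented task-related source pair; the result matches the task-independent
# example shown above.
example = {"natural": "go through the red room until you get to the exit",
           "raw_ltl": "F ( red_room A ( F exit ) )"}
print(anonymize(example, {"red room": "P01", "red_room": "P01",
                          "exit": "P04"}))
# {'natural': 'go through the P01 until you get to the P04',
#  'raw_ltl': 'F ( P01 A ( F P04 ) )'}
```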