diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..590edb6bd09aee3ddc204183bef0215deb677a88 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +*.jsonl filter=lfs diff=lfs merge=lfs -text +*.json filter=lfs diff=lfs merge=lfs -text +*.csv filter=lfs diff=lfs merge=lfs -text +*.txt filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/NL2TL-dataset/NLTLsummary.json b/NL2TL-dataset/NLTLsummary.json new file mode 100644 index 0000000000000000000000000000000000000000..5d87bb2e9952565bc2302ee62f7d9c05caf323cb --- /dev/null +++ b/NL2TL-dataset/NLTLsummary.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6291896067a809119e01b43734779333c2d6b7baa36376afe8aa6579b4ba77ee +size 23025 diff --git a/NL2TL-dataset/collect/Cleaned_ENG.txt b/NL2TL-dataset/collect/Cleaned_ENG.txt new file mode 100644 index 0000000000000000000000000000000000000000..583bb81503ef6ab39746f2de154eaa3bc9075355 --- /dev/null +++ b/NL2TL-dataset/collect/Cleaned_ENG.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77f8e6b696147098eb16be8ec7a2891a493c008e7d997f994d1b77fea4255559 +size 447181 diff --git a/NL2TL-dataset/collect/Cleaned_LTL.txt b/NL2TL-dataset/collect/Cleaned_LTL.txt new file mode 100644 index 0000000000000000000000000000000000000000..29d5eece04300dea13e6e87630484ec2e1c17d77 --- /dev/null +++ b/NL2TL-dataset/collect/Cleaned_LTL.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ced5c8ee114bc50e480c44999e991337318749dd7f5988379c0f2cd3c5940d2 +size 252706 diff --git a/NL2TL-dataset/collect/UNCleaned_ENG.txt b/NL2TL-dataset/collect/UNCleaned_ENG.txt new file mode 100644 index 0000000000000000000000000000000000000000..f0cf71ef6bc0813e6043b85b2d22fa0801a40bfc --- /dev/null +++ b/NL2TL-dataset/collect/UNCleaned_ENG.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29fe1e57c68d9096705eae98f6456aa24eb86ecc2a43fa7896785a30eba1c3d0 +size 79381 diff --git a/NL2TL-dataset/collect/UNCleaned_LTL.txt b/NL2TL-dataset/collect/UNCleaned_LTL.txt new file mode 100644 index 0000000000000000000000000000000000000000..d5a27791f616775b8856d7b5f1a615105e1c94ce --- /dev/null +++ b/NL2TL-dataset/collect/UNCleaned_LTL.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f903103fcdc5b86bf1aca3568fd5483bfab90412fb1581006c0433c4ac0feecc +size 34258 diff --git a/NL2TL-dataset/collect/UNCleaned_num.txt b/NL2TL-dataset/collect/UNCleaned_num.txt new file mode 100644 index 0000000000000000000000000000000000000000..b8a53864f624b5b400b98abe0d2bf6c441443f8a --- /dev/null +++ b/NL2TL-dataset/collect/UNCleaned_num.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50067192dcabdc789ff68998a4b67b44d5ea71ad870b2f3029f06a62e93cd1cc +size 7639 diff --git a/NL2TL-dataset/collect/eng.txt b/NL2TL-dataset/collect/eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..9e8460db6c584325c53ea7125010814a31de9a8f --- /dev/null +++ b/NL2TL-dataset/collect/eng.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:834f288fc7c4bf3c27e829db10d72f673b55551f5e50591e9ff05461b78417e7 +size 384436 diff --git a/NL2TL-dataset/collect/eng_gpt_auged.txt b/NL2TL-dataset/collect/eng_gpt_auged.txt new file mode 100644 index 
0000000000000000000000000000000000000000..2d053ca173aaaf74ae9a325b5e55e37beb9e1a93 --- /dev/null +++ b/NL2TL-dataset/collect/eng_gpt_auged.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5d039a94cfde70475d1c935e80286d0c3bb9c578f01c86cf8f65aed015fdf14 +size 46038 diff --git a/NL2TL-dataset/collect/eng_gpt_auged2.txt b/NL2TL-dataset/collect/eng_gpt_auged2.txt new file mode 100644 index 0000000000000000000000000000000000000000..5d7860287b1483743b4ad87caf1cbd78fa185e5c --- /dev/null +++ b/NL2TL-dataset/collect/eng_gpt_auged2.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6484a6ace4d0bdd6e00b337aa2e734d4cd3cfdbcd03f90a960ea3859381ffaad +size 96837 diff --git "a/NL2TL-dataset/collect/eng\346\224\271\350\277\207\344\272\206\344\270\215\345\244\252\345\245\275.txt" "b/NL2TL-dataset/collect/eng\346\224\271\350\277\207\344\272\206\344\270\215\345\244\252\345\245\275.txt" new file mode 100644 index 0000000000000000000000000000000000000000..4119f2a4468b1239684915faf511c744fe39e836 --- /dev/null +++ "b/NL2TL-dataset/collect/eng\346\224\271\350\277\207\344\272\206\344\270\215\345\244\252\345\245\275.txt" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e74a45639f6d71263ea70768e40a4d299137fca66de36da7657a15cbba60beb9 +size 379834 diff --git a/NL2TL-dataset/collect/idxsrc_gpt_auged.txt b/NL2TL-dataset/collect/idxsrc_gpt_auged.txt new file mode 100644 index 0000000000000000000000000000000000000000..f1750947466989be039e7b3dc6a721b8b0c15acd --- /dev/null +++ b/NL2TL-dataset/collect/idxsrc_gpt_auged.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce789edf73241091f48e1e3a2b4eddf06ae0244fc20d4f050cbed474afd79ab6 +size 4425 diff --git a/NL2TL-dataset/collect/idxsrc_gpt_auged2.txt b/NL2TL-dataset/collect/idxsrc_gpt_auged2.txt new file mode 100644 index 0000000000000000000000000000000000000000..3a72b023e0be446c4a18b890997f631ad1a6a903 --- /dev/null +++ b/NL2TL-dataset/collect/idxsrc_gpt_auged2.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a71b50098910d35585cddb30b9fd0187898db7e0ea1ae5cc94a5bf7f100e81a4 +size 9273 diff --git a/NL2TL-dataset/collect/log.jsonl b/NL2TL-dataset/collect/log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a9e970c6df4c5f05f5eca58b1b3ba937724dc64d --- /dev/null +++ b/NL2TL-dataset/collect/log.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3a3e9ba897654fe39da61e0fc20687714fb1cd6da68d98eb604731b20d14fce +size 561367 diff --git a/NL2TL-dataset/collect/ltl copy.txt b/NL2TL-dataset/collect/ltl copy.txt new file mode 100644 index 0000000000000000000000000000000000000000..93bffa2415f7c3f074cbe5e4777313eef08f1420 --- /dev/null +++ b/NL2TL-dataset/collect/ltl copy.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:161b2a0b67725db0aa2e803c4576fc47cfb0698f66469c660941a0ddbcaba76c +size 192556 diff --git a/NL2TL-dataset/collect/ltl.txt b/NL2TL-dataset/collect/ltl.txt new file mode 100644 index 0000000000000000000000000000000000000000..93bffa2415f7c3f074cbe5e4777313eef08f1420 --- /dev/null +++ b/NL2TL-dataset/collect/ltl.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:161b2a0b67725db0aa2e803c4576fc47cfb0698f66469c660941a0ddbcaba76c +size 192556 diff --git a/NL2TL-dataset/collect/ltl_eng_1.jsonl b/NL2TL-dataset/collect/ltl_eng_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..16e9200f22ffc30f032c8107d8ccba60e45700b5 --- /dev/null 
+++ b/NL2TL-dataset/collect/ltl_eng_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55879344ea92bf9073b1b605dda98cfe551b9169186dde31592d15c9a24d47e4 +size 1043402 diff --git a/NL2TL-dataset/collect/ltl_eng_mid_order_1.jsonl b/NL2TL-dataset/collect/ltl_eng_mid_order_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..66944cc559d56be12a04ec020debe1c1b631e0ca --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_mid_order_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e61fbb4657c7b6d81d26c6a8fa596f03156dba6308b17b54125f79cd3734c058 +size 1069183 diff --git a/NL2TL-dataset/collect/ltl_eng_test-aug.jsonl b/NL2TL-dataset/collect/ltl_eng_test-aug.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a0f04ea97c9f872e07a9ed28cb3a5af031eb7fb9 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_test-aug.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0982bf8999ef200b1d7fa5c867b70e41caf136b7a4422e73eb303f7ee77b02dd +size 131838 diff --git a/NL2TL-dataset/collect/ltl_eng_test.jsonl b/NL2TL-dataset/collect/ltl_eng_test.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a0f04ea97c9f872e07a9ed28cb3a5af031eb7fb9 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_test.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0982bf8999ef200b1d7fa5c867b70e41caf136b7a4422e73eb303f7ee77b02dd +size 131838 diff --git a/NL2TL-dataset/collect/ltl_eng_test_mid.jsonl b/NL2TL-dataset/collect/ltl_eng_test_mid.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..54ab5b2681bad0d32d6420669f8057573a751437 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_test_mid.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd4e75373318179d3cd3a2bbb330fdfe75376dabe77f475f31d95cf27f4d100c +size 137907 diff --git a/NL2TL-dataset/collect/ltl_eng_test_mid_ascii.jsonl b/NL2TL-dataset/collect/ltl_eng_test_mid_ascii.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b461e2c6ee10aa40028ce6c0e5d86320b2ce683a --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_test_mid_ascii.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9185fd98062c160c363f35116a943a0950fdd2151bd7ff61cb9df39921ca73f3 +size 137931 diff --git a/NL2TL-dataset/collect/ltl_eng_test_mid_ascii_gptAuged.jsonl b/NL2TL-dataset/collect/ltl_eng_test_mid_ascii_gptAuged.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a5e8262fc36f6db074d742b36d551722f1929077 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_test_mid_ascii_gptAuged.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ca41a9c7ea7dd2151e861ac7754f36eee0e386f7a0dbbf7d2d5cabf927ccb7b +size 125920 diff --git a/NL2TL-dataset/collect/ltl_eng_train-aug.jsonl b/NL2TL-dataset/collect/ltl_eng_train-aug.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2cbcfe7989d7b231d254158c1fa27ac3a6d36e87 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_train-aug.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4817c9d380816056e05c0bec1f985ac90dbc1068417db5d033d15bfe4273d279 +size 911564 diff --git a/NL2TL-dataset/collect/ltl_eng_train.jsonl b/NL2TL-dataset/collect/ltl_eng_train.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2cbcfe7989d7b231d254158c1fa27ac3a6d36e87 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_train.jsonl @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4817c9d380816056e05c0bec1f985ac90dbc1068417db5d033d15bfe4273d279 +size 911564 diff --git a/NL2TL-dataset/collect/ltl_eng_train_mid.jsonl b/NL2TL-dataset/collect/ltl_eng_train_mid.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..178754ffb1bcf0ce59c0025793c5356931dd5428 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_train_mid.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0daf22f37a11d38edaacfd837ccabf59fc7395431166bc50b0336e030faf77d1 +size 937958 diff --git a/NL2TL-dataset/collect/ltl_eng_train_mid_ascii.jsonl b/NL2TL-dataset/collect/ltl_eng_train_mid_ascii.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..571f8a195bc5ee59855ca0a5f8e70c0ae318be35 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_train_mid_ascii.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3674e9aabc015509efadbe1f8374627d5aaaacfb8fe3c717fa1ff852dcb8c4f +size 937958 diff --git a/NL2TL-dataset/collect/ltl_eng_train_mid_ascii_gptAuged.jsonl b/NL2TL-dataset/collect/ltl_eng_train_mid_ascii_gptAuged.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7130227a4bd581220025cfa9b0e131f0af43cb50 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_eng_train_mid_ascii_gptAuged.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cea13abc2204906d232ea88b78830ad6be09e01dbd5b7ad7ec9da9dff2f6c777 +size 1129386 diff --git a/NL2TL-dataset/collect/ltl_mid_order.txt b/NL2TL-dataset/collect/ltl_mid_order.txt new file mode 100644 index 0000000000000000000000000000000000000000..1201f43dc64624b6851b169ade596ce2a21291f1 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_mid_order.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:646500a7231278e0dd4ca0ed6fc2cc71ba4f3b7284a13a720a689a8e634db78e +size 225055 diff --git a/NL2TL-dataset/collect/ltl_mid_order_ascii.txt b/NL2TL-dataset/collect/ltl_mid_order_ascii.txt new file mode 100644 index 0000000000000000000000000000000000000000..1f99d173ae876b56448d63f504deeb0d2cc9d192 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_mid_order_ascii.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f56781a159d3a52b58101038f0d15e1ef054b5f0290dd73228e51f4004243dd6 +size 225073 diff --git a/NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged.txt b/NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged.txt new file mode 100644 index 0000000000000000000000000000000000000000..c36d06dcf99ff87ef245c8163186e0d0ef53efcd --- /dev/null +++ b/NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f332db20802ecce5a8bd2879316e66061c97d438d18c7cbc6d4899b3bfbcaa44 +size 19575 diff --git a/NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged2.txt b/NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged2.txt new file mode 100644 index 0000000000000000000000000000000000000000..709fa5ff35af84230c647383272c28460a358d51 --- /dev/null +++ b/NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged2.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfca7f2b1129e28f89e4dd68d76406ad26404514d08864aaf1c76f3fa01eb132 +size 40805 diff --git a/NL2TL-dataset/collect/note.txt b/NL2TL-dataset/collect/note.txt new file mode 100644 index 0000000000000000000000000000000000000000..e019bc077819de4a67bb77581911674f1b6d6664 --- /dev/null +++ b/NL2TL-dataset/collect/note.txt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:7798d0398769800f3cfc50fc902cec4cd1aa1eff05e3cb41790094c620f61039 +size 59 diff --git a/NL2TL-dataset/collect2/CW_total_3382_for_transfer_word_midfix.jsonl b/NL2TL-dataset/collect2/CW_total_3382_for_transfer_word_midfix.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..17cca6da44c7ebc99a22dbbb4850f18e70a32b64 --- /dev/null +++ b/NL2TL-dataset/collect2/CW_total_3382_for_transfer_word_midfix.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c131c3486f4debdf0ed416248de2892d41627abbf2fcd83c69051b7ba69bfb2b +size 449106 diff --git a/NL2TL-dataset/collect2/GLTL_train_8923_for_transfer_word_midfix.jsonl b/NL2TL-dataset/collect2/GLTL_train_8923_for_transfer_word_midfix.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..aef9cd294a1c4685e353a7e74e0b3e4b3c432361 --- /dev/null +++ b/NL2TL-dataset/collect2/GLTL_train_8923_for_transfer_word_midfix.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc7aff94ba621fb7def1b174b2f5ee0e1336b572ed65bb7c15f03f1295156930 +size 1839113 diff --git a/NL2TL-dataset/collect2/LTLexplain_0.json b/NL2TL-dataset/collect2/LTLexplain_0.json new file mode 100644 index 0000000000000000000000000000000000000000..74b05c4d764aad5d3038b7256b9568842bdc3170 --- /dev/null +++ b/NL2TL-dataset/collect2/LTLexplain_0.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08175568f9105c8dd5e8e5413e8a90834ab48b0cd2c8396ca1a71ab97f69ef68 +size 149375 diff --git a/NL2TL-dataset/collect2/LTLexplain_1.json b/NL2TL-dataset/collect2/LTLexplain_1.json new file mode 100644 index 0000000000000000000000000000000000000000..f0fed24fe8d11d073037020ee045d6ebfc620efc --- /dev/null +++ b/NL2TL-dataset/collect2/LTLexplain_1.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2a0f1e1bfe11526ca4ad549cf263fcfd60d18d9ee27cf77b889a3d0b8728d37 +size 149123 diff --git a/NL2TL-dataset/collect2/LTLexplain_2.json b/NL2TL-dataset/collect2/LTLexplain_2.json new file mode 100644 index 0000000000000000000000000000000000000000..c4d9b737bc94de11eb8f874735a2bd3a73dcab69 --- /dev/null +++ b/NL2TL-dataset/collect2/LTLexplain_2.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64ee6dd1b5f76edc89b57cc4384a7dc1ea1af31940016f844c8a22ee0b98c0e4 +size 82749 diff --git a/NL2TL-dataset/collect2/LTLexplain_3.json b/NL2TL-dataset/collect2/LTLexplain_3.json new file mode 100644 index 0000000000000000000000000000000000000000..8deb947c5e4ab2177c5b4c569cff116f389b9b14 --- /dev/null +++ b/NL2TL-dataset/collect2/LTLexplain_3.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:052761b9c005136e13576c0bb4f279ad5c81e9e15abfd736e344028b32e8af89 +size 73448 diff --git a/NL2TL-dataset/collect2/LTLexplain_4.json b/NL2TL-dataset/collect2/LTLexplain_4.json new file mode 100644 index 0000000000000000000000000000000000000000..834b3ed504b974cc90b60ed2680d25967c4b5727 --- /dev/null +++ b/NL2TL-dataset/collect2/LTLexplain_4.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3fa922f8b8402b2f950c18de7e2360a0aa7158297251e0ea1fa2d6c26337ccf +size 147624 diff --git a/NL2TL-dataset/collect2/LTLsummary.json b/NL2TL-dataset/collect2/LTLsummary.json new file mode 100644 index 0000000000000000000000000000000000000000..b0b9914651b91c5367d20d85b47157e57727bc66 --- /dev/null +++ b/NL2TL-dataset/collect2/LTLsummary.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:11079ebb03aa8d76763c690f7c59431f32c25ce385657b3e68daa1b15c7ff734 +size 22393
diff --git a/NL2TL-dataset/collect2/getUniqueLTL.py b/NL2TL-dataset/collect2/getUniqueLTL.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e93b0c1966a344ade2484a1fda66221eef9809a
--- /dev/null
+++ b/NL2TL-dataset/collect2/getUniqueLTL.py
@@ -0,0 +1,19 @@
+import json
+import os
+
+
+def findUniqueLTL(paths: list):
+    # Collect the set of distinct raw LTL formulas across the given .jsonl files.
+    ret = {}
+    for path in paths:
+        with open(path, 'r') as f:
+            for line in f:
+                ret[json.loads(line)['raw_ltl']] = 1
+    return ret
+
+
+if __name__ == '__main__':
+    paths = ['/home/user/xsj/NL2TL-dataset/collect2/ltl_eng_test_mid_ascii_gptAuged.jsonl', '/home/user/xsj/NL2TL-dataset/collect2/ltl_eng_train_mid_ascii_gptAuged.jsonl']
+    LTLs = findUniqueLTL(paths=paths)
+    with open(os.path.join('/home/user/xsj/NL2TL-dataset/collect2', 'NLTLsummary.json'), 'w') as f:
+        f.write(json.dumps(LTLs, sort_keys=False, indent=4, separators=(',', ':')))
diff --git a/NL2TL-dataset/collect2/lifted_data.jsonl b/NL2TL-dataset/collect2/lifted_data.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..105beb79da430fbc3d835d1206c2961fda1dcf6e
--- /dev/null
+++ b/NL2TL-dataset/collect2/lifted_data.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c68ab33843fe0485380f33b6e49cb7c9230eae66252d869083584dd7ef048afb
+size 12458149
diff --git a/NL2TL-dataset/collect2/ltl_eng_test_mid_ascii_gptAuged.jsonl b/NL2TL-dataset/collect2/ltl_eng_test_mid_ascii_gptAuged.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..9c784d20c3c5e0f66a030dac769086144d333c7a
--- /dev/null
+++ b/NL2TL-dataset/collect2/ltl_eng_test_mid_ascii_gptAuged.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41061064f1591c833a29ea73ce1888aaa831b488b0f4f0f2a04994c871a42873
+size 140979
diff --git a/NL2TL-dataset/collect2/ltl_eng_train_mid_ascii_gptAuged.jsonl b/NL2TL-dataset/collect2/ltl_eng_train_mid_ascii_gptAuged.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..08ec7b3a4a8772ddde8b7f2c87ee0f0e3a437b07
--- /dev/null
+++ b/NL2TL-dataset/collect2/ltl_eng_train_mid_ascii_gptAuged.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24f9856d3e7f3d882de35caf5e3d79e70b783630178e1fcbad197b31431b2bfa
+size 1264107
diff --git a/NL2TL-dataset/collect2/navi_total_refined.jsonl b/NL2TL-dataset/collect2/navi_total_refined.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..ae1e41389d8202ce107919bd0c651d33a63416f1
--- /dev/null
+++ b/NL2TL-dataset/collect2/navi_total_refined.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11499148f1f053c86476f08ec39f9cb0cb724eb9451d0153e2f34c77a04855b8
+size 3825939
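Every data file in this commit is stored as a Git LFS pointer rather than as raw content: the three "+" lines of each hunk record only the LFS spec version, the SHA-256 of the real payload, and its size in bytes, and the payload itself is fetched by git lfs pull. A minimal sketch of inspecting such a stub from Python follows; read_lfs_pointer is a hypothetical helper for illustration, not part of this repository:

import sys

def read_lfs_pointer(path):
    # A Git LFS pointer is a tiny text stub with three "key value" lines:
    # "version <spec-url>", "oid sha256:<hex digest>", "size <bytes>".
    fields = {}
    with open(path, 'r') as f:
        for line in f:
            key, _, value = line.strip().partition(' ')
            fields[key] = value
    return {
        'version': fields['version'],
        'sha256': fields['oid'].removeprefix('sha256:'),  # Python >= 3.9
        'size_bytes': int(fields['size']),
    }

if __name__ == '__main__':
    # e.g. python read_pointer.py NL2TL-dataset/collect2/lifted_data.jsonl
    print(read_lfs_pointer(sys.argv[1]))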
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "DOMAIN = \"drone-planning/\"\n", + "# DOMAIN = \"clean-up/\"\n", + "# DOMAIN = \"pick-and-place/\"\n", + "with open(DOMAIN + \"train_seed.jsonl\") as f:\n", + " train_seed = [json.loads(line) for line in f]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "eng_seeds = {\n", + " seed['natural']: [] for seed in train_seed\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Augmentation Code\n", + "prompting GPT-3 seems to work the best in this case" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# You need to set your OPENAI API key here\n", + "# https://beta.openai.com/account/api-keys\n", + "openai.api_key = \"TO_BE_SET\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def normalize(sentence):\n", + " # captialize first letter and add period at the end if not present\n", + " if sentence[0].islower():\n", + " sentence = sentence[0].upper() + sentence[1:]\n", + " if sentence[-1] != '.':\n", + " sentence = sentence + '.'\n", + " return sentence\n", + "\n", + "def parse_sentences_from_response(response):\n", + " lines = response.split('\\n')\n", + " # assert len(lines) == 5\n", + " assert len(lines) == 10\n", + " lines[0] = \"1.\" + lines[0]\n", + " paraphrases = []\n", + " for idx, line in enumerate(lines):\n", + " assert line.startswith(str(idx+1) + '. ')\n", + " sentence_start_idx = len(str(idx+1) + '. ')\n", + " paraphrases.append(line[sentence_start_idx:])\n", + " for paraphrase in paraphrases:\n", + " if paraphrase[-1] == ' ':\n", + " if paraphrase[-2] == '.':\n", + " paraphrase = paraphrase[:-1]\n", + " else:\n", + " paraphrase = paraphrase[:-2] + '.'\n", + " return paraphrases\n", + "\n", + "\n", + "PROMPT = \"\"\"Rephrase the source sentence in 10 different ways. 
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "O = rephrase_a_sentence(\"Go to the red room or go to the green room to finally go to the blue room.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "O"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Run Augmentation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "len(eng_seeds)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "list(eng_seeds.keys())[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def paraphrase_done(eng_seeds):\n",
+ "    for eng_seed, extended in tqdm(eng_seeds.items()):\n",
+ "        if len(extended) == 0:\n",
+ "            return False\n",
+ "    return True\n",
+ "\n",
+ "while not paraphrase_done(eng_seeds):\n",
+ "    for eng_seed, extended in tqdm(eng_seeds.items()):\n",
+ "        if len(extended) == 0:\n",
+ "            extended += rephrase_a_sentence(eng_seed)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eng_seeds"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Dump as Training Data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "train_seed[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with open(DOMAIN + \"syn-aug.train.jsonl\", 'w') as f:\n",
+ "    for seed in train_seed:\n",
+ "        f.write(json.dumps(seed) + '\\n')\n",
+ "        for aug_eng in eng_seeds[seed['natural']]:\n",
+ "            f.write(json.dumps({\n",
+ "                'natural': aug_eng,\n",
+ "                'canonical': seed['canonical'],\n",
+ "                'formula': seed['formula']\n",
+ "            }) + '\\n')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with open(DOMAIN + \"syn.train.jsonl\", 'w') as f:\n",
+ "    for seed in train_seed:\n",
+ "        f.write(json.dumps(seed) + '\\n')"
+ ]
+ },
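Each line written to syn-aug.train.jsonl above pairs one English sentence (a seed or one of its GPT-3 paraphrases) with the seed's canonical form and formula, which the paraphrases inherit unchanged. An illustrative record, assuming the probe sentence used earlier in this notebook and a canonical string in the lifted format built by the clean-up preprocessing notebook later in this diff:

import json

# Illustrative training record in the syn-aug.train.jsonl format; the values
# are examples taken from elsewhere in this commit, not a real dumped line.
record = {
    "natural": "Go to the red room or go to the green room to finally go to the blue room.",
    "canonical": "finally ( and ( or ( Prop_1 , Prop_2 ) , finally ( Prop_3 ) ) )",
    "formula": "finally ( and ( or ( Prop_1 , Prop_2 ) , finally ( Prop_3 ) ) )",
}
print(json.dumps(record))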
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Normalize the natural language form"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if DOMAIN == \"clean-up/\":\n",
+ "    # in clean up, golden natural language data comes without period at the end, no capitalization in the beginning\n",
+ "    def clean_up_normalize(sentence):\n",
+ "        if sentence[0].isupper():\n",
+ "            sentence = sentence[0].lower() + sentence[1:]\n",
+ "        if sentence[-1] == '.':\n",
+ "            sentence = sentence[:-1]\n",
+ "        return sentence\n",
+ "\n",
+ "    buffer = []\n",
+ "    with open(DOMAIN + \"syn-aug.train.jsonl\", 'r') as f:\n",
+ "        for l in f.readlines():\n",
+ "            buffer.append(json.loads(l))\n",
+ "\n",
+ "    with open(DOMAIN + \"syn-aug.train.jsonl\", 'w') as f:\n",
+ "        for dp in buffer:\n",
+ "            f.write(json.dumps({\n",
+ "                'natural': clean_up_normalize(dp['natural']),\n",
+ "                'canonical': dp['canonical'],\n",
+ "                'formula': dp['formula']\n",
+ "            }) + '\\n')\n",
+ "\n",
+ "if DOMAIN == \"pick-and-place/\":\n",
+ "    # in pick and place, golden natural language data comes without period at the end, no capitalization in the beginning\n",
+ "    def clean_up_normalize(sentence):\n",
+ "        if sentence[0].isupper():\n",
+ "            sentence = sentence[0].lower() + sentence[1:]\n",
+ "        if sentence[-1] == '.':\n",
+ "            sentence = sentence[:-1]\n",
+ "        return sentence\n",
+ "\n",
+ "    buffer = []\n",
+ "    with open(DOMAIN + \"syn-aug.train.jsonl\", 'r') as f:\n",
+ "        for l in f.readlines():\n",
+ "            buffer.append(json.loads(l))\n",
+ "\n",
+ "    with open(DOMAIN + \"syn-aug.train.jsonl\", 'w') as f:\n",
+ "        for dp in buffer:\n",
+ "            f.write(json.dumps({\n",
+ "                'natural': clean_up_normalize(dp['natural']),\n",
+ "                'canonical': dp['canonical'],\n",
+ "                'formula': dp['formula']\n",
+ "            }) + '\\n')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if DOMAIN == \"drone-planning/\":\n",
+ "    # in drone planning, golden natural language data comes with a \"space + period\" at the end, no capitalization in the beginning\n",
+ "    def clean_up_normalize(sentence):\n",
+ "        if sentence[0].isupper():\n",
+ "            sentence = sentence[0].lower() + sentence[1:]\n",
+ "        while sentence[-1] == ' ' or sentence[-1] == '.' or sentence[-1] == '!':\n",
+ "            sentence = sentence[:-1]\n",
+ "        sentence = sentence + '.'\n",
+ "        sentence = sentence.replace('.', ' .')\n",
+ "        sentence = sentence.replace(',', ' ,')\n",
+ "        return sentence\n",
+ "\n",
+ "    buffer = []\n",
+ "    # with open(DOMAIN + \"syn-aug.train.jsonl\", 'r') as f:\n",
+ "    #     for l in f.readlines():\n",
+ "    #         buffer.append(json.loads(l))\n",
+ "\n",
+ "    # with open(DOMAIN + \"syn-aug.train.jsonl\", 'w') as f:\n",
+ "    #     for dp in buffer:\n",
+ "    #         f.write(json.dumps({\n",
+ "    #             'natural': clean_up_normalize(dp['natural']),\n",
+ "    #             'canonical': dp['canonical'],\n",
+ "    #             'formula': dp['formula']\n",
+ "    #         }) + '\\n')\n",
+ "    with open(DOMAIN + \"syn.train.jsonl\", 'r') as f:\n",
+ "        for l in f.readlines():\n",
+ "            buffer.append(json.loads(l))\n",
+ "\n",
+ "    with open(DOMAIN + \"syn.train.jsonl\", 'w') as f:\n",
+ "        for dp in buffer:\n",
+ "            f.write(json.dumps({\n",
+ "                'natural': clean_up_normalize(dp['natural']),\n",
+ "                'canonical': dp['canonical'],\n",
+ "                'formula': dp['formula']\n",
+ "            }) + '\\n')"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "GPML",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.13"
+ },
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "75567ad983eac98a78c1e40a895e8d82557b42cf9969286235abec07ddbf9e7d"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical-lifted.json 
b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical-lifted.json new file mode 100644 index 0000000000000000000000000000000000000000..10b02e56e0fe39919f62150cf1214f3a07791447 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical-lifted.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dfa917383f4e67d55a159050ffa60c83dea987236ef516edf7d2e45e16689d3 +size 7890 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical.json b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical.json new file mode 100644 index 0000000000000000000000000000000000000000..85aa28666ff922102299444a0e2a1bfbc04591d1 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7554f76c648d84596c8a77458bd615550be66d0903143146a8497797205315f4 +size 12442 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/golden-lifted.jsonl b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/golden-lifted.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..57e0f01e18ba7e2df9437748bf855882516fd7d2 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/golden-lifted.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24cc3906e8d32f3a1bad606ac8d262608065aa6ffbe3065fdf98b4b1fb754bc7 +size 599574 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/golden.jsonl b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/golden.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..522f04596a0442e01ecbaeecb971bf7ac539bbcc --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/golden.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d865ee0706e420cc35295a9c237e2ea3eceb0e0f87c7b1739013dd9448c73de1 +size 866586 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/preprocess.ipynb b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/preprocess.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..20d7897135a49e09eac8314096c764515dbf1c81 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/preprocess.ipynb @@ -0,0 +1,561 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"raw/ltl-lifted.txt\", \"r\") as f:\n", + " raw_ltls = f.read().splitlines()\n", + " unique_ltls = set(raw_ltls)\n", + "with open(\"raw/eng-lifted.txt\", 'r') as f:\n", + " raw_engs = f.read().splitlines()\n", + "\n", + "DPs = []\n", + "for ltl, eng in zip(raw_ltls, raw_engs):\n", + " DPs.append({'ltl': ltl, 'eng': eng})" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "F P04\n", + "F P02\n", + "F P01\n", + "F P03\n", + "F & P02 F Z\n", + "F & P02 F X\n", + "F & P04 F P01\n", + "F & P03 F P01\n", + "F & P04 F P02\n", + "F & P04 F P03\n", + "F & P01 F P02\n", + "F & P01 F P04\n", + "F & P02 F P03\n", + "F & P02 F P01\n", + "F & P03 F P04\n", + "F & P02 F P04\n", + "F & P01 F P03\n", + "& F P04 G ! P01\n", + "& F P04 G ! P03\n", + "& F P04 G ! P02\n", + "& F P01 G ! P02\n", + "& F P03 G ! 
P04\n", + "& F P01 G ! P04\n", + "& F P02 G ! P01\n", + "& F P01 G ! P03\n", + "& F P03 G ! P02\n", + "& F P03 G ! P01\n", + "& F P02 G ! P04\n", + "& F P02 G ! P03\n", + "F & | P02 P03 F P04\n", + "F & | P02 P04 F P01\n", + "F & | P01 P03 F P04\n", + "F & | P03 P01 F P04\n", + "F & | P02 P01 F P03\n", + "F & | P02 P01 F P04\n", + "F & | P04 P02 F P01\n", + "F & | P04 P03 F P01\n", + "F & | P04 P03 F P02\n", + "F & | P02 P03 F P01\n" + ] + } + ], + "source": [ + "for ltl in sorted(unique_ltls, key=lambda x: len(x)):\n", + " print(ltl)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Clean-up Domain, with augmentation\n", + "Number of Data Points 3382\n", + "Number of unique LTL expressions: 39\n", + "Number of unique LTL structures: 4\n" + ] + } + ], + "source": [ + "print(\"Clean-up Domain, with augmentation\")\n", + "print(\"Number of Data Points\", len(DPs))\n", + "print(\"Number of unique LTL expressions:\", len(unique_ltls))\n", + "print(\"Number of unique LTL structures:\", 4)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "F & P01 F P04\n", + "Prop_1 keep going and stop when you reach Prop_4\n", + "\n", + "F P02\n", + "move to Prop_2\n", + "\n", + "& F P04 G ! P03\n", + "go only through rooms that are not Prop_3 to get to Prop_4\n", + "\n", + "F & P02 F X\n", + "move into Prop_2 and push the chair back into the purple room\n", + "\n", + "F & | P02 P03 F P01\n", + "go through the Prop_3 or Prop_2 to reach the Prop_1\n", + "\n" + ] + } + ], + "source": [ + "seen_length = set()\n", + "for dp in DPs:\n", + " if len(dp['ltl']) not in seen_length:\n", + " seen_length.add(len(dp['ltl']))\n", + " print(dp['ltl'])\n", + " print(dp['eng'])\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean up LTL expressions " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "Room_Types = [('P01', 'Prop_1'), ('P02', 'Prop_2'), ('P03', 'Prop_3'), ('P04', 'Prop_4')]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "APs = [\n", + " {'ap': 'P01', 'eng': 'Prop_1'},\n", + " {'ap': 'P02', 'eng': 'Prop_2'},\n", + " {'ap': 'P03', 'eng': 'Prop_3'},\n", + " {'ap': 'P04', 'eng': 'Prop_4'},\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def build_type_A(ap1):\n", + " return {\n", + " 'raw_ltl': f\"F {ap1['ap']}\",\n", + " 'canonical_ltl': f\"finally {ap1['eng']}\",\n", + " 'eng': f\"{ap1['eng']}\"}\n", + "\n", + "def build_type_B(room_1, room_2):\n", + " return {\n", + " \"raw_ltl\": f\"F & {room_1['ap']} F {room_2['ap']}\",\n", + " \"canonical_ltl\": f\"finally ( and ( {room_1['eng']} , finally ( {room_2['eng']} ) ) )\",\n", + " \"eng\": f\"{room_1['eng']} first, and then {room_2['eng']}\"}\n", + "\n", + "def build_type_C(room_1, room_2):\n", + " return {\n", + " \"raw_ltl\": f\"& F {room_1['ap']} G ! 
{room_2['ap']}\",\n", + " \"canonical_ltl\": f\"and ( finally ( {room_1['eng']} ) , globally ( not ( {room_2['eng']} ) ) )\",\n", + " \"eng\": f\"{room_1['eng']}, and do not ever {room_2['eng']}\"\n", + " }\n", + "\n", + "def build_type_D(room_1, room_2, room_3):\n", + " return {\n", + " \"raw_ltl\": f\"F & | {room_1['ap']} {room_2['ap']} F {room_3['ap']}\",\n", + " \"canonical_ltl\": f\"finally ( and ( or ( {room_1['eng']} , {room_2['eng']} ) , finally ( {room_3['eng']} ) ) )\",\n", + " \"eng\": f\"{room_1['eng']} or {room_2['eng']} to finally {room_3['eng']}\"\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "translation_seeds = []\n", + "for r1 in APs:\n", + " translation_seeds.append(build_type_A(r1))\n", + " for r2 in APs:\n", + " if r1 == r2:\n", + " continue\n", + " translation_seeds.append(build_type_B(r1, r2))\n", + " translation_seeds.append(build_type_C(r1, r2))\n", + " for r3 in APs:\n", + " if r1 == r3 or r2 == r3:\n", + " continue\n", + " translation_seeds.append(build_type_D(r1, r2, r3))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "52\n" + ] + } + ], + "source": [ + "print(len(translation_seeds))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'F & P02 F X', 'F & P02 F Z'}\n" + ] + } + ], + "source": [ + "seed_ltls = set([t['raw_ltl'] for t in translation_seeds])\n", + "print(unique_ltls - seed_ltls)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "additional_seed_1 = {\n", + " \"raw_ltl\": \"F & P02 F X\",\n", + " \"canonical_ltl\": \"finally ( and ( Prop_2 , finally ( Prop_1 ) ) )\",\n", + " \"eng\": \"Prop_2 and Prop_1\"\n", + "}\n", + "\n", + "additional_seed_2 = {\n", + " \"raw_ltl\": \"F & P02 F Z\",\n", + " \"canonical_ltl\": \"finally ( and ( Prop_2 , finally ( Prop_4 ) ) )\",\n", + " \"eng\": \"Prop_2 and Prop_4\"\n", + "}\n", + "\n", + "translation_seeds.append(additional_seed_1)\n", + "translation_seeds.append(additional_seed_2)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "54" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(translation_seeds)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "set()\n" + ] + } + ], + "source": [ + "seed_ltls = set([t['raw_ltl'] for t in translation_seeds])\n", + "print(unique_ltls - seed_ltls)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save the canonical decoding list" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "possible_decodings = {}\n", + "for seed in translation_seeds:\n", + " canonical = seed['canonical_ltl']\n", + " possible_decodings[canonical] = {\n", + " 'formula': canonical,\n", + " 'raw': seed['raw_ltl'],\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"canonical-lifted.json\", 'w') as f:\n", + " f.write(json.dumps(possible_decodings, indent=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "### Save translation seed for zero shot" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"train_seed-lifted.jsonl\", \"w\") as f:\n", + " for dp in translation_seeds:\n", + " better_ltl = dp['canonical_ltl']\n", + " entry = {'canonical': better_ltl, 'formula': better_ltl, 'natural': dp['eng']}\n", + " json.dump(entry, f)\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save golden data for evaluation " + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "raw_canonical_mapping = {\n", + " seed['raw_ltl']: seed['canonical_ltl'] for seed in translation_seeds\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'F P01': 'finally Prop_1',\n", + " 'F & P01 F P02': 'finally ( and ( Prop_1 , finally ( Prop_2 ) ) )',\n", + " '& F P01 G ! P02': 'and ( finally ( Prop_1 ) , globally ( not ( Prop_2 ) ) )',\n", + " 'F & | P01 P02 F P03': 'finally ( and ( or ( Prop_1 , Prop_2 ) , finally ( Prop_3 ) ) )',\n", + " 'F & | P01 P02 F P04': 'finally ( and ( or ( Prop_1 , Prop_2 ) , finally ( Prop_4 ) ) )',\n", + " 'F & P01 F P03': 'finally ( and ( Prop_1 , finally ( Prop_3 ) ) )',\n", + " '& F P01 G ! P03': 'and ( finally ( Prop_1 ) , globally ( not ( Prop_3 ) ) )',\n", + " 'F & | P01 P03 F P02': 'finally ( and ( or ( Prop_1 , Prop_3 ) , finally ( Prop_2 ) ) )',\n", + " 'F & | P01 P03 F P04': 'finally ( and ( or ( Prop_1 , Prop_3 ) , finally ( Prop_4 ) ) )',\n", + " 'F & P01 F P04': 'finally ( and ( Prop_1 , finally ( Prop_4 ) ) )',\n", + " '& F P01 G ! P04': 'and ( finally ( Prop_1 ) , globally ( not ( Prop_4 ) ) )',\n", + " 'F & | P01 P04 F P02': 'finally ( and ( or ( Prop_1 , Prop_4 ) , finally ( Prop_2 ) ) )',\n", + " 'F & | P01 P04 F P03': 'finally ( and ( or ( Prop_1 , Prop_4 ) , finally ( Prop_3 ) ) )',\n", + " 'F P02': 'finally Prop_2',\n", + " 'F & P02 F P01': 'finally ( and ( Prop_2 , finally ( Prop_1 ) ) )',\n", + " '& F P02 G ! P01': 'and ( finally ( Prop_2 ) , globally ( not ( Prop_1 ) ) )',\n", + " 'F & | P02 P01 F P03': 'finally ( and ( or ( Prop_2 , Prop_1 ) , finally ( Prop_3 ) ) )',\n", + " 'F & | P02 P01 F P04': 'finally ( and ( or ( Prop_2 , Prop_1 ) , finally ( Prop_4 ) ) )',\n", + " 'F & P02 F P03': 'finally ( and ( Prop_2 , finally ( Prop_3 ) ) )',\n", + " '& F P02 G ! P03': 'and ( finally ( Prop_2 ) , globally ( not ( Prop_3 ) ) )',\n", + " 'F & | P02 P03 F P01': 'finally ( and ( or ( Prop_2 , Prop_3 ) , finally ( Prop_1 ) ) )',\n", + " 'F & | P02 P03 F P04': 'finally ( and ( or ( Prop_2 , Prop_3 ) , finally ( Prop_4 ) ) )',\n", + " 'F & P02 F P04': 'finally ( and ( Prop_2 , finally ( Prop_4 ) ) )',\n", + " '& F P02 G ! P04': 'and ( finally ( Prop_2 ) , globally ( not ( Prop_4 ) ) )',\n", + " 'F & | P02 P04 F P01': 'finally ( and ( or ( Prop_2 , Prop_4 ) , finally ( Prop_1 ) ) )',\n", + " 'F & | P02 P04 F P03': 'finally ( and ( or ( Prop_2 , Prop_4 ) , finally ( Prop_3 ) ) )',\n", + " 'F P03': 'finally Prop_3',\n", + " 'F & P03 F P01': 'finally ( and ( Prop_3 , finally ( Prop_1 ) ) )',\n", + " '& F P03 G ! 
P01': 'and ( finally ( Prop_3 ) , globally ( not ( Prop_1 ) ) )',\n", + " 'F & | P03 P01 F P02': 'finally ( and ( or ( Prop_3 , Prop_1 ) , finally ( Prop_2 ) ) )',\n", + " 'F & | P03 P01 F P04': 'finally ( and ( or ( Prop_3 , Prop_1 ) , finally ( Prop_4 ) ) )',\n", + " 'F & P03 F P02': 'finally ( and ( Prop_3 , finally ( Prop_2 ) ) )',\n", + " '& F P03 G ! P02': 'and ( finally ( Prop_3 ) , globally ( not ( Prop_2 ) ) )',\n", + " 'F & | P03 P02 F P01': 'finally ( and ( or ( Prop_3 , Prop_2 ) , finally ( Prop_1 ) ) )',\n", + " 'F & | P03 P02 F P04': 'finally ( and ( or ( Prop_3 , Prop_2 ) , finally ( Prop_4 ) ) )',\n", + " 'F & P03 F P04': 'finally ( and ( Prop_3 , finally ( Prop_4 ) ) )',\n", + " '& F P03 G ! P04': 'and ( finally ( Prop_3 ) , globally ( not ( Prop_4 ) ) )',\n", + " 'F & | P03 P04 F P01': 'finally ( and ( or ( Prop_3 , Prop_4 ) , finally ( Prop_1 ) ) )',\n", + " 'F & | P03 P04 F P02': 'finally ( and ( or ( Prop_3 , Prop_4 ) , finally ( Prop_2 ) ) )',\n", + " 'F P04': 'finally Prop_4',\n", + " 'F & P04 F P01': 'finally ( and ( Prop_4 , finally ( Prop_1 ) ) )',\n", + " '& F P04 G ! P01': 'and ( finally ( Prop_4 ) , globally ( not ( Prop_1 ) ) )',\n", + " 'F & | P04 P01 F P02': 'finally ( and ( or ( Prop_4 , Prop_1 ) , finally ( Prop_2 ) ) )',\n", + " 'F & | P04 P01 F P03': 'finally ( and ( or ( Prop_4 , Prop_1 ) , finally ( Prop_3 ) ) )',\n", + " 'F & P04 F P02': 'finally ( and ( Prop_4 , finally ( Prop_2 ) ) )',\n", + " '& F P04 G ! P02': 'and ( finally ( Prop_4 ) , globally ( not ( Prop_2 ) ) )',\n", + " 'F & | P04 P02 F P01': 'finally ( and ( or ( Prop_4 , Prop_2 ) , finally ( Prop_1 ) ) )',\n", + " 'F & | P04 P02 F P03': 'finally ( and ( or ( Prop_4 , Prop_2 ) , finally ( Prop_3 ) ) )',\n", + " 'F & P04 F P03': 'finally ( and ( Prop_4 , finally ( Prop_3 ) ) )',\n", + " '& F P04 G ! 
P03': 'and ( finally ( Prop_4 ) , globally ( not ( Prop_3 ) ) )',\n", + " 'F & | P04 P03 F P01': 'finally ( and ( or ( Prop_4 , Prop_3 ) , finally ( Prop_1 ) ) )',\n", + " 'F & | P04 P03 F P02': 'finally ( and ( or ( Prop_4 , Prop_3 ) , finally ( Prop_2 ) ) )',\n", + " 'F & P02 F X': 'finally ( and ( Prop_2 , finally ( Prop_1 ) ) )',\n", + " 'F & P02 F Z': 'finally ( and ( Prop_2 , finally ( Prop_4 ) ) )'}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_canonical_mapping" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3382" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(DPs)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ltl': 'F & P01 F P04',\n", + " 'eng': 'Prop_1 keep going and stop when you reach Prop_4'}" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DPs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"golden-lifted.jsonl\", \"w\") as f:\n", + " for dp in DPs:\n", + " entry = {\n", + " 'canonical': raw_canonical_mapping[dp['ltl']],\n", + " 'formula': raw_canonical_mapping[dp['ltl']],\n", + " 'natural': dp['eng'],\n", + " 'raw_ltl': dp['ltl'],\n", + " }\n", + " json.dump(entry, f)\n", + " f.write('\\n')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.12 ('GPML')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.17" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "92a84f77637d1d47b588cbbaac9b07f8c628b67f58e672e955ed4902878afbbe" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/raw/eng.txt b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/raw/eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..908f836237fc46a2ff5822e68a4c90eda125c2a2 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/raw/eng.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1397a0bea0699ad7464d48c1ae20497389dca87d8a31f82463e81249c02f5a53 +size 123277 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/raw/ltl.txt b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/raw/ltl.txt new file mode 100644 index 0000000000000000000000000000000000000000..4e61184a8d2f34e8e7cdda36a959ff3892ad3be2 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/raw/ltl.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:422d08898216ade2b1f301f07871a6c7ebd267de43d9bb5c57529799bb3ae048 +size 44583 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/train_seed-lifted.jsonl b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/train_seed-lifted.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..9936b7564a8f538ae238080e2ab50b53524dcace --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/train_seed-lifted.jsonl @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:aa23e9cdf5c8e4e363ddd63cd09d62bb0b8064335d224cdb6d3f0f428756d764 +size 8518 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/train_seed.jsonl b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/train_seed.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..83248c03dd298606a9c15e14c7f04f12ef9cc872 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/train_seed.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e45f3476cc3c731fae04bb82ae8496ba1bb9c5b88eeff9e28307fc58372c012a +size 15301 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/canonical.json b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/canonical.json new file mode 100644 index 0000000000000000000000000000000000000000..36e46a293620fda68c6c3c2d91098156085da0c8 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/canonical.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c09ca4c086b268eae4539e446b10d22a887805fc5295b0b9dae0bab0e82ee47 +size 73886 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/golden.jsonl b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/golden.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5a96c1d0f217c2a2c3cea7a033f4dc0830ecec77 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/golden.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e0b3ed2687a4a4f0a42898a522d4e682fcd2fcfbef98c1554517e72e880720f +size 1187504 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/preprocess.ipynb b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/preprocess.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..f4cf258de2c382cf339c37ed4de6e513652541e6 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/preprocess.ipynb @@ -0,0 +1,817 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"raw/ltl.txt\", \"r\") as f:\n", + " raw_ltls = f.read().splitlines()\n", + " unique_ltls = set(raw_ltls)\n", + "with open(\"raw/eng.txt\", 'r') as f:\n", + " raw_engs = f.read().splitlines()\n", + "\n", + "DPs = []\n", + "for ltl, eng in zip(raw_ltls, raw_engs):\n", + " DPs.append({'ltl': ltl, 'eng': eng})" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "F ( P04 )\n", + "F ( P01 )\n", + "F ( P10 )\n", + "F ( P11 )\n", + "F ( P12 )\n", + "F ( P07 )\n", + "F ( P05 )\n", + "F ( P08 )\n", + "F ( P02 )\n", + "F ( P09 )\n", + "F ( P06 )\n", + "F ( P03 )\n", + "F ( P09 & ~ P01 )\n", + "F ( P12 & ~ P04 )\n", + "F ( P06 & ~ P05 )\n", + "F ( P03 & ~ P02 )\n", + "F ( P06 & ~ P04 )\n", + "F ( P08 & ~ P12 )\n", + "F ( P02 & ~ P11 )\n", + "F ( P04 & ~ P05 )\n", + "F ( P11 & ~ P02 )\n", + "F ( P12 & ~ P03 )\n", + "F ( P07 & ~ P04 )\n", + "F ( P11 & ~ P12 )\n", + "F ( P07 & ~ P06 )\n", + "F ( P04 & ~ P10 )\n", + "F ( P08 & ~ P10 )\n", + "F ( P09 & ~ P05 )\n", + "F ( P04 & ~ P03 )\n", + "F ( P04 & ~ P11 )\n", + "F ( P02 & ~ P05 )\n", + "F ( P01 & ~ P10 )\n", + "F ( P12 
& ~ P05 )\n", + "F ( P11 & ~ P06 )\n", + "F ( P03 & ~ P11 )\n", + "F ( P06 & ~ P11 )\n", + "F ( P04 & ~ P06 )\n", + "F ( P08 & ~ P11 )\n", + "F ( P11 & ~ P05 )\n", + "F ( P10 & ~ P05 )\n", + "F ( P12 & ~ P10 )\n", + "F ( P07 & ~ P10 )\n", + "F ( P08 & ~ P04 )\n", + "F ( P08 & ~ P03 )\n", + "F ( P07 & ~ P02 )\n", + "F ( P04 & ~ P12 )\n", + "F ( P07 & ~ P01 )\n", + "F ( P10 & ~ P01 )\n", + "F ( P06 & ~ P12 )\n", + "F ( P07 & ~ P03 )\n", + "F ( P11 & ~ P04 )\n", + "F ( P08 & ~ P02 )\n", + "F ( P04 & ~ P02 )\n", + "F ( P12 & ~ P02 )\n", + "F ( P11 & ~ P03 )\n", + "F ( P02 & ~ P01 )\n", + "F ( P07 & ~ P11 )\n", + "F ( P06 & ~ P10 )\n", + "F ( P09 & ~ P10 )\n", + "F ( P10 & ~ P06 )\n", + "F ( P02 & ~ P12 )\n", + "F ( P09 & ~ P11 )\n", + "F ( P07 & ~ P05 )\n", + "F ( P04 & ~ P01 )\n", + "F ( P09 & ~ P03 )\n", + "F ( P03 & ~ P06 )\n", + "F ( P08 & ~ P01 )\n", + "F ( P09 & ~ P06 )\n", + "F ( P03 & ~ P01 )\n", + "F ( P05 & ~ P03 )\n", + "F ( P03 & ~ P05 )\n", + "F ( P06 & ~ P01 )\n", + "F ( P05 & ~ P10 )\n", + "F ( P12 & ~ P01 )\n", + "F ( P11 & ~ P10 )\n", + "F ( P03 & ~ P12 )\n", + "F ( P03 & ~ P04 )\n", + "F ( P01 & ~ P11 )\n", + "F ( P05 & ~ P02 )\n", + "F ( P02 & ~ P03 )\n", + "F ( P02 & ~ P06 )\n", + "F ( P07 & ~ P12 )\n", + "F ( P05 & ~ P11 )\n", + "F ( P05 & ~ P06 )\n", + "F ( P05 & ~ P12 )\n", + "F ( P05 & ~ P01 )\n", + "F ( P02 & ~ P04 )\n", + "F ( P02 & ~ P10 )\n", + "F ( P08 & ~ P05 )\n", + "F ( P06 & ~ P03 )\n", + "F ( P10 & ~ P12 )\n", + "F ( P08 & ~ P06 )\n", + "F ( P10 & ~ P03 )\n", + "F ( P12 & ~ P06 )\n", + "F ( P09 & ~ P04 )\n", + "F ( P10 & ~ P02 )\n", + "F ( P05 & ~ P04 )\n", + "F ( P11 & ~ P01 )\n", + "F ( P09 & ~ P02 )\n", + "F ( P10 & ~ P04 )\n", + "F ( P06 & ~ P02 )\n", + "F ( P01 & ~ P12 )\n", + "F ( P03 & ~ P10 )\n", + "F ( P09 & ~ P12 )\n", + "~ ( P04 ) U ( P12 )\n", + "~ ( P03 ) U ( P07 )\n", + "~ ( P12 ) U ( P02 )\n", + "~ ( P05 ) U ( P02 )\n", + "~ ( P12 ) U ( P05 )\n", + "~ ( P06 ) U ( P11 )\n", + "~ ( P06 ) U ( P07 )\n", + "~ ( P12 ) U ( P06 )\n", + "~ ( P03 ) U ( P12 )\n", + "~ ( P01 ) U ( P07 )\n", + "~ ( P04 ) U ( P02 )\n", + "~ ( P02 ) U ( P09 )\n", + "~ ( P05 ) U ( P10 )\n", + "~ ( P04 ) U ( P05 )\n", + "~ ( P06 ) U ( P08 )\n", + "~ ( P06 ) U ( P05 )\n", + "~ ( P04 ) U ( P03 )\n", + "~ ( P03 ) U ( P09 )\n", + "~ ( P10 ) U ( P01 )\n", + "~ ( P03 ) U ( P10 )\n", + "~ ( P11 ) U ( P04 )\n", + "~ ( P06 ) U ( P12 )\n", + "~ ( P06 ) U ( P10 )\n", + "~ ( P02 ) U ( P06 )\n", + "~ ( P05 ) U ( P07 )\n", + "~ ( P04 ) U ( P01 )\n", + "~ ( P02 ) U ( P11 )\n", + "~ ( P02 ) U ( P10 )\n", + "~ ( P02 ) U ( P08 )\n", + "~ ( P05 ) U ( P04 )\n", + "~ ( P10 ) U ( P11 )\n", + "~ ( P12 ) U ( P11 )\n", + "~ ( P10 ) U ( P04 )\n", + "~ ( P01 ) U ( P10 )\n", + "~ ( P03 ) U ( P08 )\n", + "~ ( P06 ) U ( P09 )\n", + "~ ( P02 ) U ( P07 )\n", + "~ ( P01 ) U ( P11 )\n", + "~ ( P05 ) U ( P01 )\n", + "~ ( P03 ) U ( P04 )\n", + "~ ( P02 ) U ( P03 )\n", + "~ ( P12 ) U ( P04 )\n", + "~ ( P04 ) U ( P10 )\n", + "~ ( P10 ) U ( P12 )\n", + "~ ( P11 ) U ( P06 )\n", + "~ ( P06 ) U ( P02 )\n", + "~ ( P02 ) U ( P05 )\n", + "~ ( P05 ) U ( P12 )\n", + "~ ( P03 ) U ( P06 )\n", + "~ ( P04 ) U ( P07 )\n", + "~ ( P11 ) U ( P05 )\n", + "~ ( P04 ) U ( P08 )\n", + "~ ( P12 ) U ( P03 )\n", + "~ ( P04 ) U ( P06 )\n", + "~ ( P02 ) U ( P04 )\n", + "~ ( P06 ) U ( P04 )\n", + "~ ( P03 ) U ( P02 )\n", + "~ ( P12 ) U ( P01 )\n", + "~ ( P02 ) U ( P12 )\n", + "~ ( P11 ) U ( P03 )\n", + "~ ( P11 ) U ( P01 )\n", + "~ ( P03 ) U ( P11 )\n", + "~ ( P04 ) U ( P09 )\n", + "~ ( P11 ) U ( P02 )\n", + "~ ( P10 ) U ( P05 )\n", + "~ ( P03 
) U ( P05 )\n", + "~ ( P12 ) U ( P10 )\n", + "~ ( P06 ) U ( P03 )\n", + "~ ( P05 ) U ( P06 )\n", + "~ ( P05 ) U ( P11 )\n", + "~ ( P04 ) U ( P11 )\n", + "~ ( P10 ) U ( P03 )\n", + "~ ( P01 ) U ( P12 )\n", + "~ ( P05 ) U ( P08 )\n", + "~ ( P10 ) U ( P06 )\n", + "~ ( P01 ) U ( P09 )\n", + "~ ( P05 ) U ( P03 )\n", + "~ ( P05 ) U ( P09 )\n", + "~ ( P06 ) U ( P01 )\n", + "~ ( P03 ) U ( P01 )\n", + "~ ( P02 ) U ( P01 )\n", + "~ ( P10 ) U ( P02 )\n", + "~ ( P01 ) U ( P08 )\n", + "F ( P10 & F ( P04 ) )\n", + "F ( P12 ) & G ( P04 )\n", + "F ( P03 & F ( P02 ) )\n", + "F ( P10 ) & G ( P07 )\n", + "F ( P10 ) & G ( P01 )\n", + "F ( P10 & F ( P02 ) )\n", + "F ( P07 & F ( P06 ) )\n", + "F ( P04 & F ( P07 ) )\n", + "F ( P12 ) & G ( P02 )\n", + "F ( P02 & F ( P03 ) )\n", + "F ( P03 ) & G ( P08 )\n", + "F ( P08 & F ( P11 ) )\n", + "F ( P07 & F ( P01 ) )\n", + "F ( P12 & F ( P02 ) )\n", + "F ( P01 ) & G ( P09 )\n", + "F ( P12 & F ( P11 ) )\n", + "F ( P10 & F ( P03 ) )\n", + "F ( P06 ) & G ( P07 )\n", + "F ( P12 ) & G ( P08 )\n", + "F ( P11 & F ( P02 ) )\n", + "F ( P10 & F ( P08 ) )\n", + "F ( P03 & F ( P10 ) )\n", + "F ( P10 & F ( P05 ) )\n", + "F ( P06 ) & G ( P09 )\n", + "F ( P02 & F ( P04 ) )\n", + "F ( P07 & F ( P03 ) )\n", + "F ( P12 ) & G ( P05 )\n", + "F ( P06 & F ( P04 ) )\n", + "F ( P04 ) & G ( P08 )\n", + "F ( P12 ) & G ( P03 )\n", + "F ( P01 & F ( P07 ) )\n", + "F ( P05 & F ( P09 ) )\n", + "F ( P06 ) & G ( P08 )\n", + "F ( P09 & F ( P02 ) )\n", + "F ( P12 & F ( P06 ) )\n", + "F ( P01 & F ( P03 ) )\n", + "F ( P08 & F ( P12 ) )\n", + "F ( P03 & F ( P07 ) )\n", + "F ( P11 & F ( P03 ) )\n", + "F ( P10 ) & G ( P04 )\n", + "F ( P12 ) & G ( P07 )\n", + "F ( P01 & F ( P11 ) )\n", + "F ( P03 & F ( P09 ) )\n", + "F ( P10 ) & G ( P02 )\n", + "F ( P04 ) & G ( P07 )\n", + "F ( P09 & F ( P10 ) )\n", + "F ( P01 & F ( P08 ) )\n", + "F ( P10 & F ( P01 ) )\n", + "F ( P01 & F ( P10 ) )\n", + "F ( P08 & F ( P01 ) )\n", + "F ( P03 & F ( P06 ) )\n", + "F ( P07 & F ( P10 ) )\n", + "F ( P09 & F ( P07 ) )\n", + "F ( P10 & F ( P12 ) )\n", + "F ( P12 ) & G ( P01 )\n", + "F ( P10 & F ( P09 ) )\n", + "F ( P08 & F ( P06 ) )\n", + "F ( P07 & F ( P04 ) )\n", + "F ( P02 & F ( P10 ) )\n", + "F ( P11 ) & G ( P03 )\n", + "F ( P01 & F ( P02 ) )\n", + "F ( P09 & F ( P11 ) )\n", + "F ( P11 ) & G ( P08 )\n", + "F ( P10 ) & G ( P06 )\n", + "F ( P07 & F ( P02 ) )\n", + "F ( P04 & F ( P02 ) )\n", + "F ( P12 & F ( P01 ) )\n", + "F ( P12 ) & G ( P09 )\n", + "F ( P03 & F ( P08 ) )\n", + "F ( P11 ) & G ( P02 )\n", + "F ( P02 & F ( P05 ) )\n", + "F ( P01 & F ( P04 ) )\n", + "F ( P03 & F ( P05 ) )\n", + "F ( P06 & F ( P05 ) )\n", + "F ( P02 ) & G ( P09 )\n", + "F ( P09 & F ( P05 ) )\n", + "F ( P06 & F ( P09 ) )\n", + "F ( P01 & F ( P12 ) )\n", + "F ( P05 & F ( P10 ) )\n", + "F ( P05 ) & G ( P09 )\n", + "F ( P04 & F ( P10 ) )\n", + "F ( P05 & F ( P03 ) )\n", + "F ( P09 & F ( P06 ) )\n", + "F ( P11 ) & G ( P09 )\n", + "F ( P12 & F ( P08 ) )\n", + "F ( P10 & F ( P11 ) )\n", + "F ( P06 & F ( P07 ) )\n", + "F ( P06 & F ( P12 ) )\n", + "F ( P05 & F ( P11 ) )\n", + "F ( P08 & F ( P03 ) )\n", + "F ( P08 & F ( P10 ) )\n", + "F ( P10 & F ( P07 ) )\n", + "F ( P03 ) & G ( P07 )\n", + "F ( P01 ) & G ( P08 )\n", + "F ( P08 & F ( P02 ) )\n", + "F ( P06 & F ( P10 ) )\n", + "F ( P10 & F ( P06 ) )\n", + "F ( P11 ) & G ( P01 )\n", + "F ( P09 & F ( P03 ) )\n", + "F ( P08 & F ( P04 ) )\n", + "F ( P02 ) & G ( P08 )\n", + "F ( P12 & F ( P05 ) )\n", + "F ( P03 & F ( P12 ) )\n", + "F ( P03 ) & G ( P09 )\n", + "F ( P05 & F ( P04 ) )\n", + "F ( P01 ) & G ( P07 )\n", + "F 
( P02 & F ( P12 ) )\n", + "F ( P11 & F ( P09 ) )\n", + "F ( P04 & F ( P05 ) )\n", + "F ( P07 & F ( P12 ) )\n", + "F ( P09 & F ( P08 ) )\n", + "F ( P09 & F ( P04 ) )\n", + "F ( P05 & F ( P02 ) )\n", + "F ( P12 ) & G ( P06 )\n", + "F ( P11 & F ( P05 ) )\n", + "F ( P10 ) & G ( P09 )\n", + "F ( P07 & F ( P05 ) )\n", + "F ( P06 & F ( P02 ) )\n", + "F ( P11 ) & G ( P05 )\n", + "F ( P04 & F ( P11 ) )\n", + "F ( P04 & F ( P12 ) )\n", + "F ( P10 ) & G ( P08 )\n", + "F ( P02 ) & G ( P07 )\n", + "F ( P03 & F ( P11 ) )\n", + "F ( P06 & F ( P03 ) )\n", + "F ( P08 & F ( P05 ) )\n", + "F ( P11 ) & G ( P07 )\n", + "F ( P04 ) & G ( P09 )\n", + "F ( P05 & F ( P08 ) )\n", + "F ( P07 & F ( P08 ) )\n", + "F ( P11 & F ( P01 ) )\n", + "F ( P04 & F ( P03 ) )\n", + "F ( P09 & F ( P01 ) )\n", + "F ( P12 & F ( P10 ) )\n", + "F ( P05 ) & G ( P08 )\n", + "F ( P11 & F ( P08 ) )\n", + "F ( P11 ) & G ( P04 )\n", + "F ( P04 & F ( P09 ) )\n", + "F ( P11 & F ( P07 ) )\n", + "F ( P05 ) & G ( P07 )\n", + "F ( P02 & F ( P09 ) )\n", + "F ( P08 & F ( P07 ) )\n", + "F ( P10 ) & G ( P03 )\n", + "F ( P12 & F ( P09 ) )\n", + "F ( P11 & F ( P04 ) )\n", + "F ( P11 ) & G ( P06 )\n", + "F ( P07 & F ( P11 ) )\n", + "F ( P02 & F ( P11 ) )\n", + "F ( P02 & F ( P06 ) )\n", + "F ( P05 & F ( P06 ) )\n", + "F ( P06 & F ( P08 ) )\n", + "F ( P05 & F ( P12 ) )\n", + "F ( P12 & F ( P03 ) )\n", + "F ( P04 & F ( P08 ) )\n", + "F ( P04 & F ( P06 ) )\n", + "F ( P02 & F ( P08 ) )\n", + "F ( P03 & F ( P04 ) )\n", + "F ( P12 & F ( P04 ) )\n", + "F ( P10 ) & G ( P05 )\n", + "F ( P09 & F ( P12 ) )\n", + "F ( P01 & F ( P09 ) )\n", + "F ( P01 & F ( P05 ) )\n", + "F ( P05 & F ( P07 ) )\n", + "F ( P12 & F ( P07 ) )\n", + "F ( P01 & F ( P06 ) )\n", + "F ( P06 & F ( P11 ) )\n", + "F ( P02 & F ( P07 ) )\n", + "F ( P11 & F ( P06 ) )\n" + ] + } + ], + "source": [ + "for ltl in sorted(unique_ltls, key=lambda x: len(x)):\n", + " print(ltl)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Drone-planning Domain, with augmentation\n", + "Number of Data Points 6185\n", + "Number of unique LTL expressions: 355\n", + "Number of unique LTL structures: 5\n" + ] + } + ], + "source": [ + "print(\"Drone-planning Domain, with augmentation\")\n", + "print(\"Number of Data Points\", len(DPs))\n", + "print(\"Number of unique LTL expressions:\", len(unique_ltls))\n", + "print(\"Number of unique LTL structures:\", 5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "F ( AP )\n", + "- Go to AP\n", + "\n", + "F ( AP1 & F ( AP2 ) )\n", + "- First go to AP1 and then to AP2\n", + "\n", + "\n", + "F ( AP1 ) & G ( AP2 )\n", + "- Go to AP1 and Always in AP2\n", + "\n", + "\n", + "F ( AP1 ) & ~ ( AP2 )\n", + "- Go to AP1 and avoid AP2 \n", + "\n", + "~ ( AP1 ) U ( AP2 )\n", + "- avoid going through AP1 until go through AP2\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean up LTL expressions " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "ori_aps = set()\n", + "for unique_ltl in unique_ltls:\n", + " ori_aps.update(unique_ltl.split())" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'&',\n", + " '(',\n", + " ')',\n", + " 'F',\n", + " 'G',\n", + " 'P01',\n", + " 'P02',\n", + " 'P03',\n", + " 'P04',\n", + " 'P05',\n", + " 'P06',\n", + " 'P07',\n", + " 'P08',\n", + " 'P09',\n", + 
" 'P10',\n", + " 'P11',\n", + " 'P12',\n", + " 'U',\n", + " '~'}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ori_aps" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "APs = [\n", + " {'ap': 'blue_room', 'eng': 'the blue room'}, \n", + " {'ap': 'green_room', 'eng': 'the green room'},\n", + " {'ap': 'orange_room', 'eng': 'the orange room'},\n", + " {'ap': 'purple_room', 'eng': 'the purple room'},\n", + " {'ap': 'red_room', 'eng': 'the red room'},\n", + " {'ap': 'yellow_room', 'eng': 'the yellow room'},\n", + " {'ap': 'first_floor', 'eng': 'the first floor'},\n", + " {'ap': 'second_floor', 'eng': 'the second floor'},\n", + " {'ap': 'third_floor', 'eng': 'the third floor'},\n", + " {'ap': 'landmark_1', 'eng': 'landmark 1'},\n", + " {'ap': 'landmark_2', 'eng': 'landmark 2'},\n", + " {'ap': 'landmark_3', 'eng': 'landmark 3'},\n", + "]\n", + "APs = [\n", + " {'ap': 'P01', 'eng': 'Prop_01'},\n", + " {'ap': 'P02', 'eng': 'Prop_02'},\n", + " {'ap': 'P03', 'eng': 'Prop_03'},\n", + " {'ap': 'P04', 'eng': 'Prop_04'},\n", + " {'ap': 'P05', 'eng': 'Prop_05'},\n", + " {'ap': 'P06', 'eng': 'Prop_06'},\n", + " {'ap': 'P07', 'eng': 'Prop_07'},\n", + " {'ap': 'P08', 'eng': 'Prop_08'},\n", + " {'ap': 'P09', 'eng': 'Prop_09'},\n", + " {'ap': 'P10', 'eng': 'Prop_10'},\n", + " {'ap': 'P11', 'eng': 'Prop_11'},\n", + " {'ap': 'P12', 'eng': 'Prop_12'},\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# from the translator\n", + "def build_type_A(ap1):\n", + " raw_ltl = f\"F ( {ap1['ap']} )\"\n", + " raw_ltl = f\"F ( {ap1['ap']} )\"\n", + " canonical_ltl = f\"finally ( {ap1['eng']} )\"\n", + " translated = f\"go to {ap1['eng']}\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}\n", + "\n", + "def build_type_B(room_1, room_2):\n", + " raw_ltl = f\"F ( {room_1['ap']} & F ( {room_2['ap']} ) )\"\n", + " canonical_ltl = f\"finally ( and ( {room_1['eng']} , finally ( {room_2['eng']} ) ) )\"\n", + " translated = f\"go to {room_1['eng']}, and then {room_2['eng']} at last\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}\n", + "\n", + "def build_type_C(room_1, room_2):\n", + " raw_ltl = f\"F ( {room_1['ap']} ) & G ( {room_2['ap']} )\"\n", + " canonical_ltl = f\"and ( finally ( {room_1['eng']} ) , always ( {room_2['eng']} ) )\"\n", + " translated = f\"go to {room_1['eng']} at last, and always stay in {room_2['eng']}\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}\n", + "\n", + "def build_type_D(room_1, room_2):\n", + " raw_ltl = f\"F ( {room_1['ap']} & ~ {room_2['ap']} )\"\n", + " canonical_ltl = f\"finally ( and ( {room_1['eng']} , not ( {room_2['eng']} ) ) )\"\n", + " translated = f\"go to {room_1['eng']} at last, and avoid {room_2['eng']}\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}\n", + "\n", + "def build_type_E(room_1, room_2):\n", + " raw_ltl = f\"~ ( {room_1['ap']} ) U ( {room_2['ap']} )\"\n", + " canonical_ltl = f\"until ( not ( {room_1['eng']} ) , {room_2['eng']} )\"\n", + " translated = f\"avoid go to {room_1['eng']} until go to {room_2['eng']}\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}" + 
] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "translation_seeds = []\n", + "for r1 in APs:\n", + " translation_seeds.append(build_type_A(r1))\n", + " for r2 in APs:\n", + " if r1 == r2:\n", + " continue\n", + " translation_seeds.append(build_type_B(r1, r2))\n", + " translation_seeds.append(build_type_C(r1, r2))\n", + " translation_seeds.append(build_type_D(r1, r2))\n", + " translation_seeds.append(build_type_E(r1, r2))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "540\n" + ] + } + ], + "source": [ + "print(len(translation_seeds))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "set()\n" + ] + } + ], + "source": [ + "seed_ltls = set([t['raw_ltl'] for t in translation_seeds])\n", + "print(unique_ltls - seed_ltls)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save the canonical decoding list" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "possible_decodings = {}\n", + "for seed in translation_seeds:\n", + " canonical = seed['canonical_ltl']\n", + " possible_decodings[canonical] = {\n", + " 'formula': canonical,\n", + " 'raw': seed['raw_ltl'],\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"canonical.json\", 'w') as f:\n", + " f.write(json.dumps(possible_decodings, indent=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save translation seed for zero shot" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"train_seed.jsonl\", \"w\") as f:\n", + " for dp in translation_seeds:\n", + " better_ltl = dp['canonical_ltl']\n", + " entry = {'canonical': better_ltl, 'formula': better_ltl, 'natural': dp['eng']}\n", + " json.dump(entry, f)\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save golden data for evaluation " + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "raw_canonical_mapping = {\n", + " seed['raw_ltl']: seed['canonical_ltl'] for seed in translation_seeds\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6185" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(DPs)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ltl': '~ ( P04 ) U ( P09 )',\n", + " 'eng': 'go to the twond floor avoiding Prop_04 . 
if you reached Prop_09 you can stay there , or head to Prop_04 .'}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DPs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"golden.jsonl\", \"w\") as f:\n", + " for dp in DPs:\n", + " entry = {\n", + " 'canonical': raw_canonical_mapping[dp['ltl']],\n", + " 'formula': raw_canonical_mapping[dp['ltl']],\n", + " 'natural': dp['eng'],\n", + " 'raw_ltl': dp['ltl'],\n", + " }\n", + " json.dump(entry, f)\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.12 ('GPML')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.17" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "92a84f77637d1d47b588cbbaac9b07f8c628b67f58e672e955ed4902878afbbe" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/raw/eng.txt b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/raw/eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..2756c0722662dee920aca0e381d2dfbaa24c07ed --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/raw/eng.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e92619236e5914a378b02590aa849b638636f8e990fed0c5871ca825a3514898 +size 279550 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/raw/ltl.txt b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/raw/ltl.txt new file mode 100644 index 0000000000000000000000000000000000000000..f298beb880f69480609f94bc2195d934578233c0 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/raw/ltl.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f94fbdd544881ff5878b8c1bac235b3e2daf9f3299534a6f4244ff7a1735b8d +size 127212 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/train_seed.jsonl b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/train_seed.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..76c69e58b4a0a682801b56d9db82065635bbaed6 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/drone-planning/train_seed.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f0497459f33dea5b40f948bb4669c686533b28884e1dd1de16e476629bde672 +size 82488 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/canonical.json b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/canonical.json new file mode 100644 index 0000000000000000000000000000000000000000..9e3f1838116d9cc1154aa8a300d8130454d6eedb --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/canonical.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1363d85f461329a60fc7199493899280d5d3b514c88283470a8423504021364 +size 1249 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/golden.jsonl b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/golden.jsonl 
new file mode 100644 index 0000000000000000000000000000000000000000..96dada4868f79e48eb6afa92c6c61d89563c251d --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/golden.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe1a0d48d87f6a65c82b7645e8d3dc2709f0b19adf16379eb7df066e1ca4b8a8 +size 256421 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/preprocess.ipynb b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/preprocess.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..b5c545db1abb15e296b062aa0b1cc828cc1dad5e --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/preprocess.ipynb @@ -0,0 +1,360 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"raw/ltl.txt\", \"r\") as f:\n", + " raw_ltls = f.read().splitlines()\n", + " unique_ltls = set(raw_ltls)\n", + "with open(\"raw/eng.txt\", 'r') as f:\n", + " raw_engs = f.read().splitlines()\n", + "\n", + "DPs = []\n", + "for ltl, eng in zip(raw_ltls, raw_engs):\n", + " DPs.append({'ltl': ltl, 'eng': eng})" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "G & U S ! C F C\n", + "G & U S ! A F A\n", + "G & U S ! Y F Y\n", + "G & U S ! R F R\n", + "G & U S ! B F B\n" + ] + } + ], + "source": [ + "for ltl in sorted(unique_ltls, key=lambda x: len(x)):\n", + " print(ltl)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Clean-up Domain, with augmentation\n", + "Number of Data Points 744\n", + "Number of unique LTL expressions: 5\n", + "Number of unique LTL structures: 1\n" + ] + } + ], + "source": [ + "print(\"Clean-up Domain, with augmentation\")\n", + "print(\"Number of Data Points\", len(DPs))\n", + "print(\"Number of unique LTL expressions:\", len(unique_ltls))\n", + "print(\"Number of unique LTL structures:\", 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "G & U S ! A F A\n", + "move the arm to the block and pick it up next placing it in the bin and coming back to pick up the next block\n", + "\n", + "G & U S ! R F R\n", + "watch for blocks to be set down and then move them into the basket but not any orange ones\n", + "\n", + "G & U S ! B F B\n", + "watch for blocks to be set down and then move them into the basket but not any blue ones\n", + "\n", + "G & U S ! C F C\n", + "watch for blocks to be set down and then move them into the basket but not any green ones\n", + "\n", + "G & U S ! 
Y F Y\n", + "watch for blocks to be set down and then move them into the basket but not any yellow ones\n", + "\n" + ] + } + ], + "source": [ + "seen = set()\n", + "from random import shuffle, seed\n", + "seed(0)\n", + "# shuffle(DPs)\n", + "for dp in DPs:\n", + " if dp['ltl'] not in seen:\n", + " seen.add(dp['ltl'])\n", + " print(dp['ltl'])\n", + " print(dp['eng'])\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean up LTL expressions " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "APs = [\n", + " {'ap': \"A\", 'eng': \"any cubes\"},\n", + " {'ap': \"R\", 'eng': \"any non red cubes\"},\n", + " {'ap': \"B\", 'eng': \"any non blue cubes\"},\n", + " {'ap': \"Y\", 'eng': \"any non yellow cubes\"},\n", + " {'ap': \"C\", 'eng': \"any non green cubes\"},\n", + "]\n", + "APs = [\n", + " {'ap': \"P01\", 'eng': \"any cubes\"},\n", + " {'ap': \"P02\", 'eng': \"any non red cubes\"},\n", + " {'ap': \"P03\", 'eng': \"any non blue cubes\"},\n", + " {'ap': \"P04\", 'eng': \"any non yellow cubes\"},\n", + " {'ap': \"P05\", 'eng': \"any non green cubes\"},\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def build_type_A(ap1):\n", + " return {\n", + " 'raw_ltl': f\"G & U S ! {ap1['ap']} F {ap1['ap']}\",\n", + " 'canonical_ltl': f\"globally ( and ( until ( scan , not ( {ap1['eng']} ) ) , finally ( {ap1['eng']} ) ) )\",\n", + " 'eng': f\"Look for and pick up {ap1['eng']} and put them in crate.\"}" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "translation_seeds = []\n", + "for r1 in APs:\n", + " translation_seeds.append(build_type_A(r1))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5\n" + ] + } + ], + "source": [ + "print(len(translation_seeds))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "set()\n" + ] + } + ], + "source": [ + "seed_ltls = set([t['raw_ltl'] for t in translation_seeds])\n", + "print(unique_ltls - seed_ltls)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save the canonical decoding list" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "possible_decodings = {}\n", + "for seed in translation_seeds:\n", + " canonical = seed['canonical_ltl']\n", + " possible_decodings[canonical] = {\n", + " 'formula': canonical,\n", + " 'raw': seed['raw_ltl'],\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"canonical.json\", 'w') as f:\n", + " f.write(json.dumps(possible_decodings, indent=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save translation seed for zero shot" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"train_seed.jsonl\", \"w\") as f:\n", + " for dp in translation_seeds:\n", + " better_ltl = dp['canonical_ltl']\n", + " entry = {'canonical': better_ltl, 'formula': better_ltl, 'natural': dp['eng']}\n", + " json.dump(entry, f)\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save 
golden data for evaluation " + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "raw_canonical_mapping = {\n", + " seed['raw_ltl']: seed['canonical_ltl'] for seed in translation_seeds\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "744" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(DPs)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"golden.jsonl\", \"w\") as f:\n", + " for dp in DPs:\n", + " entry = {\n", + " 'canonical': raw_canonical_mapping[dp['ltl']],\n", + " 'formula': raw_canonical_mapping[dp['ltl']],\n", + " 'natural': dp['eng'],\n", + " 'raw_ltl': dp['ltl'],\n", + " }\n", + " json.dump(entry, f)\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.12 ('GPML')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "92a84f77637d1d47b588cbbaac9b07f8c628b67f58e672e955ed4902878afbbe" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/raw/eng copy.txt b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/raw/eng copy.txt new file mode 100644 index 0000000000000000000000000000000000000000..9ae7e21ced6e1aaf90aafe45e2839fd13cd412e3 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/raw/eng copy.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56804682d199b31496377cdf96d26a55dd1d56a3c23f75fcae2b67ae74c55c6e +size 59880 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/raw/eng.txt b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/raw/eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..5cb701336ff2b9e2bd533bd5858f1d6f9c2e45a7 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/raw/eng.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e87a60993706acc9b49cd7c77af816a2da598af6b4a207c015222cbd33a88d8 +size 59540 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/raw/ltl.txt b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/raw/ltl.txt new file mode 100644 index 0000000000000000000000000000000000000000..a43774e6d72914ceab9e5f7aa76553bb9d6a7e4f --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/raw/ltl.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:824bdb89067fc97767f2239c8c930c657bbcf9ace019f8b8369ed6a12e920caa +size 14879 diff --git a/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/train_seed.jsonl b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/train_seed.jsonl new file mode 
100644 index 0000000000000000000000000000000000000000..cfe8cedd74fa10d259a9b398a76e3431116402b6 --- /dev/null +++ b/NL2TL-dataset/datasets-Efficient-Eng-2-LTL/pick-and-place seems bad/train_seed.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fff7cee728f6db130cb08340071e38939f55b1c8ca7e8b0c34faecb18a597347 +size 1485 diff --git a/NL2TL-dataset/datasets-Lang2LTL/README.md b/NL2TL-dataset/datasets-Lang2LTL/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0752c463862c24862eb1c1b24d03848f3e922ad8 --- /dev/null +++ b/NL2TL-dataset/datasets-Lang2LTL/README.md @@ -0,0 +1,9 @@ +# Lang2LTL +Translation from natural language to Linear Temporal Logic + +There are two models in this project. +- Seq2Seq +- Seq2Seq with NER + +The Comet experiments linked to these two models are: +https://www.comet.ml/suchzheng2/lang2ltl/view/new diff --git a/NL2TL-dataset/datasets-Lang2LTL/input_500_originaltext.csv b/NL2TL-dataset/datasets-Lang2LTL/input_500_originaltext.csv new file mode 100644 index 0000000000000000000000000000000000000000..d16154f7a5c3a16f4997f54cbc88d7bf0d2ce50b --- /dev/null +++ b/NL2TL-dataset/datasets-Lang2LTL/input_500_originaltext.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94dccd1079c60de4b86f3467cdd6ae157ad9484f88126c9e8a570f465f55e5a2 +size 45392 diff --git a/NL2TL-dataset/datasets-Lang2LTL/seq2seqENR_validation.txt b/NL2TL-dataset/datasets-Lang2LTL/seq2seqENR_validation.txt new file mode 100644 index 0000000000000000000000000000000000000000..691a6f16beeb373081fdd7de0741db0931c5e66e --- /dev/null +++ b/NL2TL-dataset/datasets-Lang2LTL/seq2seqENR_validation.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cf7a7de8ce5e3d0da2ed861eda7b80c998837a9314198cf5c85d0158dbc46e6 +size 6372 diff --git a/NL2TL-dataset/datasets-Lang2LTL/seq2seqENR_validation_false.txt b/NL2TL-dataset/datasets-Lang2LTL/seq2seqENR_validation_false.txt new file mode 100644 index 0000000000000000000000000000000000000000..887a529d456b8b4216ec3e84d9abcd2c82deeca4 --- /dev/null +++ b/NL2TL-dataset/datasets-Lang2LTL/seq2seqENR_validation_false.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b711d75a567a06fd2d6037c25469b7aa6b9cd4a906619dea488d3d77d6259e89 +size 1158 diff --git a/NL2TL-dataset/datasets-Lang2LTL/seq2seq_validatin_results b/NL2TL-dataset/datasets-Lang2LTL/seq2seq_validatin_results new file mode 100644 index 0000000000000000000000000000000000000000..f11337984a0601371bfe30b6c7cda294cb00db60 --- /dev/null +++ b/NL2TL-dataset/datasets-Lang2LTL/seq2seq_validatin_results @@ -0,0 +1,219 @@ +> find the bookstore and then find the nelson fitness center += f (bookstore & f (nelson fitness center)) + + +> stay on the angell st first find the bookstore and then find the chipotle += g (angell st) & f (bookstore & f (chipotle)) + + +> do not enter angell st first find the barus building and then find the marston hall += g (! angell st) & f (barus building & f (marston hall)) + + +> stay on the thayer st and find the chipotle += g (thayer st) & f (chipotle) + + +> go to the citizen's bank and then go to the bookstore += f (citizen's bank & f (bookstore)) + + +> do not enter brook st first find the cvs and then find the chipotle += g (! brook st) & f (cvs & f (chipotle)) + + +> do not enter thayer st and find the marston hall += g (! thayer st) & f (marston hall) + + +> do not enter brook st first find the bookstore and then find the chipotle += g (! 
brook st) & f (bookstore & f (chipotle)) + + +> do not enter brook st first find the starbucks and then find the bookstore += g (! brook st) & f (starbucks & f (bookstore)) + + +> stay on the thayer st first find the chipotle and then find the starbucks += g (thayer st) & f (chipotle & f (starbucks)) + + +> do not enter brook st first find the chipotle and then find the cvs += g (! brook st) & f (chipotle & f (cvs)) + + +> go to the science library and then go to the watson center += f (science library & f (watson center)) + + +> go to the cvs and then go to the bookstore += f (cvs & f (bookstore)) + + +> go to the science library and then go to the chipotle += f (science library & f (chipotle)) + + +> find the barus building and then find the fedex office += f (barus building & f (fedex office)) + + +> go to the science library and then go to the citizen's bank += f (science library & f (citizen's bank)) + + +> stay on the thayer st first find the bookstore and then find the chipotle += g (thayer st) & f (bookstore & f (chipotle)) + + +> go to the watson center and then go to the cvs += f (watson center & f (cvs)) + + +> go to the marston hall and then go to the nelson fitness center += f (marston hall & f (nelson fitness center)) + + +> go to the chipotle and then go to the kabob and curry += f (chipotle & f (kabob and curry)) + + +> stay on the thayer st first find the cvs and then find the bookstore += g (thayer st) & f (cvs & f (bookstore)) + + +> do not enter angell st and find the barus building += g (! angell st) & f (barus building) + + +> find the cvs and then find the starbucks += f (cvs & f (starbucks)) + + +> go to the cvs and then go to the fedex office += f (cvs & f (fedex office)) + + +> go to the science library and then go to the starbucks += f (science library & f (starbucks)) + + +> go to the barus building and then go to the chipotle += f (barus building & f (chipotle)) + + +> do not enter brook st first find the bookstore and then find the chipotle += g (! brook st) & f (bookstore & f (chipotle)) + + +> find the fedex office and then find the chipotle += f (fedex office & f (chipotle)) + + +> do not enter brook st first find the bookstore and then find the starbucks += g (! brook st) & f (bookstore & f (starbucks)) + + +> find the citizen's bank and then find the kabob and curry += f (citizen's bank & f (kabob and curry)) + + +> do not enter angell st first find the watson center and then find the marston hall += g (! angell st) & f (watson center & f (marston hall)) + + +> go to the barus building and then go to the marston hall += f (barus building & f (marston hall)) + + +> find the kabob and curry and then find the citizen's bank += f (kabob and curry & f (citizen's bank)) + + +> do not enter angell st first find the citizen's bank and then find the watson center += g (! angell st) & f (citizen's bank & f (watson center)) + + +> do not enter brook st and find the chipotle += g (! brook st) & f (chipotle) + + +> find the fedex office and then find the marston hall += f (fedex office & f (marston hall)) + + +> go to the citizen's bank += f (citizen's bank) + + +> stay on the thayer st first find the kabob and curry and then find the bookstore += g (thayer st) & f (kabob and curry & f (bookstore)) + + +> do not enter angell st first find the marston hall and then find the watson center += g (! 
angell st) & f (marston hall & f (watson center)) + + +> find the science library and then find the fedex office += f (science library & f (fedex office)) + + +> do not leave the thayer st and find the chipotle += g (thayer st) & f (chipotle) + + +> find the marston hall and then find the science library += f (marston hall & f (science library)) + + +> go to the watson center and then go to the nelson fitness center += f (watson center & f (nelson fitness center)) + + +> do not enter angell st first find the citizen's bank and then find the marston hall += g (! angell st) & f (citizen's bank & f (marston hall)) + + +> do not enter brook st first find the science library and then find the bookstore += g (! brook st) & f (science library & f (bookstore)) + + +> stay on the brook st and find the marston hall += g (brook st) & f (marston hall) + + +> go to the marston hall and then go to the cvs += f (marston hall & f (cvs)) + + +> stay away from thayer st and find the barus building += g (! thayer st) & f (barus building) + + +> do not enter brook st first find the bookstore and then find the chipotle += g (! brook st) & f (bookstore & f (chipotle)) + + +> do not enter brook st first find the chipotle and then find the bookstore += g (! brook st) & f (chipotle & f (bookstore)) + + +> stay away from thayer st and find the bookstore += g (! thayer st) & f (bookstore) + + +> go to the nelson fitness center and then go to the chipotle += f (nelson fitness center & f (chipotle)) + + +> find the bookstore and then find the marston hall += f (bookstore & f (marston hall)) + + +> find the kabob and curry and then find the starbucks += f (kabob and curry & f (starbucks)) + + +> stay on the thayer st first find the kabob and curry and then find the cvs += g (thayer st) & f (kabob and curry & f (cvs)) + \ No newline at end of file diff --git a/NL2TL-dataset/datasets-Lang2LTL/seq2seq_validation_false.txt b/NL2TL-dataset/datasets-Lang2LTL/seq2seq_validation_false.txt new file mode 100644 index 0000000000000000000000000000000000000000..ee7cb2d8a54a721c8c6922c5b5b88d7e05530ddf --- /dev/null +++ b/NL2TL-dataset/datasets-Lang2LTL/seq2seq_validation_false.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a579605b2bb4bfa35d2e2b6d2cb051c2dde5179c0ceabd4ab00fb37c7b2c6f1 +size 959 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/CW/CW_total_3382_for_transfer_word_midfix.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/CW/CW_total_3382_for_transfer_word_midfix.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3d31d1f55a9d923930cd06a2408464e0dee86799 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/CW/CW_total_3382_for_transfer_word_midfix.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b10c9628cb9941c8b7ea7fea71b2fa16edcd2779a6033bf473a6dcdd64f000f +size 604641 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_lifted_total39378_05_19/lifted_data.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/Data_lifted_total39378_05_19/lifted_data.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..105beb79da430fbc3d835d1206c2961fda1dcf6e --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_lifted_total39378_05_19/lifted_data.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c68ab33843fe0485380f33b6e49cb7c9230eae66252d869083584dd7ef048afb +size 12458149 diff --git 
a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/CW_total_3382_for_transfer_word_midfix.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/CW_total_3382_for_transfer_word_midfix.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3d31d1f55a9d923930cd06a2408464e0dee86799 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/CW_total_3382_for_transfer_word_midfix.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b10c9628cb9941c8b7ea7fea71b2fa16edcd2779a6033bf473a6dcdd64f000f +size 604641 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/GLTL_test_2232_for_transfer_word_midfix.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/GLTL_test_2232_for_transfer_word_midfix.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..37d273a7b8f000b9cdaad5ee816f205604eb88b7 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/GLTL_test_2232_for_transfer_word_midfix.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:929120bed687b075462988aa54206517a6a7b962392e6d5cb298a571a31ea43d +size 458174 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/GLTL_train_8923_for_transfer_word_midfix.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/GLTL_train_8923_for_transfer_word_midfix.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..aef9cd294a1c4685e353a7e74e0b3e4b3c432361 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/GLTL_train_8923_for_transfer_word_midfix.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc7aff94ba621fb7def1b174b2f5ee0e1336b572ed65bb7c15f03f1295156930 +size 1839113 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/CW/hard_pc_src_syn.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/CW/hard_pc_src_syn.txt new file mode 100644 index 0000000000000000000000000000000000000000..2adaed9e6fe8556d1aec8f74d72c77b255cb1d2e --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/CW/hard_pc_src_syn.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8de33b8c18bd4d1ba0d7cf4a49fb3ff08f908eca0942484f7f37c8c8d70cb356 +size 154268 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/CW/hard_pc_tar_syn.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/CW/hard_pc_tar_syn.txt new file mode 100644 index 0000000000000000000000000000000000000000..8d75d5ba461a87093b82d116f7b783c362a8e440 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/CW/hard_pc_tar_syn.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a639bac11e74ce17140e8812eede765580025094975e105bbdb423639f8fdea +size 32323 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/CW/readme.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/CW/readme.txt new file mode 100644 index 0000000000000000000000000000000000000000..ec4b031ca74bb00bb5c235b4b88f38147236fb59 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/CW/readme.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:5afb8bfcf7df46c5c52fd15667cf10f325c39d9b9edd195f0b880f29b6a44dcd +size 441 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/test_src.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/test_src.txt new file mode 100644 index 0000000000000000000000000000000000000000..eac5a2c6b32da7a8b0c91027183c600eeed745bc --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/test_src.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:336e8478aa231f107b1285ba3d071920a4d7cf5279d942028cc71f85031ccd16 +size 122528 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/test_tar.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/test_tar.txt new file mode 100644 index 0000000000000000000000000000000000000000..8f6cce88f36e042dc627f6d0f6c5c12cb89725ce --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/test_tar.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:910615cecd4b8af1ac735bc3111e6b76e24891cf53f82150dd4e876953d51a14 +size 24990 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/train_src.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/train_src.txt new file mode 100644 index 0000000000000000000000000000000000000000..e6396ae796327b05823f1c77e3b2a3b082dca63c --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/train_src.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a14b0d7313d27a7dd2bdfcbb128b08f155a9957ef3497bcf570c6746023f731 +size 485651 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/train_tar.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/train_tar.txt new file mode 100644 index 0000000000000000000000000000000000000000..9581e635af74d25be566710a6c2efc81d15294e4 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/GLTL/train_tar.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1985262c03d599245e46c81cacd274bdaf84e0561f052fa5230e4754238718e +size 100804 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Navi/src.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Navi/src.txt new file mode 100644 index 0000000000000000000000000000000000000000..aa7b7d9f4a291bc94c95fd5e95609e9b66c4e6c2 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Navi/src.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:694127ec676506947299d57959e190393efca1a70ff259221291caf3032c9b8b +size 372558 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Navi/tar.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Navi/tar.txt new file mode 100644 index 0000000000000000000000000000000000000000..fe701928335784cf5b03787226c0b1a85057bee9 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Navi/tar.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3021aa4b921095d88d7bbba4ecf9190dad249c3eccc0b3ded6a7ee5ff3609d4f +size 453829 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Seq2seq_lifted_dataset_all_txt/src.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Seq2seq_lifted_dataset_all_txt/src.txt new file mode 100644 index 0000000000000000000000000000000000000000..ebda7126793223e9a9346a6874c0225e26a7f729 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Seq2seq_lifted_dataset_all_txt/src.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:294efdd3cc1dd1a7c980fb3360c2dd1c9864be551fadb74e0cbc8d74e8ae098d +size 849652 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Seq2seq_lifted_dataset_all_txt/tar.txt b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Seq2seq_lifted_dataset_all_txt/tar.txt new file mode 100644 index 0000000000000000000000000000000000000000..d395a6c8b06a33eb72412a3147335f11ae7aaeec --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/Seq2Seq_baseline/Seq2seq_lifted_dataset_all_txt/tar.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e00fb12bef2f9c5dcb1b1e266dc5989c60ac0783f729237e0c51e48997e4b080 +size 498925 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/circuit_total_refined.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/circuit_total_refined.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..dc97ed176f0b9a36d948ca1bdd8e802ffceea87c --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/circuit_total_refined.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29be14b96cb7519d73901f6ed1c02d85bb29ee1a3ed1f306f3ffe0af272fca81 +size 7507287 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/navi_total_refined.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/navi_total_refined.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ae1e41389d8202ce107919bd0c651d33a63416f1 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/Data_transfer_domain/navi_total_refined.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11499148f1f053c86476f08ec39f9cb0cb724eb9451d0153e2f34c77a04855b8 +size 3825939 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/README.md b/NL2TL-dataset/datasets-NL2TL_data_github/README.md new file mode 100644 index 0000000000000000000000000000000000000000..953ca8391973537fcd1991d15e29e1a7a510087b --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/README.md @@ -0,0 +1,121 @@ +# NL2STL +Webpage: https://yongchao98.github.io/MIT-realm-NL2TL/ + +Demo Website: http://realm-02.mit.edu:8444 + +Paper Link: https://arxiv.org/pdf/2305.07766.pdf + +Dataset Link: https://drive.google.com/drive/folders/10F-qyOhpqEi83o9ZojymqRPUtwzSOcfq?usp=sharing + +Model Link: [https://drive.google.com/drive/folders/1vSaKOunMPA3uiOdx6IDbe-gmfREXQ9uO?usp=share_link](https://drive.google.com/drive/folders/1ZfZoYovWoy5z247VXZWZBniNrCOONX4N?usp=share_link) + +To access the Demo Website, please send an email to ycchen98@mit.edu or yongchaochen@fas.harvard.edu for the **password**. + +This project transforms human natural language into Signal Temporal Logic (STL). 
To enhance generalizability, each specific atomic proposition (AP) in the natural language is represented as prop_1, prop_2, etc. In this way, the trained model can more easily be transferred to various specific domains. The APs refer to specific specifications, like grabbing the apple or going to the room. + +Also, the current work does not consider co-reference. Therefore, **each prop_i should only appear once in each sentence**. One inference example is as follows: + +Input natural language: + +``` +If ( prop_2 ) happens and continues to happen until at some point during the 176 to 415 time units that ( prop_1 ) , and also if ( prop_3 ) , then the scenario is equivalent to ( prop_4 ) . +``` + +Output Signal temporal logic: + +``` +( ( ( prop_2 until [176,415] prop_1 ) and prop_3 ) equal prop_4 ) +``` + +The operations we use are U (until), F (finally), G (globally), | (or), & (and), -> (imply), <-> (equal), and negation. We also allow time-interval definitions, like U[0,5], F[12,100], and G[30,150]. The time bounds are currently constrained to integers, and infinite can be used to express all future time, as in [5,infinite]. The following are the illustrations. More NL-TL pair examples are at https://drive.google.com/file/d/1f-wQ8AKInlTpXTYKwICRC0eZ-JKjAefh/view?usp=sharing +``` +prop_1 U[0,5] prop_2 : There exists a time point t between 0 and 5 timesteps from now such that prop_1 continues to happen until that timestep, and prop_2 happens at that timestep. +``` +``` +F[12,100] prop_2 : There exists a time point t between 12 and 100 timesteps from now such that prop_2 happens at that timestep. +``` +``` +G[30,150] prop_2 : At every time between 30 and 150 timesteps from now, prop_2 happens. +``` +``` +prop_1 -> prop_2 : If prop_1 happens, then prop_2 also happens. +``` +``` +prop_1 <-> prop_2 : prop_1 happens if and only if prop_2 happens. +``` + +## Description + +Signal Temporal Logic (STL) is a formal language for specifying properties of signals. It is used to specify properties of continuous-time signals, such as signals from sensors or control systems, in a way that is precise and easy to understand. + +STL has a syntax that is similar to the temporal logic used in computer science, but it is specialized for continuous-time signals. It includes operators for describing the values of a signal, as well as operators for combining and modifying those descriptions. + +For example, the STL formula F[0, 2] (x > 0.5) specifies the property that the signal x is greater than 0.5 at some time point between 0 and 2 seconds. This formula can be read as "the signal x eventually exceeds 0.5 within the first 2 seconds". + +STL can be used to verify that a signal satisfies a given property, or to synthesize a controller that ensures that a signal satisfies a given property. It is a powerful tool for reasoning about the behavior of continuous-time systems. + +While STL is quite powerful, humans are more familiar with defining tasks via natural language. Here we try to bridge this gap by fine-tuning large language models.
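+
+To make the bounded-operator semantics above concrete, here is a minimal, self-contained sketch (ours, for illustration only; it is not part of the NL2TL codebase) that evaluates the three bounded operators over a finite trace of boolean valuations, where trace[t]["prop_1"] says whether prop_1 holds t timesteps from now:
+
+```python
+# Illustrative sketch of the bounded-operator semantics over a finite,
+# discrete-time trace. Not part of the NL2TL repository.
+from typing import Dict, List
+
+Trace = List[Dict[str, bool]]
+
+def holds_at(trace: Trace, t: int, prop: str) -> bool:
+    # A proposition cannot hold past the end of the trace.
+    return t < len(trace) and trace[t][prop]
+
+def finally_op(trace: Trace, a: int, b: int, prop: str) -> bool:
+    # F[a,b] prop: prop holds at some timestep t with a <= t <= b.
+    return any(holds_at(trace, t, prop) for t in range(a, b + 1))
+
+def globally_op(trace: Trace, a: int, b: int, prop: str) -> bool:
+    # G[a,b] prop: prop holds at every timestep t with a <= t <= b.
+    return all(holds_at(trace, t, prop) for t in range(a, b + 1))
+
+def until_op(trace: Trace, a: int, b: int, p: str, q: str) -> bool:
+    # p U[a,b] q: q holds at some t in [a,b], and p holds at every
+    # timestep before t.
+    return any(
+        holds_at(trace, t, q) and all(holds_at(trace, k, p) for k in range(t))
+        for t in range(a, b + 1)
+    )
+
+# Tiny example: prop_1 holds for three steps; prop_2 first holds at step 2.
+trace = [{"prop_1": True, "prop_2": False},
+         {"prop_1": True, "prop_2": False},
+         {"prop_1": True, "prop_2": True}]
+print(until_op(trace, 0, 5, "prop_1", "prop_2"))  # True
+print(globally_op(trace, 0, 2, "prop_1"))         # True
+print(finally_op(trace, 0, 1, "prop_2"))          # False
+```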
+ +## Getting Started + +### Dependencies + +* The inference model should run on a GPU; you can run the notebook file Run.ipynb on Google Colab, or run_trained_model.py in your own GPU environment. +* As for setting up the environment, we install our environment via Miniconda. You can first set it up via https://docs.conda.io/projects/conda/en/latest/user-guide/install/linux.html +* Then install the packages: +``` +conda install pytorch torchvision torchaudio pytorch-cuda=11.6 -c pytorch -c nvidia +conda install pip +conda install python +conda install numpy +conda install pandas +pip install transformers +pip install SentencePiece +``` + +### Installing + +* First download the whole directory with the command +``` +git clone git@github.com:yongchao98/NL2TL.git +``` +* Then download the trained weights (e.g., checkpoint-62500) of our model at [https://drive.google.com/file/d/19uiB_2XnnnVmDInaLbQeoZq25ghUdg4D/view](https://drive.google.com/drive/folders/1ZfZoYovWoy5z247VXZWZBniNrCOONX4N?usp=sharing) +* After downloading both the code and the model weights, put the model weights checkpoint-62500 into your self-defined directory. + +### Other codes and datasets + +* As for the other code and datasets published on GitHub, please read **Illustration of Code and Dataset.pdf** for specific explanations of their utilities. + +## Authors + +Contributors' names and contact info + +Yongchao Chen (Harvard University, Massachusetts Institute of Technology, Laboratory of Information and Decision Systems) + +yongchaochen@fas.harvard.edu or ycchen98@mit.edu + +## Citation for BibTeX + +@article{chen2023nl2tl, + title={NL2TL: Transforming Natural Languages to Temporal Logics using Large Language Models}, + author={Chen, Yongchao and Gandhi, Rujul and Zhang, Yang and Fan, Chuchu}, + journal={arXiv preprint arXiv:2305.07766}, + year={2023} +} + +## Version History + +* 0.1 + * Initial Release on May 12, 2023 + +## License + +The paper corresponding to this project will be attached here in the coming months. This project may only be used commercially with our permission. 
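+
+As a quick end-to-end illustration of the Getting Started steps above, the following is a hypothetical inference sketch. It is ours, not the repository's run_trained_model.py, which remains the authoritative entry point; it assumes checkpoint-62500 loads as a standard Hugging Face seq2seq (T5-style) checkpoint, consistent with the transformers and SentencePiece dependencies listed above, and the exact input formatting the model expects may differ:
+
+```python
+# Hypothetical sketch; see run_trained_model.py for the released pipeline.
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+ckpt = "path/to/checkpoint-62500"  # your self-defined directory
+tokenizer = AutoTokenizer.from_pretrained(ckpt)
+model = AutoModelForSeq2SeqLM.from_pretrained(ckpt)
+
+nl = ("If ( prop_2 ) happens and continues to happen until at some point "
+      "during the 176 to 415 time units that ( prop_1 ) , and also if "
+      "( prop_3 ) , then the scenario is equivalent to ( prop_4 ) .")
+inputs = tokenizer(nl, return_tensors="pt")
+outputs = model.generate(**inputs, max_length=128)
+print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+# Expected output shape (per the inference example above):
+# ( ( ( prop_2 until [176,415] prop_1 ) and prop_3 ) equal prop_4 )
+```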
+ +## Recommended Work + +[AutoTAMP: Autoregressive Task and Motion Planning with LLMs as Translators and Checkers](https://arxiv.org/pdf/2306.06531.pdf) + +[Scalable Multi-Robot Collaboration with Large Language Models: Centralized or Decentralized Systems?](https://yongchao98.github.io/MIT-REALM-Multi-Robot/) + diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/Readme.docx b/NL2TL-dataset/datasets-NL2TL_data_github/Readme.docx new file mode 100644 index 0000000000000000000000000000000000000000..341b509d9654aacd98cef578859db88885a5f035 Binary files /dev/null and b/NL2TL-dataset/datasets-NL2TL_data_github/Readme.docx differ diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/circuit_total.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/circuit_total.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..72ddc3104cd9c4475e2a2f225f37780bf891a303 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/circuit_total.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eee8491bd2b58b6ade2137528cd0686bea5414a6c4b3683af3079a9ead8239a4 +size 57441457 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/circuit_total_span.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/circuit_total_span.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f695e33725dca8e64bfc50f6e02828ceb937a5e7 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/circuit_total_span.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1528e3aa2e9bb2a11982acabcb652f7e0fb96a14f40137a365fdde22dda2a903 +size 85675938 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/circuit_total_span_2.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/circuit_total_span_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f695e33725dca8e64bfc50f6e02828ceb937a5e7 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/circuit_total_span_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1528e3aa2e9bb2a11982acabcb652f7e0fb96a14f40137a365fdde22dda2a903 +size 85675938 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/corpus_id.csv b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/corpus_id.csv new file mode 100644 index 0000000000000000000000000000000000000000..abf00bb38f92102151b812906cb509d32bb5bde7 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/corpus_id.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c56b5ef5ec82edfcf51926f74fddc977514e53c1e8c3cc1a6012bd9c6c3047c +size 46673129 diff --git a/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/navi_total_refined.jsonl b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/navi_total_refined.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ae1e41389d8202ce107919bd0c651d33a63416f1 --- /dev/null +++ b/NL2TL-dataset/datasets-NL2TL_data_github/raw_data/navi_total_refined.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11499148f1f053c86476f08ec39f9cb0cb724eb9451d0153e2f34c77a04855b8 +size 3825939 diff --git a/NL2TL-dataset/datasets-nl2spec/Readme.md b/NL2TL-dataset/datasets-nl2spec/Readme.md new file mode 100644 index 0000000000000000000000000000000000000000..8d807780fb55214df2b1ac9fb852eba520ef8965 --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/Readme.md @@ -0,0 +1,98 @@ +# Overview + +nl2spec is a framework for applying Large Language 
Models (LLMs) to derive formal specifications (in temporal logics) from unstructured natural language. It introduces a new methodology to detect and resolve the inherent ambiguity of system requirements in natural language by utilizing LLMs to map subformulas of the formalization back to the corresponding natural language fragments of the input. Users iteratively add, delete, and edit these sub-translations to amend erroneous formalizations, which is easier than manually redrafting the entire formalization. + +The tool works best when providing in-distribution few-shot examples (see [Prompting](#prompting)). It can be easily adjusted to other models by extending the backend (see [Extendability](#extendability)). + +# Install + +Fulfill dependencies: +- [flask](https://flask.palletsprojects.com/en/2.2.x/) +- [ltlf2dfa](https://github.com/whitemech/LTLf2DFA) +- [huggingface](https://huggingface.co/) +- [openai-api](https://openai.com/blog/openai-api) +- [google-cloud-aiplatform](https://cloud.google.com/python/docs/reference/aiplatform/latest/index.html) + +All model inferences run in the cloud. Therefore, access to the respective APIs is necessary. + - For bloom and bloomz, a [huggingface](huggingface.co) *User Access Token* is needed. You can create a token and use bloom/bloomz for free. + - For Codex / GPT-based models, an [OpenAI](openai.com) *API key* is needed. See [OpenAI's Pricing](https://openai.com/pricing) + - For using PaLM, access through *google cloud platform* is required. Set up a project in [google cloud platform](https://console.cloud.google.com/) with [Vertex AI enabled](https://console.cloud.google.com/vertex-ai) and [authenticate with the Google Cloud CLI](https://cloud.google.com/cli). See [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing). + - See [Extendability](#extendability) on how to add more models. + - Model inference through the APIs may be discontinued by the providers, which is beyond our control. This currently affects codex and bloomz. In the web interface, we marked these models as *obsolete*. + + +# Run frontend + +From inside the ```src``` folder: +- create the appropriate file and paste your OpenAI key, HF key, or Google project ID into it: ```keys/oai_key.txt```, ```keys/hf_key.txt```, or ```keys/google_project_id.txt``` +- ```python3 -m flask --app frontend.py run``` +- add ```--debug``` for debug mode +- Then open the web-based tool at http://127.0.0.1:5000 + +# Run from terminal + +The tool can also be run from the command line from the ```src``` folder. See ```python3 nl2ltl.py --help``` for more details. + +E.g., with gpt-3.5: + +```python3 nl2ltl.py --model gpt-3.5-turbo --keyfile PATH/TO/YOUR/OPENAIKEY --nl "Globally a and b until c." --num_tries 3 --temperature 0.2 --prompt minimal``` + +or bloom + +```python3 nl2ltl.py --model bloom --keyfile PATH/TO/YOUR/HFKEY --nl "Every request is eventually followed by a grant." --num_tries 3 --temperature 0.2 --prompt minimal``` + +or PaLM + +```python3 nl2ltl.py --model text-bison@001 --keyfile PATH/TO/YOUR/GCLOUD/PROJECTID --nl "Every request is eventually followed by a grant." --num_tries 3 --temperature 0.2 --prompt minimal``` + +# Prompting + +The prompting pattern consists of a short introduction to the specification language, followed by few-shot examples that exploit the LLM to construct subtranslations. 
+We provide four example prompts in ```/prompts```: +- minimal.txt - the generic prompt used for the user study experiments in the [paper]() +- indistribution.txt - the prompt with few-shot examples drawn from the user study +- smart.txt - a prompt obtained from examples in a recently conducted [smart home user study]() +- stl.txt - a proof of concept extension for [Signal Temporal Logic (STL)]() + +See [the paper]() and ```/prompts``` for more details. +The prompts in nl2spec must follow a specific pattern to obtain the subtranslations as follows. + +`````` + +Followed by few shot examples, where each few shot example consists of the following: + +`````` + +`````` + +`````` + +`````` + +`````` + +# Extendability + +The tool can be easily extended to other specification languages based on temporal logics (see ```/prompts/stl.txt```). The best performance is expected when drawing the few-shot examples from the distribution to translate. Adding a few well-crafted examples to the prompt and then using the tool to translate the rest of the workload is significantly easier than translating every requirement from scratch. + +Additionally, the tool can be extended to more fine-tuned or other upcoming open-source language models. To this end, ```backend.py``` must be extended with a new ```new_model```, for which a method named ```new_model``` must be implemented in ```models.py```. + +# How to cite + +``` +@inproceedings{nl2spec, + title = {nl2spec: Interactively Translating Unstructured Natural Language to Temporal Logics with Large Language Models}, + author = {Cosler, Matthias and Hahn, Christopher and Mendoza, Daniel and Schmitt, Frederik and Trippel, Caroline}, + booktitle = {Computer {Aided} {Verification}}, + series = {Lecture {Notes} in {Computer} {Science}}, + address = {Cham}, + isbn = {978-3-031-37703-7}, + shorttitle = {nl2spec}, + doi = {10.1007/978-3-031-37703-7_18}, + language = {en}, + publisher = {Springer Nature Switzerland}, + editor = {Enea, Constantin and Lal, Akash}, + year = {2023}, + pages = {383--396}, +} +``` diff --git a/NL2TL-dataset/datasets-nl2spec/canonical.json b/NL2TL-dataset/datasets-nl2spec/canonical.json new file mode 100644 index 0000000000000000000000000000000000000000..36e46a293620fda68c6c3c2d91098156085da0c8 --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/canonical.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c09ca4c086b268eae4539e446b10d22a887805fc5295b0b9dae0bab0e82ee47 +size 73886 diff --git a/NL2TL-dataset/datasets-nl2spec/experiments/codex-initial_predictions.csv b/NL2TL-dataset/datasets-nl2spec/experiments/codex-initial_predictions.csv new file mode 100644 index 0000000000000000000000000000000000000000..fe0f77f259442a7bca35798c0e5a20acab8c3ac3 --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/experiments/codex-initial_predictions.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78195edfbd5757abaa8f4cb60476e4355cdcd0319a70650c5aa91d4f59f77b81 +size 8019 diff --git a/NL2TL-dataset/datasets-nl2spec/experiments/gpt-3.5-turbo-interactive_predictions.csv b/NL2TL-dataset/datasets-nl2spec/experiments/gpt-3.5-turbo-interactive_predictions.csv new file mode 100644 index 0000000000000000000000000000000000000000..b9f2b0f53d139a9c81cb9b24910cb018b4e7a6ba --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/experiments/gpt-3.5-turbo-interactive_predictions.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daacb855ff79a51ba2a9bf49cfd561d2404207ad7a2a9730aadfb2dc4059b212 +size 
diff --git a/NL2TL-dataset/datasets-nl2spec/canonical.json b/NL2TL-dataset/datasets-nl2spec/canonical.json new file mode 100644 index 0000000000000000000000000000000000000000..36e46a293620fda68c6c3c2d91098156085da0c8 --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/canonical.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c09ca4c086b268eae4539e446b10d22a887805fc5295b0b9dae0bab0e82ee47 +size 73886 diff --git a/NL2TL-dataset/datasets-nl2spec/experiments/codex-initial_predictions.csv b/NL2TL-dataset/datasets-nl2spec/experiments/codex-initial_predictions.csv new file mode 100644 index 0000000000000000000000000000000000000000..fe0f77f259442a7bca35798c0e5a20acab8c3ac3 --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/experiments/codex-initial_predictions.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78195edfbd5757abaa8f4cb60476e4355cdcd0319a70650c5aa91d4f59f77b81 +size 8019 diff --git a/NL2TL-dataset/datasets-nl2spec/experiments/gpt-3.5-turbo-interactive_predictions.csv b/NL2TL-dataset/datasets-nl2spec/experiments/gpt-3.5-turbo-interactive_predictions.csv new file mode 100644 index 0000000000000000000000000000000000000000..b9f2b0f53d139a9c81cb9b24910cb018b4e7a6ba --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/experiments/gpt-3.5-turbo-interactive_predictions.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daacb855ff79a51ba2a9bf49cfd561d2404207ad7a2a9730aadfb2dc4059b212 +size 4544 diff --git a/NL2TL-dataset/datasets-nl2spec/experiments/nl2spec-paper_experiment_results.csv b/NL2TL-dataset/datasets-nl2spec/experiments/nl2spec-paper_experiment_results.csv new file mode 100644 index 0000000000000000000000000000000000000000..7fded986cf33b65eb2031a55aafff09002142646 --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/experiments/nl2spec-paper_experiment_results.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22e1afc28ca110d35c9cc8c9638234f6d2542a5b16367dd4a644b296a4ab556b +size 4508 diff --git a/NL2TL-dataset/datasets-nl2spec/expert_LTL_dataset-raw.txt b/NL2TL-dataset/datasets-nl2spec/expert_LTL_dataset-raw.txt new file mode 100644 index 0000000000000000000000000000000000000000..4b534ac898ff928c11c5e07c13ebd424f3c51a4f --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/expert_LTL_dataset-raw.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffd34b01f5d369dfb4db3444b808111f62917f2ddb4fd5e5fddb497471546ecb +size 70373 diff --git a/NL2TL-dataset/datasets-nl2spec/expert_LTL_dataset.txt b/NL2TL-dataset/datasets-nl2spec/expert_LTL_dataset.txt new file mode 100644 index 0000000000000000000000000000000000000000..2c919a64e03ead0e7e1c2cb5df8df0bcc6cab1ec --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/expert_LTL_dataset.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:113cbbde14d29283413e70c0462a43c81c82bf99f32baed94810f6c4629f28ca +size 70346 diff --git a/NL2TL-dataset/datasets-nl2spec/golden.jsonl b/NL2TL-dataset/datasets-nl2spec/golden.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb b/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..a22a88b0035306f78c2dbd9c7e9089d2a5ddbb3c --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb @@ -0,0 +1,547 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"raw/ltl.txt\", \"r\") as f:\n", + " raw_ltls = f.read().splitlines()\n", + " unique_ltls = set(raw_ltls)\n", + "with open(\"raw/eng.txt\", 'r') as f:\n", + " raw_engs = f.read().splitlines()\n", + "\n", + "DPs = []\n", + "for ltl, eng in zip(raw_ltls, raw_engs):\n", + " DPs.append({'ltl': ltl, 'eng': eng})" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "G (P04)\n", + "F G ! P04\n", + "(! P02) U P01\n", + "G(P03 -> P02)\n", + "F (P03 && P02)\n", + "F P01 && F P02\n", + "G !(P01 & P03)\n", + "G (P02 && P01)\n", + "G(!(P01 & P02))\n", + "G (P03 -> F P06)\n", + "G F P01 || X P02\n", + "P03 U (G (F P04))\n", + "G ( P02 -> X P04)\n", + "G( P01 -> F P02 )\n", + "G (P02 -> X F P01)\n", + "G F P01 || G F P02\n", + " G (P01 <-> X P02)\n", + "G F P03 -> G F P02\n", + "G (P01 -> X G ! P02)\n", + " G (P03 | P04 | P05)\n", + "(P03 U P02) || G P03\n", + " ! G (! (P02 && X P02))\n", + "G(P01 -> X (X (X P02)))\n", + "G P02 && G(P03 -> !P01)\n", + " ! G (! 
(P01 && X P01))\n", + "G (P03 -> (P04 | X P04))\n", + "G (!(P01 && P02) -> F P03)\n", + "G(P01 -> F P02) -> G F P03\n", + "G (P01 && X P02 -> X X P04)\n", + " (P02 U (P02 & ! P01)) | G P02\n", + "P03 -> X ((P02 U P01) || G P02)\n", + "G(!(P02 &&P04)) && G(P02 || P04)\n", + "(F P05) -> (!P05 U (P06 & !P05))\n", + "G ((P04 <-> P02) -> (P03 <-> P05))\n", + "G ( P01 -> (X !P01 | XX !P01 | XXX !P01))\n", + "P02 && G (P02 -> X ! P02 && X X ! P02 && X X X ! P02 && X X X X ! P02 && X X X X X P02)\n" + ] + } + ], + "source": [ + "for ltl in sorted(unique_ltls, key=lambda x: len(x)):\n", + " print(ltl)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Drone-planning Domain, with augmentation\n", + "Number of Data Points 756\n", + "Number of unique LTL expressions: 36\n", + "Number of unique LTL structures: 5\n" + ] + } + ], + "source": [ + "print(\"Drone-planning Domain, with augmentation\")\n", + "print(\"Number of Data Points\", len(DPs))\n", + "print(\"Number of unique LTL expressions:\", len(unique_ltls))\n", + "print(\"Number of unique LTL structures:\", 5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "F ( AP )\n", + "- Go to AP\n", + "\n", + "F ( AP1 & F ( AP2 ) )\n", + "- First go to AP1 and then to AP2\n", + "\n", + "\n", + "F ( AP1 ) & G ( AP2 )\n", + "- Go to AP1 and Always in AP2\n", + "\n", + "\n", + "F ( AP1 ) & ~ ( AP2 )\n", + "- Go to AP1 and avoid AP2 \n", + "\n", + "~ ( AP1 ) U ( AP2 )\n", + "- avoid going through AP1 until go through AP2\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean up LTL expressions " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "ori_aps = set()\n", + "for unique_ltl in unique_ltls:\n", + " ori_aps.update(unique_ltl.split())" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'!',\n", + " '!(P01',\n", + " '!P01',\n", + " '!P01)',\n", + " '!P01))',\n", + " '!P05))',\n", + " '&',\n", + " '&&',\n", + " '&&P04))',\n", + " '(',\n", + " '(!',\n", + " '(!(P01',\n", + " '(!P05',\n", + " '((P02',\n", + " '((P04',\n", + " '(F',\n", + " '(G',\n", + " '(P01',\n", + " '(P02',\n", + " '(P03',\n", + " '(P04',\n", + " '(P04)',\n", + " '(P06',\n", + " '(X',\n", + " ')',\n", + " '->',\n", + " '<->',\n", + " 'F',\n", + " 'G',\n", + " 'G(',\n", + " 'G(!(P01',\n", + " 'G(!(P02',\n", + " 'G(P01',\n", + " 'G(P02',\n", + " 'G(P03',\n", + " 'P01',\n", + " 'P01)',\n", + " 'P01))',\n", + " 'P02',\n", + " 'P02)',\n", + " 'P02))',\n", + " 'P02)))',\n", + " 'P03',\n", + " 'P03)',\n", + " 'P04',\n", + " 'P04)',\n", + " 'P04))',\n", + " 'P05)',\n", + " 'P05))',\n", + " 'P06)',\n", + " 'U',\n", + " 'X',\n", + " 'XX',\n", + " 'XXX',\n", + " '|',\n", + " '||'}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ori_aps" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "APs = [\n", + " {'ap': 'blue_room', 'eng': 'the blue room'}, \n", + " {'ap': 'green_room', 'eng': 'the green room'},\n", + " {'ap': 'orange_room', 'eng': 'the orange room'},\n", + " {'ap': 'purple_room', 'eng': 'the purple room'},\n", + " {'ap': 'red_room', 'eng': 'the red room'},\n", + " {'ap': 'yellow_room', 'eng': 'the yellow room'},\n", + " {'ap': 'first_floor', 'eng': 'the first 
floor'},\n", + " {'ap': 'second_floor', 'eng': 'the second floor'},\n", + " {'ap': 'third_floor', 'eng': 'the third floor'},\n", + " {'ap': 'landmark_1', 'eng': 'landmark 1'},\n", + " {'ap': 'landmark_2', 'eng': 'landmark 2'},\n", + " {'ap': 'landmark_3', 'eng': 'landmark 3'},\n", + "]\n", + "# the task-independent list below overwrites the room-name APs above\n", + "APs = [\n", + " {'ap': 'P01', 'eng': 'P01'},\n", + " {'ap': 'P02', 'eng': 'P02'},\n", + " {'ap': 'P03', 'eng': 'P03'},\n", + " {'ap': 'P04', 'eng': 'P04'},\n", + " {'ap': 'P05', 'eng': 'P05'},\n", + " {'ap': 'P06', 'eng': 'P06'},\n", + " {'ap': 'P07', 'eng': 'P07'},\n", + " {'ap': 'P08', 'eng': 'P08'},\n", + " {'ap': 'P09', 'eng': 'P09'},\n", + " {'ap': 'P10', 'eng': 'P10'},\n", + " {'ap': 'P11', 'eng': 'P11'},\n", + " {'ap': 'P12', 'eng': 'P12'},\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# from the translator\n", + "def build_type_A(ap1):\n", + " raw_ltl = f\"F ( {ap1['ap']} )\"\n", + " canonical_ltl = f\"finally ( {ap1['eng']} )\"\n", + " translated = f\"go to {ap1['eng']}\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}\n", + "\n", + "def build_type_B(room_1, room_2):\n", + " raw_ltl = f\"F ( {room_1['ap']} & F ( {room_2['ap']} ) )\"\n", + " canonical_ltl = f\"finally ( and ( {room_1['eng']} , finally ( {room_2['eng']} ) ) )\"\n", + " translated = f\"go to {room_1['eng']}, and then {room_2['eng']} at last\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}\n", + "\n", + "def build_type_C(room_1, room_2):\n", + " raw_ltl = f\"F ( {room_1['ap']} ) & G ( {room_2['ap']} )\"\n", + " canonical_ltl = f\"and ( finally ( {room_1['eng']} ) , always ( {room_2['eng']} ) )\"\n", + " translated = f\"go to {room_1['eng']} at last, and always stay in {room_2['eng']}\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}\n", + "\n", + "def build_type_D(room_1, room_2):\n", + " raw_ltl = f\"F ( {room_1['ap']} & ~ {room_2['ap']} )\"\n", + " canonical_ltl = f\"finally ( and ( {room_1['eng']} , not ( {room_2['eng']} ) ) )\"\n", + " translated = f\"go to {room_1['eng']} at last, and avoid {room_2['eng']}\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}\n", + "\n", + "def build_type_E(room_1, room_2):\n", + " raw_ltl = f\"~ ( {room_1['ap']} ) U ( {room_2['ap']} )\"\n", + " canonical_ltl = f\"until ( not ( {room_1['eng']} ) , {room_2['eng']} )\"\n", + " translated = f\"avoid go to {room_1['eng']} until go to {room_2['eng']}\"\n", + " return {\n", + " 'raw_ltl': raw_ltl,\n", + " 'canonical_ltl': canonical_ltl,\n", + " 'eng': translated}" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "translation_seeds = []\n", + "for r1 in APs:\n", + " translation_seeds.append(build_type_A(r1))\n", + " for r2 in APs:\n", + " if r1 == r2:\n", + " continue\n", + " translation_seeds.append(build_type_B(r1, r2))\n", + " translation_seeds.append(build_type_C(r1, r2))\n", + " translation_seeds.append(build_type_D(r1, r2))\n", + " translation_seeds.append(build_type_E(r1, r2))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "540\n" + ] + } + ], + "source": [ + "print(len(translation_seeds))" + ] + },
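 + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sanity check on the seed count: 540 = 12 type-A seeds (one per AP) + 12 * 11 * 4 seeds of types B through E (one per ordered pair of distinct APs and per type)." + ] + }, + { + 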
"cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'G (!(P01 && P02) -> F P03)', ' ! G (! (P02 && X P02))', 'F (P03 && P02)', 'G(P01 -> F P02) -> G F P03', 'F P01 && F P02', 'G (P02 -> X F P01)', 'P03 U (G (F P04))', 'G F P01 || G F P02', '(! P02) U P01', 'G(P01 -> X (X (X P02)))', 'F G ! P04', 'P02 && G (P02 -> X ! P02 && X X ! P02 && X X X ! P02 && X X X X ! P02 && X X X X X P02)', 'G (P01 -> X G ! P02)', 'G (P03 -> F P06)', 'G(!(P02 &&P04)) && G(P02 || P04)', 'G ( P02 -> X P04)', 'G (P03 -> (P04 | X P04))', 'G ( P01 -> (X !P01 | XX !P01 | XXX !P01))', 'P03 -> X ((P02 U P01) || G P02)', 'G(P03 -> P02)', '(F P05) -> (!P05 U (P06 & !P05))', 'G ((P04 <-> P02) -> (P03 <-> P05))', 'G F P01 || X P02', ' G (P03 | P04 | P05)', 'G(!(P01 & P02))', ' G (P01 <-> X P02)', 'G (P01 && X P02 -> X X P04)', 'G( P01 -> F P02 )', 'G (P04)', 'G !(P01 & P03)', 'G P02 && G(P03 -> !P01)', ' ! G (! (P01 && X P01))', 'G (P02 && P01)', ' (P02 U (P02 & ! P01)) | G P02', 'G F P03 -> G F P02', '(P03 U P02) || G P03'}\n" + ] + } + ], + "source": [ + "seed_ltls = set([t['raw_ltl'] for t in translation_seeds])\n", + "print(unique_ltls - seed_ltls)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save the canonical decoding list" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "possible_decodings = {}\n", + "for seed in translation_seeds:\n", + " canonical = seed['canonical_ltl']\n", + " possible_decodings[canonical] = {\n", + " 'formula': canonical,\n", + " 'raw': seed['raw_ltl'],\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"canonical.json\", 'w') as f:\n", + " f.write(json.dumps(possible_decodings, indent=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save translation seed for zero shot" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"train_seed.jsonl\", \"w\") as f:\n", + " for dp in translation_seeds:\n", + " better_ltl = dp['canonical_ltl']\n", + " entry = {'canonical': better_ltl, 'formula': better_ltl, 'natural': dp['eng']}\n", + " json.dump(entry, f)\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save golden data for evaluation " + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "raw_canonical_mapping = {\n", + " seed['raw_ltl']: seed['canonical_ltl'] for seed in translation_seeds\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "756" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(DPs)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ltl': 'G( P01 -> F P02 )',\n", + " 'eng': 'Every P01 is eventually followed by a P02.'}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DPs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'G( P01 -> F P02 )'", + "output_type": "error", + "traceback": [ + 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/home/shaojun/code/NL2TL_translaters/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb Cell 24\u001b[0m line \u001b[0;36m4\n\u001b[1;32m 1\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mgolden.jsonl\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mw\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mas\u001b[39;00m f:\n\u001b[1;32m 2\u001b[0m \u001b[39mfor\u001b[39;00m dp \u001b[39min\u001b[39;00m DPs:\n\u001b[1;32m 3\u001b[0m entry \u001b[39m=\u001b[39m {\n\u001b[0;32m----> 4\u001b[0m \u001b[39m'\u001b[39m\u001b[39mcanonical\u001b[39m\u001b[39m'\u001b[39m: raw_canonical_mapping[dp[\u001b[39m'\u001b[39;49m\u001b[39mltl\u001b[39;49m\u001b[39m'\u001b[39;49m]],\n\u001b[1;32m 5\u001b[0m \u001b[39m'\u001b[39m\u001b[39mformula\u001b[39m\u001b[39m'\u001b[39m: raw_canonical_mapping[dp[\u001b[39m'\u001b[39m\u001b[39mltl\u001b[39m\u001b[39m'\u001b[39m]],\n\u001b[1;32m 6\u001b[0m \u001b[39m'\u001b[39m\u001b[39mnatural\u001b[39m\u001b[39m'\u001b[39m: dp[\u001b[39m'\u001b[39m\u001b[39meng\u001b[39m\u001b[39m'\u001b[39m],\n\u001b[1;32m 7\u001b[0m \u001b[39m'\u001b[39m\u001b[39mraw_ltl\u001b[39m\u001b[39m'\u001b[39m: dp[\u001b[39m'\u001b[39m\u001b[39mltl\u001b[39m\u001b[39m'\u001b[39m],\n\u001b[1;32m 8\u001b[0m }\n\u001b[1;32m 9\u001b[0m json\u001b[39m.\u001b[39mdump(entry, f)\n\u001b[1;32m 10\u001b[0m f\u001b[39m.\u001b[39mwrite(\u001b[39m'\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m'\u001b[39m)\n", + "\u001b[0;31mKeyError\u001b[0m: 'G( P01 -> F P02 )'" + ] + } + ], + "source": [ + "with open(\"golden.jsonl\", \"w\") as f:\n", + " for dp in DPs:\n", + " entry = {\n", + " 'canonical': raw_canonical_mapping[dp['ltl']],\n", + " 'formula': raw_canonical_mapping[dp['ltl']],\n", + " 'natural': dp['eng'],\n", + " 'raw_ltl': dp['ltl'],\n", + " }\n", + " json.dump(entry, f)\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.12 ('GPML')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.17" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "92a84f77637d1d47b588cbbaac9b07f8c628b67f58e672e955ed4902878afbbe" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/NL2TL-dataset/datasets-nl2spec/raw/eng.txt b/NL2TL-dataset/datasets-nl2spec/raw/eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..1383efcbb4d2d3de684bc8e3cb40ed64327a3623 --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/raw/eng.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57e31d7d92d59e04c2fec3f4ed8a8b8d8320852ac38547c9aef268e87adc9204 +size 49243 diff --git a/NL2TL-dataset/datasets-nl2spec/raw/ltl.txt b/NL2TL-dataset/datasets-nl2spec/raw/ltl.txt new file mode 100644 index 0000000000000000000000000000000000000000..7b014bd2c2f6c1cfc515230928101c6002e1161e --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/raw/ltl.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1f15082d04114b3a3f7a3f1cca842a20c3723e70077f0a4439f35e7b56b48d55 +size 19739 diff --git a/NL2TL-dataset/datasets-nl2spec/train_seed.jsonl b/NL2TL-dataset/datasets-nl2spec/train_seed.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..76c69e58b4a0a682801b56d9db82065635bbaed6 --- /dev/null +++ b/NL2TL-dataset/datasets-nl2spec/train_seed.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f0497459f33dea5b40f948bb4669c686533b28884e1dd1de16e476629bde672 +size 82488 diff --git a/README.md b/README.md index 7be5fc7f47d5db027d120b8024982df93db95b74..aac109ae9f99791f7b70366a04d6a4e09f840f70 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,43 @@ ---- -license: mit ---- +# Data-Efficient Learning of Natural Language to Linear Temporal Logic Translators for Robot Task Specification + +[[Homepage](https://nl2hltl.github.io/)] [[Paper](https://arxiv.org/abs/2408.08188)] [[Video](https://youtu.be/o8CRrVK9g9Q)] [[Poster](https://nl2hltl.github.io/media/figures/overview.png)] + +> The associated repository for the paper "Data-Efficient Learning of Natural Language to Linear Temporal Logic Translators for Robot Task Specification". + +## Introduction + +The dataset is adapted from the projects listed below: we replace their task-related descriptions with task-independent ones. + +Task-related NL2TL example (from [Efficient-Eng-2-LTL](https://github.com/UM-ARM-Lab/Efficient-Eng-2-LTL)): + +```json +"globally ( and ( until ( scan , not ( any cubes ) ) , finally ( any cubes ) ) )": { + "formula": "globally ( and ( until ( scan , not ( any cubes ) ) , finally ( any cubes ) ) )", + "raw": "G & U S ! A F A" + }, +``` + +Task-independent NL2TL example: +```json +{"natural": "go through the P01 until you get to the P04", "raw_ltl": "F ( P01 A ( F P04 ) )"} +``` + +Source task-related NL2TL datasets: + - [Efficient-Eng-2-LTL](https://github.com/UM-ARM-Lab/Efficient-Eng-2-LTL) + - [Lang2LTL](https://github.com/h2r/Lang2LTL) + - [nl2spec](https://github.com/realChrisHahn2/nl2spec) + - [NL2TL](https://github.com/yongchao98/NL2TL) + +## Cite +```bibtex +@misc{xu2024scalingnaturallanguageunderstanding, + title={Scaling Up Natural Language Understanding for Multi-Robots Through the Lens of Hierarchy}, + author={Shaojun Xu and Xusheng Luo and Yutong Huang and Letian Leng and Ruixuan Liu and Changliu Liu}, + year={2024}, + eprint={2408.08188}, + archivePrefix={arXiv}, + primaryClass={cs.RO}, + url={https://arxiv.org/abs/2408.08188}, +} +```
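To illustrate the conversion described in the Introduction, here is a minimal sketch of an AP-anonymization step. The helper, the task-related source pair, and the phrase-to-placeholder mapping are our own hypothetical reconstruction for illustration, not code or data shipped with this repo:

```python
# Hypothetical sketch: turn a task-related (natural, raw_ltl) pair into a
# task-independent one by renaming each task-specific AP to P01, P02, ...
def anonymize(example: dict, ap_map: dict) -> dict:
    out = dict(example)
    for phrase, placeholder in ap_map.items():
        out["natural"] = out["natural"].replace(phrase, placeholder)
        out["raw_ltl"] = out["raw_ltl"].replace(phrase, placeholder)
    return out

# Invented task-related source pair; the result matches the task-independent
# example shown above.
example = {"natural": "go through the red room until you get to the exit",
           "raw_ltl": "F ( red_room A ( F exit ) )"}
print(anonymize(example, {"red room": "P01", "red_room": "P01",
                          "exit": "P04"}))
# {'natural': 'go through the P01 until you get to the P04',
#  'raw_ltl': 'F ( P01 A ( F P04 ) )'}
```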