add dataset
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +4 -0
- NL2TL-dataset/NLTLsummary.json +3 -0
- NL2TL-dataset/collect/Cleaned_ENG.txt +3 -0
- NL2TL-dataset/collect/Cleaned_LTL.txt +3 -0
- NL2TL-dataset/collect/UNCleaned_ENG.txt +3 -0
- NL2TL-dataset/collect/UNCleaned_LTL.txt +3 -0
- NL2TL-dataset/collect/UNCleaned_num.txt +3 -0
- NL2TL-dataset/collect/eng.txt +3 -0
- NL2TL-dataset/collect/eng_gpt_auged.txt +3 -0
- NL2TL-dataset/collect/eng_gpt_auged2.txt +3 -0
- NL2TL-dataset/collect/eng改过了不太好.txt +3 -0
- NL2TL-dataset/collect/idxsrc_gpt_auged.txt +3 -0
- NL2TL-dataset/collect/idxsrc_gpt_auged2.txt +3 -0
- NL2TL-dataset/collect/log.jsonl +3 -0
- NL2TL-dataset/collect/ltl copy.txt +3 -0
- NL2TL-dataset/collect/ltl.txt +3 -0
- NL2TL-dataset/collect/ltl_eng_1.jsonl +3 -0
- NL2TL-dataset/collect/ltl_eng_mid_order_1.jsonl +3 -0
- NL2TL-dataset/collect/ltl_eng_test-aug.jsonl +3 -0
- NL2TL-dataset/collect/ltl_eng_test.jsonl +3 -0
- NL2TL-dataset/collect/ltl_eng_test_mid.jsonl +3 -0
- NL2TL-dataset/collect/ltl_eng_test_mid_ascii.jsonl +3 -0
- NL2TL-dataset/collect/ltl_eng_test_mid_ascii_gptAuged.jsonl +3 -0
- NL2TL-dataset/collect/ltl_eng_train-aug.jsonl +3 -0
- NL2TL-dataset/collect/ltl_eng_train.jsonl +3 -0
- NL2TL-dataset/collect/ltl_eng_train_mid.jsonl +3 -0
- NL2TL-dataset/collect/ltl_eng_train_mid_ascii.jsonl +3 -0
- NL2TL-dataset/collect/ltl_eng_train_mid_ascii_gptAuged.jsonl +3 -0
- NL2TL-dataset/collect/ltl_mid_order.txt +3 -0
- NL2TL-dataset/collect/ltl_mid_order_ascii.txt +3 -0
- NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged.txt +3 -0
- NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged2.txt +3 -0
- NL2TL-dataset/collect/note.txt +3 -0
- NL2TL-dataset/collect2/CW_total_3382_for_transfer_word_midfix.jsonl +3 -0
- NL2TL-dataset/collect2/GLTL_train_8923_for_transfer_word_midfix.jsonl +3 -0
- NL2TL-dataset/collect2/LTLexplain_0.json +3 -0
- NL2TL-dataset/collect2/LTLexplain_1.json +3 -0
- NL2TL-dataset/collect2/LTLexplain_2.json +3 -0
- NL2TL-dataset/collect2/LTLexplain_3.json +3 -0
- NL2TL-dataset/collect2/LTLexplain_4.json +3 -0
- NL2TL-dataset/collect2/LTLsummary.json +3 -0
- NL2TL-dataset/collect2/getUniqueLTL.py +20 -0
- NL2TL-dataset/collect2/lifted_data.jsonl +3 -0
- NL2TL-dataset/collect2/ltl_eng_test_mid_ascii_gptAuged.jsonl +3 -0
- NL2TL-dataset/collect2/ltl_eng_train_mid_ascii_gptAuged.jsonl +3 -0
- NL2TL-dataset/collect2/navi_total_refined.jsonl +3 -0
- NL2TL-dataset/datasets-Efficient-Eng-2-LTL/augment.ipynb +374 -0
- NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical-lifted.json +3 -0
- NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical.json +3 -0
- NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/golden-lifted.jsonl +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.jsonl filter=lfs diff=lfs merge=lfs -text
|
37 |
+
*.json filter=lfs diff=lfs merge=lfs -text
|
38 |
+
*.csv filter=lfs diff=lfs merge=lfs -text
|
39 |
+
*.txt filter=lfs diff=lfs merge=lfs -text
|
NL2TL-dataset/NLTLsummary.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6291896067a809119e01b43734779333c2d6b7baa36376afe8aa6579b4ba77ee
|
3 |
+
size 23025
|
NL2TL-dataset/collect/Cleaned_ENG.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77f8e6b696147098eb16be8ec7a2891a493c008e7d997f994d1b77fea4255559
|
3 |
+
size 447181
|
NL2TL-dataset/collect/Cleaned_LTL.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ced5c8ee114bc50e480c44999e991337318749dd7f5988379c0f2cd3c5940d2
|
3 |
+
size 252706
|
NL2TL-dataset/collect/UNCleaned_ENG.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29fe1e57c68d9096705eae98f6456aa24eb86ecc2a43fa7896785a30eba1c3d0
|
3 |
+
size 79381
|
NL2TL-dataset/collect/UNCleaned_LTL.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f903103fcdc5b86bf1aca3568fd5483bfab90412fb1581006c0433c4ac0feecc
|
3 |
+
size 34258
|
NL2TL-dataset/collect/UNCleaned_num.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:50067192dcabdc789ff68998a4b67b44d5ea71ad870b2f3029f06a62e93cd1cc
|
3 |
+
size 7639
|
NL2TL-dataset/collect/eng.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:834f288fc7c4bf3c27e829db10d72f673b55551f5e50591e9ff05461b78417e7
|
3 |
+
size 384436
|
NL2TL-dataset/collect/eng_gpt_auged.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5d039a94cfde70475d1c935e80286d0c3bb9c578f01c86cf8f65aed015fdf14
|
3 |
+
size 46038
|
NL2TL-dataset/collect/eng_gpt_auged2.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6484a6ace4d0bdd6e00b337aa2e734d4cd3cfdbcd03f90a960ea3859381ffaad
|
3 |
+
size 96837
|
NL2TL-dataset/collect/eng改过了不太好.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e74a45639f6d71263ea70768e40a4d299137fca66de36da7657a15cbba60beb9
|
3 |
+
size 379834
|
NL2TL-dataset/collect/idxsrc_gpt_auged.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce789edf73241091f48e1e3a2b4eddf06ae0244fc20d4f050cbed474afd79ab6
|
3 |
+
size 4425
|
NL2TL-dataset/collect/idxsrc_gpt_auged2.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a71b50098910d35585cddb30b9fd0187898db7e0ea1ae5cc94a5bf7f100e81a4
|
3 |
+
size 9273
|
NL2TL-dataset/collect/log.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3a3e9ba897654fe39da61e0fc20687714fb1cd6da68d98eb604731b20d14fce
|
3 |
+
size 561367
|
NL2TL-dataset/collect/ltl copy.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:161b2a0b67725db0aa2e803c4576fc47cfb0698f66469c660941a0ddbcaba76c
|
3 |
+
size 192556
|
NL2TL-dataset/collect/ltl.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:161b2a0b67725db0aa2e803c4576fc47cfb0698f66469c660941a0ddbcaba76c
|
3 |
+
size 192556
|
NL2TL-dataset/collect/ltl_eng_1.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55879344ea92bf9073b1b605dda98cfe551b9169186dde31592d15c9a24d47e4
|
3 |
+
size 1043402
|
NL2TL-dataset/collect/ltl_eng_mid_order_1.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e61fbb4657c7b6d81d26c6a8fa596f03156dba6308b17b54125f79cd3734c058
|
3 |
+
size 1069183
|
NL2TL-dataset/collect/ltl_eng_test-aug.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0982bf8999ef200b1d7fa5c867b70e41caf136b7a4422e73eb303f7ee77b02dd
|
3 |
+
size 131838
|
NL2TL-dataset/collect/ltl_eng_test.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0982bf8999ef200b1d7fa5c867b70e41caf136b7a4422e73eb303f7ee77b02dd
|
3 |
+
size 131838
|
NL2TL-dataset/collect/ltl_eng_test_mid.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd4e75373318179d3cd3a2bbb330fdfe75376dabe77f475f31d95cf27f4d100c
|
3 |
+
size 137907
|
NL2TL-dataset/collect/ltl_eng_test_mid_ascii.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9185fd98062c160c363f35116a943a0950fdd2151bd7ff61cb9df39921ca73f3
|
3 |
+
size 137931
|
NL2TL-dataset/collect/ltl_eng_test_mid_ascii_gptAuged.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ca41a9c7ea7dd2151e861ac7754f36eee0e386f7a0dbbf7d2d5cabf927ccb7b
|
3 |
+
size 125920
|
NL2TL-dataset/collect/ltl_eng_train-aug.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4817c9d380816056e05c0bec1f985ac90dbc1068417db5d033d15bfe4273d279
|
3 |
+
size 911564
|
NL2TL-dataset/collect/ltl_eng_train.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4817c9d380816056e05c0bec1f985ac90dbc1068417db5d033d15bfe4273d279
|
3 |
+
size 911564
|
NL2TL-dataset/collect/ltl_eng_train_mid.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0daf22f37a11d38edaacfd837ccabf59fc7395431166bc50b0336e030faf77d1
|
3 |
+
size 937958
|
NL2TL-dataset/collect/ltl_eng_train_mid_ascii.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3674e9aabc015509efadbe1f8374627d5aaaacfb8fe3c717fa1ff852dcb8c4f
|
3 |
+
size 937958
|
NL2TL-dataset/collect/ltl_eng_train_mid_ascii_gptAuged.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cea13abc2204906d232ea88b78830ad6be09e01dbd5b7ad7ec9da9dff2f6c777
|
3 |
+
size 1129386
|
NL2TL-dataset/collect/ltl_mid_order.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:646500a7231278e0dd4ca0ed6fc2cc71ba4f3b7284a13a720a689a8e634db78e
|
3 |
+
size 225055
|
NL2TL-dataset/collect/ltl_mid_order_ascii.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f56781a159d3a52b58101038f0d15e1ef054b5f0290dd73228e51f4004243dd6
|
3 |
+
size 225073
|
NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f332db20802ecce5a8bd2879316e66061c97d438d18c7cbc6d4899b3bfbcaa44
|
3 |
+
size 19575
|
NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged2.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bfca7f2b1129e28f89e4dd68d76406ad26404514d08864aaf1c76f3fa01eb132
|
3 |
+
size 40805
|
NL2TL-dataset/collect/note.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7798d0398769800f3cfc50fc902cec4cd1aa1eff05e3cb41790094c620f61039
|
3 |
+
size 59
|
NL2TL-dataset/collect2/CW_total_3382_for_transfer_word_midfix.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c131c3486f4debdf0ed416248de2892d41627abbf2fcd83c69051b7ba69bfb2b
|
3 |
+
size 449106
|
NL2TL-dataset/collect2/GLTL_train_8923_for_transfer_word_midfix.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc7aff94ba621fb7def1b174b2f5ee0e1336b572ed65bb7c15f03f1295156930
|
3 |
+
size 1839113
|
NL2TL-dataset/collect2/LTLexplain_0.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08175568f9105c8dd5e8e5413e8a90834ab48b0cd2c8396ca1a71ab97f69ef68
|
3 |
+
size 149375
|
NL2TL-dataset/collect2/LTLexplain_1.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2a0f1e1bfe11526ca4ad549cf263fcfd60d18d9ee27cf77b889a3d0b8728d37
|
3 |
+
size 149123
|
NL2TL-dataset/collect2/LTLexplain_2.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64ee6dd1b5f76edc89b57cc4384a7dc1ea1af31940016f844c8a22ee0b98c0e4
|
3 |
+
size 82749
|
NL2TL-dataset/collect2/LTLexplain_3.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:052761b9c005136e13576c0bb4f279ad5c81e9e15abfd736e344028b32e8af89
|
3 |
+
size 73448
|
NL2TL-dataset/collect2/LTLexplain_4.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3fa922f8b8402b2f950c18de7e2360a0aa7158297251e0ea1fa2d6c26337ccf
|
3 |
+
size 147624
|
NL2TL-dataset/collect2/LTLsummary.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11079ebb03aa8d76763c690f7c59431f32c25ce385657b3e68daa1b15c7ff734
|
3 |
+
size 22393
|
NL2TL-dataset/collect2/getUniqueLTL.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import re,os
|
3 |
+
|
4 |
+
def findUniqueLTL(paths:list):
    """Collect the distinct ``raw_ltl`` values found in JSON Lines files.

    Args:
        paths: Paths of the JSONL files to scan. Every non-blank line must
            be a JSON object that has a ``raw_ltl`` key.

    Returns:
        A dict used as an ordered set: each distinct ``raw_ltl`` value maps
        to ``1``, in order of first appearance across ``paths``.

    Raises:
        OSError: If a path cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``raw_ltl`` key.
    """
    ret={}
    for path in paths:
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default locale encoding.
        with open(path,'r',encoding='utf-8') as f:
            # Iterate the file lazily instead of materialising every line.
            for line in f:
                # Blank / whitespace-only lines are not records; json.loads
                # would raise on them.
                if not line.strip():
                    continue
                ret[json.loads(line)['raw_ltl']]=1
    return ret
|
13 |
+
|
14 |
+
if __name__=='__main__':
    # Script entry point: scan the two hard-coded JSONL files for distinct
    # raw_ltl values and write the resulting mapping out as indented JSON.
    sources=[
        '/home/user/xsj/NL2TL-dataset/collect2/ltl_eng_test_mid_ascii_gptAuged.jsonl',
        '/home/user/xsj/NL2TL-dataset/collect2/ltl_eng_train_mid_ascii_gptAuged.jsonl',
    ]
    unique_ltls=findUniqueLTL(paths=sources)
    summary_path=os.path.join('/home/user/xsj/NL2TL-dataset/collect2','NLTLsummary.json')
    with open(summary_path,'w') as out:
        # Serialise after opening, then write the whole document in one call.
        serialized=json.dumps(unique_ltls,sort_keys=False,indent=4,separators=(',',':'))
        out.write(serialized)
|
19 |
+
|
20 |
+
|
NL2TL-dataset/collect2/lifted_data.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c68ab33843fe0485380f33b6e49cb7c9230eae66252d869083584dd7ef048afb
|
3 |
+
size 12458149
|
NL2TL-dataset/collect2/ltl_eng_test_mid_ascii_gptAuged.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41061064f1591c833a29ea73ce1888aaa831b488b0f4f0f2a04994c871a42873
|
3 |
+
size 140979
|
NL2TL-dataset/collect2/ltl_eng_train_mid_ascii_gptAuged.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24f9856d3e7f3d882de35caf5e3d79e70b783630178e1fcbad197b31431b2bfa
|
3 |
+
size 1264107
|
NL2TL-dataset/collect2/navi_total_refined.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11499148f1f053c86476f08ec39f9cb0cb724eb9451d0153e2f34c77a04855b8
|
3 |
+
size 3825939
|
NL2TL-dataset/datasets-Efficient-Eng-2-LTL/augment.ipynb
ADDED
@@ -0,0 +1,374 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"metadata": {},
|
6 |
+
"source": [
|
7 |
+
"# Augmentation by paraphrasing"
|
8 |
+
]
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"cell_type": "markdown",
|
12 |
+
"metadata": {},
|
13 |
+
"source": [
|
14 |
+
"## Init & Load Seed Data"
|
15 |
+
]
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"cell_type": "code",
|
19 |
+
"execution_count": null,
|
20 |
+
"metadata": {},
|
21 |
+
"outputs": [],
|
22 |
+
"source": [
|
23 |
+
"import json, openai\n",
|
24 |
+
"from tqdm import tqdm "
|
25 |
+
]
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"cell_type": "code",
|
29 |
+
"execution_count": null,
|
30 |
+
"metadata": {},
|
31 |
+
"outputs": [],
|
32 |
+
"source": [
|
33 |
+
"DOMAIN = \"drone-planning/\"\n",
|
34 |
+
"# DOMAIN = \"clean-up/\"\n",
|
35 |
+
"# DOMAIN = \"pick-and-place/\"\n",
|
36 |
+
"with open(DOMAIN + \"train_seed.jsonl\") as f:\n",
|
37 |
+
" train_seed = [json.loads(line) for line in f]"
|
38 |
+
]
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"cell_type": "code",
|
42 |
+
"execution_count": null,
|
43 |
+
"metadata": {},
|
44 |
+
"outputs": [],
|
45 |
+
"source": [
|
46 |
+
"eng_seeds = {\n",
|
47 |
+
" seed['natural']: [] for seed in train_seed\n",
|
48 |
+
"}"
|
49 |
+
]
|
50 |
+
},
|
51 |
+
{
|
52 |
+
"cell_type": "markdown",
|
53 |
+
"metadata": {},
|
54 |
+
"source": [
|
55 |
+
"## Augmentation Code\n",
|
56 |
+
"prompting GPT-3 seems to work the best in this case"
|
57 |
+
]
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"cell_type": "code",
|
61 |
+
"execution_count": null,
|
62 |
+
"metadata": {},
|
63 |
+
"outputs": [],
|
64 |
+
"source": [
|
65 |
+
"# You need to set your OPENAI API key here\n",
|
66 |
+
"# https://beta.openai.com/account/api-keys\n",
|
67 |
+
"openai.api_key = \"TO_BE_SET\""
|
68 |
+
]
|
69 |
+
},
|
70 |
+
{
|
71 |
+
"cell_type": "code",
|
72 |
+
"execution_count": null,
|
73 |
+
"metadata": {},
|
74 |
+
"outputs": [],
|
75 |
+
"source": [
|
76 |
+
"def normalize(sentence):\n",
|
77 |
+
" # capitalize first letter and add period at the end if not present\n",
|
78 |
+
" if sentence[0].islower():\n",
|
79 |
+
" sentence = sentence[0].upper() + sentence[1:]\n",
|
80 |
+
" if sentence[-1] != '.':\n",
|
81 |
+
" sentence = sentence + '.'\n",
|
82 |
+
" return sentence\n",
|
83 |
+
"\n",
|
84 |
+
"def parse_sentences_from_response(response):\n",
|
85 |
+
" lines = response.split('\\n')\n",
|
86 |
+
" # assert len(lines) == 5\n",
|
87 |
+
" assert len(lines) == 10\n",
|
88 |
+
" lines[0] = \"1.\" + lines[0]\n",
|
89 |
+
" paraphrases = []\n",
|
90 |
+
" for idx, line in enumerate(lines):\n",
|
91 |
+
" assert line.startswith(str(idx+1) + '. ')\n",
|
92 |
+
" sentence_start_idx = len(str(idx+1) + '. ')\n",
|
93 |
+
" paraphrases.append(line[sentence_start_idx:])\n",
|
94 |
+
" for paraphrase in paraphrases:\n",
|
95 |
+
" if paraphrase[-1] == ' ':\n",
|
96 |
+
" if paraphrase[-2] == '.':\n",
|
97 |
+
" paraphrase = paraphrase[:-1]\n",
|
98 |
+
" else:\n",
|
99 |
+
" paraphrase = paraphrase[:-2] + '.'\n",
|
100 |
+
" return paraphrases\n",
|
101 |
+
"\n",
|
102 |
+
"\n",
|
103 |
+
"PROMPT = \"\"\"Rephrase the source sentence in 10 different ways. Make the outputs as diverse as possible.\n",
|
104 |
+
"\n",
|
105 |
+
"Source: \n",
|
106 |
+
"SOURCE-TO-BE-PLACED\n",
|
107 |
+
"\n",
|
108 |
+
"Outputs:\n",
|
109 |
+
"1.\"\"\"\n",
|
110 |
+
"def rephrase_a_sentence(sentence):\n",
|
111 |
+
" response = openai.Completion.create(\n",
|
112 |
+
" model=\"text-davinci-002\",\n",
|
113 |
+
" prompt=PROMPT.replace(\"SOURCE-TO-BE-PLACED\", normalize(sentence)),\n",
|
114 |
+
" temperature=0.7,\n",
|
115 |
+
" max_tokens=512,\n",
|
116 |
+
" top_p=1,\n",
|
117 |
+
" best_of=1,\n",
|
118 |
+
" frequency_penalty=0.1,\n",
|
119 |
+
" presence_penalty=0\n",
|
120 |
+
" )\n",
|
121 |
+
" output = response['choices'][0]['text']\n",
|
122 |
+
" try:\n",
|
123 |
+
" paraphrases = parse_sentences_from_response(output)\n",
|
124 |
+
" except:\n",
|
125 |
+
" print(\"Error in parsing response\")\n",
|
126 |
+
" print(output)\n",
|
127 |
+
" return output, \"ERROR\"\n",
|
128 |
+
" return parse_sentences_from_response(output)"
|
129 |
+
]
|
130 |
+
},
|
131 |
+
{
|
132 |
+
"cell_type": "code",
|
133 |
+
"execution_count": null,
|
134 |
+
"metadata": {},
|
135 |
+
"outputs": [],
|
136 |
+
"source": [
|
137 |
+
"O = rephrase_a_sentence(\"Go to the red room or go to the green room to finally go to the blue room.\")"
|
138 |
+
]
|
139 |
+
},
|
140 |
+
{
|
141 |
+
"cell_type": "code",
|
142 |
+
"execution_count": null,
|
143 |
+
"metadata": {},
|
144 |
+
"outputs": [],
|
145 |
+
"source": [
|
146 |
+
"O"
|
147 |
+
]
|
148 |
+
},
|
149 |
+
{
|
150 |
+
"cell_type": "markdown",
|
151 |
+
"metadata": {},
|
152 |
+
"source": [
|
153 |
+
"## Run Augmentation"
|
154 |
+
]
|
155 |
+
},
|
156 |
+
{
|
157 |
+
"cell_type": "code",
|
158 |
+
"execution_count": null,
|
159 |
+
"metadata": {},
|
160 |
+
"outputs": [],
|
161 |
+
"source": [
|
162 |
+
"len(eng_seeds)"
|
163 |
+
]
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"cell_type": "code",
|
167 |
+
"execution_count": null,
|
168 |
+
"metadata": {},
|
169 |
+
"outputs": [],
|
170 |
+
"source": [
|
171 |
+
"list(eng_seeds.keys())[0]"
|
172 |
+
]
|
173 |
+
},
|
174 |
+
{
|
175 |
+
"cell_type": "code",
|
176 |
+
"execution_count": null,
|
177 |
+
"metadata": {},
|
178 |
+
"outputs": [],
|
179 |
+
"source": [
|
180 |
+
"def paraphrase_done(eng_seeds):\n",
|
181 |
+
" for eng_seed, extended in tqdm(eng_seeds.items()):\n",
|
182 |
+
" if len(extended) == 0:\n",
|
183 |
+
" return False\n",
|
184 |
+
" return True\n",
|
185 |
+
"\n",
|
186 |
+
"while not paraphrase_done(eng_seeds):\n",
|
187 |
+
" for eng_seed, extended in tqdm(eng_seeds.items()):\n",
|
188 |
+
" if len(extended) == 0:\n",
|
189 |
+
" extended += rephrase_a_sentence(eng_seed)"
|
190 |
+
]
|
191 |
+
},
|
192 |
+
{
|
193 |
+
"cell_type": "code",
|
194 |
+
"execution_count": null,
|
195 |
+
"metadata": {},
|
196 |
+
"outputs": [],
|
197 |
+
"source": [
|
198 |
+
"eng_seeds"
|
199 |
+
]
|
200 |
+
},
|
201 |
+
{
|
202 |
+
"cell_type": "markdown",
|
203 |
+
"metadata": {},
|
204 |
+
"source": [
|
205 |
+
"### Dump as Training Data"
|
206 |
+
]
|
207 |
+
},
|
208 |
+
{
|
209 |
+
"cell_type": "code",
|
210 |
+
"execution_count": null,
|
211 |
+
"metadata": {},
|
212 |
+
"outputs": [],
|
213 |
+
"source": [
|
214 |
+
"train_seed[0]"
|
215 |
+
]
|
216 |
+
},
|
217 |
+
{
|
218 |
+
"cell_type": "code",
|
219 |
+
"execution_count": null,
|
220 |
+
"metadata": {},
|
221 |
+
"outputs": [],
|
222 |
+
"source": [
|
223 |
+
"with open(DOMAIN + \"syn-aug.train.jsonl\", 'w') as f:\n",
|
224 |
+
" for seed in train_seed:\n",
|
225 |
+
" f.write(json.dumps(seed) + '\\n')\n",
|
226 |
+
" for aug_eng in eng_seeds[seed['natural']]:\n",
|
227 |
+
" f.write(json.dumps({\n",
|
228 |
+
" 'natural': aug_eng,\n",
|
229 |
+
" 'canonical': seed['canonical'],\n",
|
230 |
+
" 'formula': seed['formula']\n",
|
231 |
+
" }) + '\\n')"
|
232 |
+
]
|
233 |
+
},
|
234 |
+
{
|
235 |
+
"cell_type": "code",
|
236 |
+
"execution_count": null,
|
237 |
+
"metadata": {},
|
238 |
+
"outputs": [],
|
239 |
+
"source": [
|
240 |
+
"with open(DOMAIN + \"syn.train.jsonl\", 'w') as f:\n",
|
241 |
+
" for seed in train_seed:\n",
|
242 |
+
" f.write(json.dumps(seed) + '\\n')"
|
243 |
+
]
|
244 |
+
},
|
245 |
+
{
|
246 |
+
"cell_type": "markdown",
|
247 |
+
"metadata": {},
|
248 |
+
"source": [
|
249 |
+
"### Normalize the natural language form "
|
250 |
+
]
|
251 |
+
},
|
252 |
+
{
|
253 |
+
"cell_type": "code",
|
254 |
+
"execution_count": null,
|
255 |
+
"metadata": {},
|
256 |
+
"outputs": [],
|
257 |
+
"source": [
|
258 |
+
"if DOMAIN == \"clean-up/\":\n",
|
259 |
+
" # in clean up, golden natural language data comes without period at the end, no capitalization in the beginning\n",
|
260 |
+
" def clean_up_normalize(sentence):\n",
|
261 |
+
" if sentence[0].isupper():\n",
|
262 |
+
" sentence = sentence[0].lower() + sentence[1:]\n",
|
263 |
+
" if sentence[-1] == '.':\n",
|
264 |
+
" sentence = sentence[:-1]\n",
|
265 |
+
" return sentence\n",
|
266 |
+
"\n",
|
267 |
+
" buffer = []\n",
|
268 |
+
" with open(DOMAIN + \"syn-aug.train.jsonl\", 'r') as f:\n",
|
269 |
+
" for l in f.readlines():\n",
|
270 |
+
" buffer.append(json.loads(l))\n",
|
271 |
+
" \n",
|
272 |
+
" with open(DOMAIN + \"syn-aug.train.jsonl\", 'w') as f:\n",
|
273 |
+
" for dp in buffer:\n",
|
274 |
+
" f.write(json.dumps({\n",
|
275 |
+
" 'natural': clean_up_normalize(dp['natural']),\n",
|
276 |
+
" 'canonical': dp['canonical'],\n",
|
277 |
+
" 'formula': dp['formula']\n",
|
278 |
+
" }) + '\\n')\n",
|
279 |
+
"\n",
|
280 |
+
"if DOMAIN == \"pick-and-place/\":\n",
|
281 |
+
" # in pick and place, golden natural language data comes without period at the end, no capitalization in the beginning\n",
|
282 |
+
" def clean_up_normalize(sentence):\n",
|
283 |
+
" if sentence[0].isupper():\n",
|
284 |
+
" sentence = sentence[0].lower() + sentence[1:]\n",
|
285 |
+
" if sentence[-1] == '.':\n",
|
286 |
+
" sentence = sentence[:-1]\n",
|
287 |
+
" return sentence\n",
|
288 |
+
"\n",
|
289 |
+
" buffer = []\n",
|
290 |
+
" with open(DOMAIN + \"syn-aug.train.jsonl\", 'r') as f:\n",
|
291 |
+
" for l in f.readlines():\n",
|
292 |
+
" buffer.append(json.loads(l))\n",
|
293 |
+
" \n",
|
294 |
+
" with open(DOMAIN + \"syn-aug.train.jsonl\", 'w') as f:\n",
|
295 |
+
" for dp in buffer:\n",
|
296 |
+
" f.write(json.dumps({\n",
|
297 |
+
" 'natural': clean_up_normalize(dp['natural']),\n",
|
298 |
+
" 'canonical': dp['canonical'],\n",
|
299 |
+
" 'formula': dp['formula']\n",
|
300 |
+
" }) + '\\n')"
|
301 |
+
]
|
302 |
+
},
|
303 |
+
{
|
304 |
+
"cell_type": "code",
|
305 |
+
"execution_count": null,
|
306 |
+
"metadata": {},
|
307 |
+
"outputs": [],
|
308 |
+
"source": [
|
309 |
+
"if DOMAIN == \"drone-planning/\":\n",
|
310 |
+
" # in drone planning, golden natural language data comes with a \"space + period\" at the end, no capitalization in the beginning\n",
|
311 |
+
" def clean_up_normalize(sentence):\n",
|
312 |
+
" if sentence[0].isupper():\n",
|
313 |
+
" sentence = sentence[0].lower() + sentence[1:]\n",
|
314 |
+
" while sentence[-1] == ' ' or sentence[-1] == '.' or sentence[-1] == '!':\n",
|
315 |
+
" sentence = sentence[:-1]\n",
|
316 |
+
" sentence = sentence + '.'\n",
|
317 |
+
" sentence = sentence.replace('.', ' .')\n",
|
318 |
+
" sentence = sentence.replace(',', ' ,')\n",
|
319 |
+
" return sentence\n",
|
320 |
+
"\n",
|
321 |
+
" buffer = []\n",
|
322 |
+
" # with open(DOMAIN + \"syn-aug.train.jsonl\", 'r') as f:\n",
|
323 |
+
" # for l in f.readlines():\n",
|
324 |
+
" # buffer.append(json.loads(l))\n",
|
325 |
+
" \n",
|
326 |
+
" # with open(DOMAIN + \"syn-aug.train.jsonl\", 'w') as f:\n",
|
327 |
+
" # for dp in buffer:\n",
|
328 |
+
" # f.write(json.dumps({\n",
|
329 |
+
" # 'natural': clean_up_normalize(dp['natural']),\n",
|
330 |
+
" # 'canonical': dp['canonical'],\n",
|
331 |
+
" # 'formula': dp['formula']\n",
|
332 |
+
" # }) + '\\n')\n",
|
333 |
+
" with open(DOMAIN + \"syn.train.jsonl\", 'r') as f:\n",
|
334 |
+
" for l in f.readlines():\n",
|
335 |
+
" buffer.append(json.loads(l))\n",
|
336 |
+
" \n",
|
337 |
+
" with open(DOMAIN + \"syn.train.jsonl\", 'w') as f:\n",
|
338 |
+
" for dp in buffer:\n",
|
339 |
+
" f.write(json.dumps({\n",
|
340 |
+
" 'natural': clean_up_normalize(dp['natural']),\n",
|
341 |
+
" 'canonical': dp['canonical'],\n",
|
342 |
+
" 'formula': dp['formula']\n",
|
343 |
+
" }) + '\\n')"
|
344 |
+
]
|
345 |
+
}
|
346 |
+
],
|
347 |
+
"metadata": {
|
348 |
+
"kernelspec": {
|
349 |
+
"display_name": "GPML",
|
350 |
+
"language": "python",
|
351 |
+
"name": "python3"
|
352 |
+
},
|
353 |
+
"language_info": {
|
354 |
+
"codemirror_mode": {
|
355 |
+
"name": "ipython",
|
356 |
+
"version": 3
|
357 |
+
},
|
358 |
+
"file_extension": ".py",
|
359 |
+
"mimetype": "text/x-python",
|
360 |
+
"name": "python",
|
361 |
+
"nbconvert_exporter": "python",
|
362 |
+
"pygments_lexer": "ipython3",
|
363 |
+
"version": "3.7.13"
|
364 |
+
},
|
365 |
+
"orig_nbformat": 4,
|
366 |
+
"vscode": {
|
367 |
+
"interpreter": {
|
368 |
+
"hash": "75567ad983eac98a78c1e40a895e8d82557b42cf9969286235abec07ddbf9e7d"
|
369 |
+
}
|
370 |
+
}
|
371 |
+
},
|
372 |
+
"nbformat": 4,
|
373 |
+
"nbformat_minor": 2
|
374 |
+
}
|
NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical-lifted.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5dfa917383f4e67d55a159050ffa60c83dea987236ef516edf7d2e45e16689d3
|
3 |
+
size 7890
|
NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7554f76c648d84596c8a77458bd615550be66d0903143146a8497797205315f4
|
3 |
+
size 12442
|
NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/golden-lifted.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24cc3906e8d32f3a1bad606ac8d262608065aa6ffbe3065fdf98b4b1fb754bc7
|
3 |
+
size 599574
|