tt-dart committed
Commit 784a7e2 · 1 Parent(s): bacb17b

add dataset

This view is limited to 50 files because it contains too many changes; see the raw diff for the full change set.
Files changed (50):
  1. .gitattributes +4 -0
  2. NL2TL-dataset/NLTLsummary.json +3 -0
  3. NL2TL-dataset/collect/Cleaned_ENG.txt +3 -0
  4. NL2TL-dataset/collect/Cleaned_LTL.txt +3 -0
  5. NL2TL-dataset/collect/UNCleaned_ENG.txt +3 -0
  6. NL2TL-dataset/collect/UNCleaned_LTL.txt +3 -0
  7. NL2TL-dataset/collect/UNCleaned_num.txt +3 -0
  8. NL2TL-dataset/collect/eng.txt +3 -0
  9. NL2TL-dataset/collect/eng_gpt_auged.txt +3 -0
  10. NL2TL-dataset/collect/eng_gpt_auged2.txt +3 -0
  11. NL2TL-dataset/collect/eng改过了不太好.txt +3 -0
  12. NL2TL-dataset/collect/idxsrc_gpt_auged.txt +3 -0
  13. NL2TL-dataset/collect/idxsrc_gpt_auged2.txt +3 -0
  14. NL2TL-dataset/collect/log.jsonl +3 -0
  15. NL2TL-dataset/collect/ltl copy.txt +3 -0
  16. NL2TL-dataset/collect/ltl.txt +3 -0
  17. NL2TL-dataset/collect/ltl_eng_1.jsonl +3 -0
  18. NL2TL-dataset/collect/ltl_eng_mid_order_1.jsonl +3 -0
  19. NL2TL-dataset/collect/ltl_eng_test-aug.jsonl +3 -0
  20. NL2TL-dataset/collect/ltl_eng_test.jsonl +3 -0
  21. NL2TL-dataset/collect/ltl_eng_test_mid.jsonl +3 -0
  22. NL2TL-dataset/collect/ltl_eng_test_mid_ascii.jsonl +3 -0
  23. NL2TL-dataset/collect/ltl_eng_test_mid_ascii_gptAuged.jsonl +3 -0
  24. NL2TL-dataset/collect/ltl_eng_train-aug.jsonl +3 -0
  25. NL2TL-dataset/collect/ltl_eng_train.jsonl +3 -0
  26. NL2TL-dataset/collect/ltl_eng_train_mid.jsonl +3 -0
  27. NL2TL-dataset/collect/ltl_eng_train_mid_ascii.jsonl +3 -0
  28. NL2TL-dataset/collect/ltl_eng_train_mid_ascii_gptAuged.jsonl +3 -0
  29. NL2TL-dataset/collect/ltl_mid_order.txt +3 -0
  30. NL2TL-dataset/collect/ltl_mid_order_ascii.txt +3 -0
  31. NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged.txt +3 -0
  32. NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged2.txt +3 -0
  33. NL2TL-dataset/collect/note.txt +3 -0
  34. NL2TL-dataset/collect2/CW_total_3382_for_transfer_word_midfix.jsonl +3 -0
  35. NL2TL-dataset/collect2/GLTL_train_8923_for_transfer_word_midfix.jsonl +3 -0
  36. NL2TL-dataset/collect2/LTLexplain_0.json +3 -0
  37. NL2TL-dataset/collect2/LTLexplain_1.json +3 -0
  38. NL2TL-dataset/collect2/LTLexplain_2.json +3 -0
  39. NL2TL-dataset/collect2/LTLexplain_3.json +3 -0
  40. NL2TL-dataset/collect2/LTLexplain_4.json +3 -0
  41. NL2TL-dataset/collect2/LTLsummary.json +3 -0
  42. NL2TL-dataset/collect2/getUniqueLTL.py +20 -0
  43. NL2TL-dataset/collect2/lifted_data.jsonl +3 -0
  44. NL2TL-dataset/collect2/ltl_eng_test_mid_ascii_gptAuged.jsonl +3 -0
  45. NL2TL-dataset/collect2/ltl_eng_train_mid_ascii_gptAuged.jsonl +3 -0
  46. NL2TL-dataset/collect2/navi_total_refined.jsonl +3 -0
  47. NL2TL-dataset/datasets-Efficient-Eng-2-LTL/augment.ipynb +374 -0
  48. NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical-lifted.json +3 -0
  49. NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical.json +3 -0
  50. NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/golden-lifted.jsonl +3 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.jsonl filter=lfs diff=lfs merge=lfs -text
+ *.json filter=lfs diff=lfs merge=lfs -text
+ *.csv filter=lfs diff=lfs merge=lfs -text
+ *.txt filter=lfs diff=lfs merge=lfs -text
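These four new rules route every *.jsonl, *.json, *.csv, and *.txt file through Git LFS, which is why each data file added below appears as a three-line pointer (spec version, sha256 oid, byte size) rather than its contents. As a minimal sketch of how such a pointer can be read back — this helper is illustrative, not part of the commit, and assumes only the standard LFS pointer layout:

def parse_lfs_pointer(path: str) -> dict:
    # A Git LFS pointer file is exactly three "key value" lines, e.g.
    #   version https://git-lfs.github.com/spec/v1
    #   oid sha256:<64 hex chars>
    #   size <bytes>
    fields = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields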
NL2TL-dataset/NLTLsummary.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6291896067a809119e01b43734779333c2d6b7baa36376afe8aa6579b4ba77ee
+ size 23025
NL2TL-dataset/collect/Cleaned_ENG.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:77f8e6b696147098eb16be8ec7a2891a493c008e7d997f994d1b77fea4255559
+ size 447181
NL2TL-dataset/collect/Cleaned_LTL.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3ced5c8ee114bc50e480c44999e991337318749dd7f5988379c0f2cd3c5940d2
+ size 252706
NL2TL-dataset/collect/UNCleaned_ENG.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:29fe1e57c68d9096705eae98f6456aa24eb86ecc2a43fa7896785a30eba1c3d0
+ size 79381
NL2TL-dataset/collect/UNCleaned_LTL.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f903103fcdc5b86bf1aca3568fd5483bfab90412fb1581006c0433c4ac0feecc
+ size 34258
NL2TL-dataset/collect/UNCleaned_num.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:50067192dcabdc789ff68998a4b67b44d5ea71ad870b2f3029f06a62e93cd1cc
+ size 7639
NL2TL-dataset/collect/eng.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:834f288fc7c4bf3c27e829db10d72f673b55551f5e50591e9ff05461b78417e7
+ size 384436
NL2TL-dataset/collect/eng_gpt_auged.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b5d039a94cfde70475d1c935e80286d0c3bb9c578f01c86cf8f65aed015fdf14
+ size 46038
NL2TL-dataset/collect/eng_gpt_auged2.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6484a6ace4d0bdd6e00b337aa2e734d4cd3cfdbcd03f90a960ea3859381ffaad
+ size 96837
NL2TL-dataset/collect/eng改过了不太好.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e74a45639f6d71263ea70768e40a4d299137fca66de36da7657a15cbba60beb9
+ size 379834
NL2TL-dataset/collect/idxsrc_gpt_auged.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ce789edf73241091f48e1e3a2b4eddf06ae0244fc20d4f050cbed474afd79ab6
+ size 4425
NL2TL-dataset/collect/idxsrc_gpt_auged2.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a71b50098910d35585cddb30b9fd0187898db7e0ea1ae5cc94a5bf7f100e81a4
+ size 9273
NL2TL-dataset/collect/log.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f3a3e9ba897654fe39da61e0fc20687714fb1cd6da68d98eb604731b20d14fce
+ size 561367
NL2TL-dataset/collect/ltl copy.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:161b2a0b67725db0aa2e803c4576fc47cfb0698f66469c660941a0ddbcaba76c
+ size 192556
NL2TL-dataset/collect/ltl.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:161b2a0b67725db0aa2e803c4576fc47cfb0698f66469c660941a0ddbcaba76c
+ size 192556
NL2TL-dataset/collect/ltl_eng_1.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:55879344ea92bf9073b1b605dda98cfe551b9169186dde31592d15c9a24d47e4
+ size 1043402
NL2TL-dataset/collect/ltl_eng_mid_order_1.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e61fbb4657c7b6d81d26c6a8fa596f03156dba6308b17b54125f79cd3734c058
+ size 1069183
NL2TL-dataset/collect/ltl_eng_test-aug.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0982bf8999ef200b1d7fa5c867b70e41caf136b7a4422e73eb303f7ee77b02dd
+ size 131838
NL2TL-dataset/collect/ltl_eng_test.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0982bf8999ef200b1d7fa5c867b70e41caf136b7a4422e73eb303f7ee77b02dd
+ size 131838
NL2TL-dataset/collect/ltl_eng_test_mid.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bd4e75373318179d3cd3a2bbb330fdfe75376dabe77f475f31d95cf27f4d100c
+ size 137907
NL2TL-dataset/collect/ltl_eng_test_mid_ascii.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9185fd98062c160c363f35116a943a0950fdd2151bd7ff61cb9df39921ca73f3
+ size 137931
NL2TL-dataset/collect/ltl_eng_test_mid_ascii_gptAuged.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5ca41a9c7ea7dd2151e861ac7754f36eee0e386f7a0dbbf7d2d5cabf927ccb7b
+ size 125920
NL2TL-dataset/collect/ltl_eng_train-aug.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4817c9d380816056e05c0bec1f985ac90dbc1068417db5d033d15bfe4273d279
+ size 911564
NL2TL-dataset/collect/ltl_eng_train.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4817c9d380816056e05c0bec1f985ac90dbc1068417db5d033d15bfe4273d279
+ size 911564
NL2TL-dataset/collect/ltl_eng_train_mid.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0daf22f37a11d38edaacfd837ccabf59fc7395431166bc50b0336e030faf77d1
+ size 937958
NL2TL-dataset/collect/ltl_eng_train_mid_ascii.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e3674e9aabc015509efadbe1f8374627d5aaaacfb8fe3c717fa1ff852dcb8c4f
+ size 937958
NL2TL-dataset/collect/ltl_eng_train_mid_ascii_gptAuged.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cea13abc2204906d232ea88b78830ad6be09e01dbd5b7ad7ec9da9dff2f6c777
+ size 1129386
NL2TL-dataset/collect/ltl_mid_order.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:646500a7231278e0dd4ca0ed6fc2cc71ba4f3b7284a13a720a689a8e634db78e
+ size 225055
NL2TL-dataset/collect/ltl_mid_order_ascii.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f56781a159d3a52b58101038f0d15e1ef054b5f0290dd73228e51f4004243dd6
+ size 225073
NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f332db20802ecce5a8bd2879316e66061c97d438d18c7cbc6d4899b3bfbcaa44
+ size 19575
NL2TL-dataset/collect/ltl_mid_order_ascii_gpt_auged2.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bfca7f2b1129e28f89e4dd68d76406ad26404514d08864aaf1c76f3fa01eb132
+ size 40805
NL2TL-dataset/collect/note.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7798d0398769800f3cfc50fc902cec4cd1aa1eff05e3cb41790094c620f61039
+ size 59
NL2TL-dataset/collect2/CW_total_3382_for_transfer_word_midfix.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c131c3486f4debdf0ed416248de2892d41627abbf2fcd83c69051b7ba69bfb2b
+ size 449106
NL2TL-dataset/collect2/GLTL_train_8923_for_transfer_word_midfix.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cc7aff94ba621fb7def1b174b2f5ee0e1336b572ed65bb7c15f03f1295156930
+ size 1839113
NL2TL-dataset/collect2/LTLexplain_0.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:08175568f9105c8dd5e8e5413e8a90834ab48b0cd2c8396ca1a71ab97f69ef68
+ size 149375
NL2TL-dataset/collect2/LTLexplain_1.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d2a0f1e1bfe11526ca4ad549cf263fcfd60d18d9ee27cf77b889a3d0b8728d37
+ size 149123
NL2TL-dataset/collect2/LTLexplain_2.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:64ee6dd1b5f76edc89b57cc4384a7dc1ea1af31940016f844c8a22ee0b98c0e4
+ size 82749
NL2TL-dataset/collect2/LTLexplain_3.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:052761b9c005136e13576c0bb4f279ad5c81e9e15abfd736e344028b32e8af89
+ size 73448
NL2TL-dataset/collect2/LTLexplain_4.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d3fa922f8b8402b2f950c18de7e2360a0aa7158297251e0ea1fa2d6c26337ccf
+ size 147624
NL2TL-dataset/collect2/LTLsummary.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:11079ebb03aa8d76763c690f7c59431f32c25ce385657b3e68daa1b15c7ff734
+ size 22393
NL2TL-dataset/collect2/getUniqueLTL.py ADDED
@@ -0,0 +1,20 @@
+ import json
+ import os
+
+ def findUniqueLTL(paths: list) -> dict:
+     # Collect the distinct 'raw_ltl' formulas across the given JSONL files,
+     # using a dict as an insertion-ordered set (every value is 1).
+     ret = {}
+     for path in paths:
+         with open(path, 'r') as f:
+             for line in f:
+                 j = json.loads(line)
+                 ret[j['raw_ltl']] = 1
+     return ret
+
+ if __name__ == '__main__':
+     paths = ['/home/user/xsj/NL2TL-dataset/collect2/ltl_eng_test_mid_ascii_gptAuged.jsonl',
+              '/home/user/xsj/NL2TL-dataset/collect2/ltl_eng_train_mid_ascii_gptAuged.jsonl']
+     LTLs = findUniqueLTL(paths=paths)
+     with open(os.path.join('/home/user/xsj/NL2TL-dataset/collect2', 'NLTLsummary.json'), 'w') as f:
+         f.write(json.dumps(LTLs, sort_keys=False, indent=4, separators=(',', ':')))
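The script relies only on each JSONL line carrying a raw_ltl field; everything else on the line is ignored. A hypothetical round trip, with field values invented for illustration (the actual schema of the gptAuged files is not shown in this diff):

import json

lines = [
    '{"raw_ltl": "F ( a U b )", "eng": "eventually a holds until b"}',
    '{"raw_ltl": "G ( a -> F b )", "eng": "whenever a, b follows"}',
    '{"raw_ltl": "F ( a U b )", "eng": "a until b, at some point"}',
]
unique = {}
for line in lines:
    unique[json.loads(line)['raw_ltl']] = 1  # same dict-as-set trick as findUniqueLTL
print(json.dumps(unique))  # {"F ( a U b )": 1, "G ( a -> F b )": 1} — duplicate collapsed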
NL2TL-dataset/collect2/lifted_data.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c68ab33843fe0485380f33b6e49cb7c9230eae66252d869083584dd7ef048afb
+ size 12458149
NL2TL-dataset/collect2/ltl_eng_test_mid_ascii_gptAuged.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:41061064f1591c833a29ea73ce1888aaa831b488b0f4f0f2a04994c871a42873
+ size 140979
NL2TL-dataset/collect2/ltl_eng_train_mid_ascii_gptAuged.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:24f9856d3e7f3d882de35caf5e3d79e70b783630178e1fcbad197b31431b2bfa
+ size 1264107
NL2TL-dataset/collect2/navi_total_refined.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:11499148f1f053c86476f08ec39f9cb0cb724eb9451d0153e2f34c77a04855b8
+ size 3825939
NL2TL-dataset/datasets-Efficient-Eng-2-LTL/augment.ipynb ADDED
@@ -0,0 +1,374 @@
+ {
+  "cells": [
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "# Augmentation by paraphrasing"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "## Init & Load Seed Data"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "import json, openai\n",
+     "from tqdm import tqdm"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "DOMAIN = \"drone-planning/\"\n",
+     "# DOMAIN = \"clean-up/\"\n",
+     "# DOMAIN = \"pick-and-place/\"\n",
+     "with open(DOMAIN + \"train_seed.jsonl\") as f:\n",
+     "    train_seed = [json.loads(line) for line in f]"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "eng_seeds = {\n",
+     "    seed['natural']: [] for seed in train_seed\n",
+     "}"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "## Augmentation Code\n",
+     "Prompting GPT-3 seems to work best in this case."
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "# You need to set your OpenAI API key here\n",
+     "# https://beta.openai.com/account/api-keys\n",
+     "openai.api_key = \"TO_BE_SET\""
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "def normalize(sentence):\n",
+     "    # capitalize first letter and add period at the end if not present\n",
+     "    if sentence[0].islower():\n",
+     "        sentence = sentence[0].upper() + sentence[1:]\n",
+     "    if sentence[-1] != '.':\n",
+     "        sentence = sentence + '.'\n",
+     "    return sentence\n",
+     "\n",
+     "def parse_sentences_from_response(response):\n",
+     "    lines = response.split('\\n')\n",
+     "    # assert len(lines) == 5\n",
+     "    assert len(lines) == 10\n",
+     "    lines[0] = \"1.\" + lines[0]\n",
+     "    paraphrases = []\n",
+     "    for idx, line in enumerate(lines):\n",
+     "        assert line.startswith(str(idx+1) + '. ')\n",
+     "        sentence_start_idx = len(str(idx+1) + '. ')\n",
+     "        paraphrases.append(line[sentence_start_idx:])\n",
+     "    for idx, paraphrase in enumerate(paraphrases):\n",
+     "        if paraphrase[-1] == ' ':\n",
+     "            if paraphrase[-2] == '.':\n",
+     "                paraphrases[idx] = paraphrase[:-1]\n",
+     "            else:\n",
+     "                paraphrases[idx] = paraphrase[:-2] + '.'\n",
+     "    return paraphrases\n",
+     "\n",
+     "\n",
+     "PROMPT = \"\"\"Rephrase the source sentence in 10 different ways. Make the outputs as diverse as possible.\n",
+     "\n",
+     "Source:\n",
+     "SOURCE-TO-BE-PLACED\n",
+     "\n",
+     "Outputs:\n",
+     "1.\"\"\"\n",
+     "def rephrase_a_sentence(sentence):\n",
+     "    response = openai.Completion.create(\n",
+     "        model=\"text-davinci-002\",\n",
+     "        prompt=PROMPT.replace(\"SOURCE-TO-BE-PLACED\", normalize(sentence)),\n",
+     "        temperature=0.7,\n",
+     "        max_tokens=512,\n",
+     "        top_p=1,\n",
+     "        best_of=1,\n",
+     "        frequency_penalty=0.1,\n",
+     "        presence_penalty=0\n",
+     "    )\n",
+     "    output = response['choices'][0]['text']\n",
+     "    try:\n",
+     "        paraphrases = parse_sentences_from_response(output)\n",
+     "    except:\n",
+     "        print(\"Error in parsing response\")\n",
+     "        print(output)\n",
+     "        return output, \"ERROR\"\n",
+     "    return paraphrases"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "O = rephrase_a_sentence(\"Go to the red room or go to the green room to finally go to the blue room.\")"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "O"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "## Run Augmentation"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "len(eng_seeds)"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "list(eng_seeds.keys())[0]"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "def paraphrase_done(eng_seeds):\n",
+     "    for eng_seed, extended in tqdm(eng_seeds.items()):\n",
+     "        if len(extended) == 0:\n",
+     "            return False\n",
+     "    return True\n",
+     "\n",
+     "while not paraphrase_done(eng_seeds):\n",
+     "    for eng_seed, extended in tqdm(eng_seeds.items()):\n",
+     "        if len(extended) == 0:\n",
+     "            extended += rephrase_a_sentence(eng_seed)"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "eng_seeds"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "### Dump as Training Data"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "train_seed[0]"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "with open(DOMAIN + \"syn-aug.train.jsonl\", 'w') as f:\n",
+     "    for seed in train_seed:\n",
+     "        f.write(json.dumps(seed) + '\\n')\n",
+     "        for aug_eng in eng_seeds[seed['natural']]:\n",
+     "            f.write(json.dumps({\n",
+     "                'natural': aug_eng,\n",
+     "                'canonical': seed['canonical'],\n",
+     "                'formula': seed['formula']\n",
+     "            }) + '\\n')"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "with open(DOMAIN + \"syn.train.jsonl\", 'w') as f:\n",
+     "    for seed in train_seed:\n",
+     "        f.write(json.dumps(seed) + '\\n')"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "### Normalize the natural language form"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "if DOMAIN == \"clean-up/\":\n",
+     "    # in clean-up, golden natural language data comes without period at the end, no capitalization in the beginning\n",
+     "    def clean_up_normalize(sentence):\n",
+     "        if sentence[0].isupper():\n",
+     "            sentence = sentence[0].lower() + sentence[1:]\n",
+     "        if sentence[-1] == '.':\n",
+     "            sentence = sentence[:-1]\n",
+     "        return sentence\n",
+     "\n",
+     "    buffer = []\n",
+     "    with open(DOMAIN + \"syn-aug.train.jsonl\", 'r') as f:\n",
+     "        for l in f.readlines():\n",
+     "            buffer.append(json.loads(l))\n",
+     "\n",
+     "    with open(DOMAIN + \"syn-aug.train.jsonl\", 'w') as f:\n",
+     "        for dp in buffer:\n",
+     "            f.write(json.dumps({\n",
+     "                'natural': clean_up_normalize(dp['natural']),\n",
+     "                'canonical': dp['canonical'],\n",
+     "                'formula': dp['formula']\n",
+     "            }) + '\\n')\n",
+     "\n",
+     "if DOMAIN == \"pick-and-place/\":\n",
+     "    # in pick-and-place, golden natural language data comes without period at the end, no capitalization in the beginning\n",
+     "    def clean_up_normalize(sentence):\n",
+     "        if sentence[0].isupper():\n",
+     "            sentence = sentence[0].lower() + sentence[1:]\n",
+     "        if sentence[-1] == '.':\n",
+     "            sentence = sentence[:-1]\n",
+     "        return sentence\n",
+     "\n",
+     "    buffer = []\n",
+     "    with open(DOMAIN + \"syn-aug.train.jsonl\", 'r') as f:\n",
+     "        for l in f.readlines():\n",
+     "            buffer.append(json.loads(l))\n",
+     "\n",
+     "    with open(DOMAIN + \"syn-aug.train.jsonl\", 'w') as f:\n",
+     "        for dp in buffer:\n",
+     "            f.write(json.dumps({\n",
+     "                'natural': clean_up_normalize(dp['natural']),\n",
+     "                'canonical': dp['canonical'],\n",
+     "                'formula': dp['formula']\n",
+     "            }) + '\\n')"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "if DOMAIN == \"drone-planning/\":\n",
+     "    # in drone-planning, golden natural language data comes with a \"space + period\" at the end, no capitalization in the beginning\n",
+     "    def clean_up_normalize(sentence):\n",
+     "        if sentence[0].isupper():\n",
+     "            sentence = sentence[0].lower() + sentence[1:]\n",
+     "        while sentence[-1] == ' ' or sentence[-1] == '.' or sentence[-1] == '!':\n",
+     "            sentence = sentence[:-1]\n",
+     "        sentence = sentence + '.'\n",
+     "        sentence = sentence.replace('.', ' .')\n",
+     "        sentence = sentence.replace(',', ' ,')\n",
+     "        return sentence\n",
+     "\n",
+     "    buffer = []\n",
+     "    # with open(DOMAIN + \"syn-aug.train.jsonl\", 'r') as f:\n",
+     "    #     for l in f.readlines():\n",
+     "    #         buffer.append(json.loads(l))\n",
+     "\n",
+     "    # with open(DOMAIN + \"syn-aug.train.jsonl\", 'w') as f:\n",
+     "    #     for dp in buffer:\n",
+     "    #         f.write(json.dumps({\n",
+     "    #             'natural': clean_up_normalize(dp['natural']),\n",
+     "    #             'canonical': dp['canonical'],\n",
+     "    #             'formula': dp['formula']\n",
+     "    #         }) + '\\n')\n",
+     "    with open(DOMAIN + \"syn.train.jsonl\", 'r') as f:\n",
+     "        for l in f.readlines():\n",
+     "            buffer.append(json.loads(l))\n",
+     "\n",
+     "    with open(DOMAIN + \"syn.train.jsonl\", 'w') as f:\n",
+     "        for dp in buffer:\n",
+     "            f.write(json.dumps({\n",
+     "                'natural': clean_up_normalize(dp['natural']),\n",
+     "                'canonical': dp['canonical'],\n",
+     "                'formula': dp['formula']\n",
+     "            }) + '\\n')"
+    ]
+   }
+  ],
+  "metadata": {
+   "kernelspec": {
+    "display_name": "GPML",
+    "language": "python",
+    "name": "python3"
+   },
+   "language_info": {
+    "codemirror_mode": {
+     "name": "ipython",
+     "version": 3
+    },
+    "file_extension": ".py",
+    "mimetype": "text/x-python",
+    "name": "python",
+    "nbconvert_exporter": "python",
+    "pygments_lexer": "ipython3",
+    "version": "3.7.13"
+   },
+   "orig_nbformat": 4,
+   "vscode": {
+    "interpreter": {
+     "hash": "75567ad983eac98a78c1e40a895e8d82557b42cf9969286235abec07ddbf9e7d"
+    }
+   }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
+ }
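To make the notebook's data flow concrete: each seed record pairs a natural sentence with its canonical form and LTL formula, and every GPT-3 paraphrase is written out under the same canonical/formula labels. A sketch of one round trip — the literal strings below are invented for illustration; only the 'natural'/'canonical'/'formula' keys come from the notebook:

seed = {
    "natural": "go to the red room .",       # invented example sentence
    "canonical": "finally ( red_room )",     # invented canonical form
    "formula": "F red_room",                 # invented LTL formula
}
paraphrase = "Head over to the red room."    # stands in for one of the 10 GPT-3 outputs
augmented = {
    "natural": paraphrase,                   # paraphrase replaces the sentence
    "canonical": seed["canonical"],          # labels carried over unchanged
    "formula": seed["formula"],
}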
NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical-lifted.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5dfa917383f4e67d55a159050ffa60c83dea987236ef516edf7d2e45e16689d3
+ size 7890
NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/canonical.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7554f76c648d84596c8a77458bd615550be66d0903143146a8497797205315f4
+ size 12442
NL2TL-dataset/datasets-Efficient-Eng-2-LTL/clean-up/golden-lifted.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:24cc3906e8d32f3a1bad606ac8d262608065aa6ffbe3065fdf98b4b1fb754bc7
+ size 599574