{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import json"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"with open(\"raw/ltl.txt\", \"r\") as f:\n",
" raw_ltls = f.read().splitlines()\n",
" unique_ltls = set(raw_ltls)\n",
"with open(\"raw/eng.txt\", 'r') as f:\n",
" raw_engs = f.read().splitlines()\n",
"\n",
"DPs = []\n",
"for ltl, eng in zip(raw_ltls, raw_engs):\n",
" DPs.append({'ltl': ltl, 'eng': eng})"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"G (P04)\n",
"F G ! P04\n",
"(! P02) U P01\n",
"G(P03 -> P02)\n",
"F (P03 && P02)\n",
"F P01 && F P02\n",
"G !(P01 & P03)\n",
"G (P02 && P01)\n",
"G(!(P01 & P02))\n",
"G (P03 -> F P06)\n",
"G F P01 || X P02\n",
"P03 U (G (F P04))\n",
"G ( P02 -> X P04)\n",
"G( P01 -> F P02 )\n",
"G (P02 -> X F P01)\n",
"G F P01 || G F P02\n",
" G (P01 <-> X P02)\n",
"G F P03 -> G F P02\n",
"G (P01 -> X G ! P02)\n",
" G (P03 | P04 | P05)\n",
"(P03 U P02) || G P03\n",
" ! G (! (P02 && X P02))\n",
"G(P01 -> X (X (X P02)))\n",
"G P02 && G(P03 -> !P01)\n",
" ! G (! (P01 && X P01))\n",
"G (P03 -> (P04 | X P04))\n",
"G (!(P01 && P02) -> F P03)\n",
"G(P01 -> F P02) -> G F P03\n",
"G (P01 && X P02 -> X X P04)\n",
" (P02 U (P02 & ! P01)) | G P02\n",
"P03 -> X ((P02 U P01) || G P02)\n",
"G(!(P02 &&P04)) && G(P02 || P04)\n",
"(F P05) -> (!P05 U (P06 & !P05))\n",
"G ((P04 <-> P02) -> (P03 <-> P05))\n",
"G ( P01 -> (X !P01 | XX !P01 | XXX !P01))\n",
"P02 && G (P02 -> X ! P02 && X X ! P02 && X X X ! P02 && X X X X ! P02 && X X X X X P02)\n"
]
}
],
"source": [
"for ltl in sorted(unique_ltls, key=lambda x: len(x)):\n",
" print(ltl)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Drone-planning Domain, with augmentation\n",
"Number of Data Points 756\n",
"Number of unique LTL expressions: 36\n",
"Number of unique LTL structures: 5\n"
]
}
],
"source": [
"print(\"Drone-planning Domain, with augmentation\")\n",
"print(\"Number of Data Points\", len(DPs))\n",
"print(\"Number of unique LTL expressions:\", len(unique_ltls))\n",
"print(\"Number of unique LTL structures:\", 5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"F ( AP )\n",
"- Go to AP\n",
"\n",
"F ( AP1 & F ( AP2 ) )\n",
"- First go to AP1 and then to AP2\n",
"\n",
"\n",
"F ( AP1 ) & G ( AP2 )\n",
"- Go to AP1 and Always in AP2\n",
"\n",
"\n",
"F ( AP1 ) & ~ ( AP2 )\n",
"- Go to AP1 and avoid AP2 \n",
"\n",
"~ ( AP1 ) U ( AP2 )\n",
"- avoid going through AP1 until go through AP2\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Clean up LTL expressions "
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"ori_aps = set()\n",
"for unique_ltl in unique_ltls:\n",
" ori_aps.update(unique_ltl.split())"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'!',\n",
" '!(P01',\n",
" '!P01',\n",
" '!P01)',\n",
" '!P01))',\n",
" '!P05))',\n",
" '&',\n",
" '&&',\n",
" '&&P04))',\n",
" '(',\n",
" '(!',\n",
" '(!(P01',\n",
" '(!P05',\n",
" '((P02',\n",
" '((P04',\n",
" '(F',\n",
" '(G',\n",
" '(P01',\n",
" '(P02',\n",
" '(P03',\n",
" '(P04',\n",
" '(P04)',\n",
" '(P06',\n",
" '(X',\n",
" ')',\n",
" '->',\n",
" '<->',\n",
" 'F',\n",
" 'G',\n",
" 'G(',\n",
" 'G(!(P01',\n",
" 'G(!(P02',\n",
" 'G(P01',\n",
" 'G(P02',\n",
" 'G(P03',\n",
" 'P01',\n",
" 'P01)',\n",
" 'P01))',\n",
" 'P02',\n",
" 'P02)',\n",
" 'P02))',\n",
" 'P02)))',\n",
" 'P03',\n",
" 'P03)',\n",
" 'P04',\n",
" 'P04)',\n",
" 'P04))',\n",
" 'P05)',\n",
" 'P05))',\n",
" 'P06)',\n",
" 'U',\n",
" 'X',\n",
" 'XX',\n",
" 'XXX',\n",
" '|',\n",
" '||'}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ori_aps"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"APs = [\n",
" {'ap': 'blue_room', 'eng': 'the blue room'}, \n",
" {'ap': 'green_room', 'eng': 'the green room'},\n",
" {'ap': 'orange_room', 'eng': 'the orange room'},\n",
" {'ap': 'purple_room', 'eng': 'the purple room'},\n",
" {'ap': 'red_room', 'eng': 'the red room'},\n",
" {'ap': 'yellow_room', 'eng': 'the yellow room'},\n",
" {'ap': 'first_floor', 'eng': 'the first floor'},\n",
" {'ap': 'second_floor', 'eng': 'the second floor'},\n",
" {'ap': 'third_floor', 'eng': 'the third floor'},\n",
" {'ap': 'landmark_1', 'eng': 'landmark 1'},\n",
" {'ap': 'landmark_2', 'eng': 'landmark 2'},\n",
" {'ap': 'landmark_3', 'eng': 'landmark 3'},\n",
"]\n",
"APs = [\n",
" {'ap': 'P01', 'eng': 'P01'},\n",
" {'ap': 'P02', 'eng': 'P02'},\n",
" {'ap': 'P03', 'eng': 'P03'},\n",
" {'ap': 'P04', 'eng': 'P04'},\n",
" {'ap': 'P05', 'eng': 'P05'},\n",
" {'ap': 'P06', 'eng': 'P06'},\n",
" {'ap': 'P07', 'eng': 'P07'},\n",
" {'ap': 'P08', 'eng': 'P08'},\n",
" {'ap': 'P09', 'eng': 'P09'},\n",
" {'ap': 'P10', 'eng': 'P10'},\n",
" {'ap': 'P11', 'eng': 'P11'},\n",
" {'ap': 'P12', 'eng': 'P12'},\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# from the translator\n",
"def build_type_A(ap1):\n",
" raw_ltl = f\"F ( {ap1['ap']} )\"\n",
" raw_ltl = f\"F ( {ap1['ap']} )\"\n",
" canonical_ltl = f\"finally ( {ap1['eng']} )\"\n",
" translated = f\"go to {ap1['eng']}\"\n",
" return {\n",
" 'raw_ltl': raw_ltl,\n",
" 'canonical_ltl': canonical_ltl,\n",
" 'eng': translated}\n",
"\n",
"def build_type_B(room_1, room_2):\n",
" raw_ltl = f\"F ( {room_1['ap']} & F ( {room_2['ap']} ) )\"\n",
" canonical_ltl = f\"finally ( and ( {room_1['eng']} , finally ( {room_2['eng']} ) ) )\"\n",
" translated = f\"go to {room_1['eng']}, and then {room_2['eng']} at last\"\n",
" return {\n",
" 'raw_ltl': raw_ltl,\n",
" 'canonical_ltl': canonical_ltl,\n",
" 'eng': translated}\n",
"\n",
"def build_type_C(room_1, room_2):\n",
" raw_ltl = f\"F ( {room_1['ap']} ) & G ( {room_2['ap']} )\"\n",
" canonical_ltl = f\"and ( finally ( {room_1['eng']} ) , always ( {room_2['eng']} ) )\"\n",
" translated = f\"go to {room_1['eng']} at last, and always stay in {room_2['eng']}\"\n",
" return {\n",
" 'raw_ltl': raw_ltl,\n",
" 'canonical_ltl': canonical_ltl,\n",
" 'eng': translated}\n",
"\n",
"def build_type_D(room_1, room_2):\n",
" raw_ltl = f\"F ( {room_1['ap']} & ~ {room_2['ap']} )\"\n",
" canonical_ltl = f\"finally ( and ( {room_1['eng']} , not ( {room_2['eng']} ) ) )\"\n",
" translated = f\"go to {room_1['eng']} at last, and avoid {room_2['eng']}\"\n",
" return {\n",
" 'raw_ltl': raw_ltl,\n",
" 'canonical_ltl': canonical_ltl,\n",
" 'eng': translated}\n",
"\n",
"def build_type_E(room_1, room_2):\n",
" raw_ltl = f\"~ ( {room_1['ap']} ) U ( {room_2['ap']} )\"\n",
" canonical_ltl = f\"until ( not ( {room_1['eng']} ) , {room_2['eng']} )\"\n",
" translated = f\"avoid go to {room_1['eng']} until go to {room_2['eng']}\"\n",
" return {\n",
" 'raw_ltl': raw_ltl,\n",
" 'canonical_ltl': canonical_ltl,\n",
" 'eng': translated}"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"translation_seeds = []\n",
"for r1 in APs:\n",
" translation_seeds.append(build_type_A(r1))\n",
" for r2 in APs:\n",
" if r1 == r2:\n",
" continue\n",
" translation_seeds.append(build_type_B(r1, r2))\n",
" translation_seeds.append(build_type_C(r1, r2))\n",
" translation_seeds.append(build_type_D(r1, r2))\n",
" translation_seeds.append(build_type_E(r1, r2))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"540\n"
]
}
],
"source": [
"print(len(translation_seeds))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'G (!(P01 && P02) -> F P03)', ' ! G (! (P02 && X P02))', 'F (P03 && P02)', 'G(P01 -> F P02) -> G F P03', 'F P01 && F P02', 'G (P02 -> X F P01)', 'P03 U (G (F P04))', 'G F P01 || G F P02', '(! P02) U P01', 'G(P01 -> X (X (X P02)))', 'F G ! P04', 'P02 && G (P02 -> X ! P02 && X X ! P02 && X X X ! P02 && X X X X ! P02 && X X X X X P02)', 'G (P01 -> X G ! P02)', 'G (P03 -> F P06)', 'G(!(P02 &&P04)) && G(P02 || P04)', 'G ( P02 -> X P04)', 'G (P03 -> (P04 | X P04))', 'G ( P01 -> (X !P01 | XX !P01 | XXX !P01))', 'P03 -> X ((P02 U P01) || G P02)', 'G(P03 -> P02)', '(F P05) -> (!P05 U (P06 & !P05))', 'G ((P04 <-> P02) -> (P03 <-> P05))', 'G F P01 || X P02', ' G (P03 | P04 | P05)', 'G(!(P01 & P02))', ' G (P01 <-> X P02)', 'G (P01 && X P02 -> X X P04)', 'G( P01 -> F P02 )', 'G (P04)', 'G !(P01 & P03)', 'G P02 && G(P03 -> !P01)', ' ! G (! (P01 && X P01))', 'G (P02 && P01)', ' (P02 U (P02 & ! P01)) | G P02', 'G F P03 -> G F P02', '(P03 U P02) || G P03'}\n"
]
}
],
"source": [
"seed_ltls = set([t['raw_ltl'] for t in translation_seeds])\n",
"print(unique_ltls - seed_ltls)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save the canonical decoding list"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"possible_decodings = {}\n",
"for seed in translation_seeds:\n",
" canonical = seed['canonical_ltl']\n",
" possible_decodings[canonical] = {\n",
" 'formula': canonical,\n",
" 'raw': seed['raw_ltl'],\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"with open(\"canonical.json\", 'w') as f:\n",
" f.write(json.dumps(possible_decodings, indent=2))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save translation seed for zero shot"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"with open(\"train_seed.jsonl\", \"w\") as f:\n",
" for dp in translation_seeds:\n",
" better_ltl = dp['canonical_ltl']\n",
" entry = {'canonical': better_ltl, 'formula': better_ltl, 'natural': dp['eng']}\n",
" json.dump(entry, f)\n",
" f.write('\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save golden data for evaluation "
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"raw_canonical_mapping = {\n",
" seed['raw_ltl']: seed['canonical_ltl'] for seed in translation_seeds\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"756"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(DPs)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'ltl': 'G( P01 -> F P02 )',\n",
" 'eng': 'Every P01 is eventually followed by a P02.'}"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"DPs[0]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "'G( P01 -> F P02 )'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m/home/shaojun/code/NL2TL_translaters/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb Cell 24\u001b[0m line \u001b[0;36m4\n\u001b[1;32m 1\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mgolden.jsonl\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mw\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mas\u001b[39;00m f:\n\u001b[1;32m 2\u001b[0m \u001b[39mfor\u001b[39;00m dp \u001b[39min\u001b[39;00m DPs:\n\u001b[1;32m 3\u001b[0m entry \u001b[39m=\u001b[39m {\n\u001b[0;32m----> 4\u001b[0m \u001b[39m'\u001b[39m\u001b[39mcanonical\u001b[39m\u001b[39m'\u001b[39m: raw_canonical_mapping[dp[\u001b[39m'\u001b[39;49m\u001b[39mltl\u001b[39;49m\u001b[39m'\u001b[39;49m]],\n\u001b[1;32m 5\u001b[0m \u001b[39m'\u001b[39m\u001b[39mformula\u001b[39m\u001b[39m'\u001b[39m: raw_canonical_mapping[dp[\u001b[39m'\u001b[39m\u001b[39mltl\u001b[39m\u001b[39m'\u001b[39m]],\n\u001b[1;32m 6\u001b[0m \u001b[39m'\u001b[39m\u001b[39mnatural\u001b[39m\u001b[39m'\u001b[39m: dp[\u001b[39m'\u001b[39m\u001b[39meng\u001b[39m\u001b[39m'\u001b[39m],\n\u001b[1;32m 7\u001b[0m \u001b[39m'\u001b[39m\u001b[39mraw_ltl\u001b[39m\u001b[39m'\u001b[39m: dp[\u001b[39m'\u001b[39m\u001b[39mltl\u001b[39m\u001b[39m'\u001b[39m],\n\u001b[1;32m 8\u001b[0m }\n\u001b[1;32m 9\u001b[0m json\u001b[39m.\u001b[39mdump(entry, f)\n\u001b[1;32m 10\u001b[0m f\u001b[39m.\u001b[39mwrite(\u001b[39m'\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m'\u001b[39m)\n",
"\u001b[0;31mKeyError\u001b[0m: 'G( P01 -> F P02 )'"
]
}
],
"source": [
"with open(\"golden.jsonl\", \"w\") as f:\n",
" for dp in DPs:\n",
" entry = {\n",
" 'canonical': raw_canonical_mapping[dp['ltl']],\n",
" 'formula': raw_canonical_mapping[dp['ltl']],\n",
" 'natural': dp['eng'],\n",
" 'raw_ltl': dp['ltl'],\n",
" }\n",
" json.dump(entry, f)\n",
" f.write('\\n')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.12 ('GPML')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.17"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "92a84f77637d1d47b588cbbaac9b07f8c628b67f58e672e955ed4902878afbbe"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}