{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Load" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import json" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "with open(\"raw/ltl.txt\", \"r\") as f:\n", " raw_ltls = f.read().splitlines()\n", " unique_ltls = set(raw_ltls)\n", "with open(\"raw/eng.txt\", 'r') as f:\n", " raw_engs = f.read().splitlines()\n", "\n", "DPs = []\n", "for ltl, eng in zip(raw_ltls, raw_engs):\n", " DPs.append({'ltl': ltl, 'eng': eng})" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "G (P04)\n", "F G ! P04\n", "(! P02) U P01\n", "G(P03 -> P02)\n", "F (P03 && P02)\n", "F P01 && F P02\n", "G !(P01 & P03)\n", "G (P02 && P01)\n", "G(!(P01 & P02))\n", "G (P03 -> F P06)\n", "G F P01 || X P02\n", "P03 U (G (F P04))\n", "G ( P02 -> X P04)\n", "G( P01 -> F P02 )\n", "G (P02 -> X F P01)\n", "G F P01 || G F P02\n", " G (P01 <-> X P02)\n", "G F P03 -> G F P02\n", "G (P01 -> X G ! P02)\n", " G (P03 | P04 | P05)\n", "(P03 U P02) || G P03\n", " ! G (! (P02 && X P02))\n", "G(P01 -> X (X (X P02)))\n", "G P02 && G(P03 -> !P01)\n", " ! G (! (P01 && X P01))\n", "G (P03 -> (P04 | X P04))\n", "G (!(P01 && P02) -> F P03)\n", "G(P01 -> F P02) -> G F P03\n", "G (P01 && X P02 -> X X P04)\n", " (P02 U (P02 & ! P01)) | G P02\n", "P03 -> X ((P02 U P01) || G P02)\n", "G(!(P02 &&P04)) && G(P02 || P04)\n", "(F P05) -> (!P05 U (P06 & !P05))\n", "G ((P04 <-> P02) -> (P03 <-> P05))\n", "G ( P01 -> (X !P01 | XX !P01 | XXX !P01))\n", "P02 && G (P02 -> X ! P02 && X X ! P02 && X X X ! P02 && X X X X ! P02 && X X X X X P02)\n" ] } ], "source": [ "for ltl in sorted(unique_ltls, key=lambda x: len(x)):\n", " print(ltl)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Drone-planning Domain, with augmentation\n", "Number of Data Points 756\n", "Number of unique LTL expressions: 36\n", "Number of unique LTL structures: 5\n" ] } ], "source": [ "print(\"Drone-planning Domain, with augmentation\")\n", "print(\"Number of Data Points\", len(DPs))\n", "print(\"Number of unique LTL expressions:\", len(unique_ltls))\n", "print(\"Number of unique LTL structures:\", 5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "F ( AP )\n", "- Go to AP\n", "\n", "F ( AP1 & F ( AP2 ) )\n", "- First go to AP1 and then to AP2\n", "\n", "\n", "F ( AP1 ) & G ( AP2 )\n", "- Go to AP1 and Always in AP2\n", "\n", "\n", "F ( AP1 ) & ~ ( AP2 )\n", "- Go to AP1 and avoid AP2 \n", "\n", "~ ( AP1 ) U ( AP2 )\n", "- avoid going through AP1 until go through AP2\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Clean up LTL expressions " ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "ori_aps = set()\n", "for unique_ltl in unique_ltls:\n", " ori_aps.update(unique_ltl.split())" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'!',\n", " '!(P01',\n", " '!P01',\n", " '!P01)',\n", " '!P01))',\n", " '!P05))',\n", " '&',\n", " '&&',\n", " '&&P04))',\n", " '(',\n", " '(!',\n", " '(!(P01',\n", " '(!P05',\n", " '((P02',\n", " '((P04',\n", " '(F',\n", " '(G',\n", " '(P01',\n", " '(P02',\n", " '(P03',\n", " '(P04',\n", " '(P04)',\n", " '(P06',\n", " '(X',\n", " ')',\n", " '->',\n", " '<->',\n", " 'F',\n", " 'G',\n", " 'G(',\n", " 
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Clean up LTL expressions" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "ori_aps = set()\n", "for unique_ltl in unique_ltls:\n", "    ori_aps.update(unique_ltl.split())" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'!',\n", " '!(P01',\n", " '!P01',\n", " '!P01)',\n", " '!P01))',\n", " '!P05))',\n", " '&',\n", " '&&',\n", " '&&P04))',\n", " '(',\n", " '(!',\n", " '(!(P01',\n", " '(!P05',\n", " '((P02',\n", " '((P04',\n", " '(F',\n", " '(G',\n", " '(P01',\n", " '(P02',\n", " '(P03',\n", " '(P04',\n", " '(P04)',\n", " '(P06',\n", " '(X',\n", " ')',\n", " '->',\n", " '<->',\n", " 'F',\n", " 'G',\n", " 'G(',\n", " 'G(!(P01',\n", " 'G(!(P02',\n", " 'G(P01',\n", " 'G(P02',\n", " 'G(P03',\n", " 'P01',\n", " 'P01)',\n", " 'P01))',\n", " 'P02',\n", " 'P02)',\n", " 'P02))',\n", " 'P02)))',\n", " 'P03',\n", " 'P03)',\n", " 'P04',\n", " 'P04)',\n", " 'P04))',\n", " 'P05)',\n", " 'P05))',\n", " 'P06)',\n", " 'U',\n", " 'X',\n", " 'XX',\n", " 'XXX',\n", " '|',\n", " '||'}" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ori_aps" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# NOTE: the room/floor/landmark APs below are kept for reference only; they are\n", "# immediately overwritten by the placeholder propositions (P01..P12) that\n", "# actually appear in this corpus.\n", "APs = [\n", "    {'ap': 'blue_room', 'eng': 'the blue room'},\n", "    {'ap': 'green_room', 'eng': 'the green room'},\n", "    {'ap': 'orange_room', 'eng': 'the orange room'},\n", "    {'ap': 'purple_room', 'eng': 'the purple room'},\n", "    {'ap': 'red_room', 'eng': 'the red room'},\n", "    {'ap': 'yellow_room', 'eng': 'the yellow room'},\n", "    {'ap': 'first_floor', 'eng': 'the first floor'},\n", "    {'ap': 'second_floor', 'eng': 'the second floor'},\n", "    {'ap': 'third_floor', 'eng': 'the third floor'},\n", "    {'ap': 'landmark_1', 'eng': 'landmark 1'},\n", "    {'ap': 'landmark_2', 'eng': 'landmark 2'},\n", "    {'ap': 'landmark_3', 'eng': 'landmark 3'},\n", "]\n", "APs = [\n", "    {'ap': 'P01', 'eng': 'P01'},\n", "    {'ap': 'P02', 'eng': 'P02'},\n", "    {'ap': 'P03', 'eng': 'P03'},\n", "    {'ap': 'P04', 'eng': 'P04'},\n", "    {'ap': 'P05', 'eng': 'P05'},\n", "    {'ap': 'P06', 'eng': 'P06'},\n", "    {'ap': 'P07', 'eng': 'P07'},\n", "    {'ap': 'P08', 'eng': 'P08'},\n", "    {'ap': 'P09', 'eng': 'P09'},\n", "    {'ap': 'P10', 'eng': 'P10'},\n", "    {'ap': 'P11', 'eng': 'P11'},\n", "    {'ap': 'P12', 'eng': 'P12'},\n", "]" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# from the translator\n", "def build_type_A(ap1):\n", "    raw_ltl = f\"F ( {ap1['ap']} )\"\n", "    canonical_ltl = f\"finally ( {ap1['eng']} )\"\n", "    translated = f\"go to {ap1['eng']}\"\n", "    return {\n", "        'raw_ltl': raw_ltl,\n", "        'canonical_ltl': canonical_ltl,\n", "        'eng': translated}\n", "\n", "def build_type_B(room_1, room_2):\n", "    raw_ltl = f\"F ( {room_1['ap']} & F ( {room_2['ap']} ) )\"\n", "    canonical_ltl = f\"finally ( and ( {room_1['eng']} , finally ( {room_2['eng']} ) ) )\"\n", "    translated = f\"go to {room_1['eng']}, and then go to {room_2['eng']}\"\n", "    return {\n", "        'raw_ltl': raw_ltl,\n", "        'canonical_ltl': canonical_ltl,\n", "        'eng': translated}\n", "\n", "def build_type_C(room_1, room_2):\n", "    raw_ltl = f\"F ( {room_1['ap']} ) & G ( {room_2['ap']} )\"\n", "    canonical_ltl = f\"and ( finally ( {room_1['eng']} ) , always ( {room_2['eng']} ) )\"\n", "    translated = f\"eventually go to {room_1['eng']}, and always stay in {room_2['eng']}\"\n", "    return {\n", "        'raw_ltl': raw_ltl,\n", "        'canonical_ltl': canonical_ltl,\n", "        'eng': translated}\n", "\n", "def build_type_D(room_1, room_2):\n", "    raw_ltl = f\"F ( {room_1['ap']} & ~ {room_2['ap']} )\"\n", "    canonical_ltl = f\"finally ( and ( {room_1['eng']} , not ( {room_2['eng']} ) ) )\"\n", "    translated = f\"eventually go to {room_1['eng']}, and avoid {room_2['eng']}\"\n", "    return {\n", "        'raw_ltl': raw_ltl,\n", "        'canonical_ltl': canonical_ltl,\n", "        'eng': translated}\n", "\n", "def build_type_E(room_1, room_2):\n", "    raw_ltl = f\"~ ( {room_1['ap']} ) U ( {room_2['ap']} )\"\n", "    canonical_ltl = f\"until ( not ( {room_1['eng']} ) , {room_2['eng']} )\"\n", "    translated = f\"avoid going to {room_1['eng']} until going to {room_2['eng']}\"\n", "    return {\n", "        'raw_ltl': raw_ltl,\n", "        'canonical_ltl': canonical_ltl,\n", "        'eng': translated}" ] },
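{ "cell_type": "markdown", "metadata": {}, "source": [ "A quick smoke test of the five builders on the first two placeholder APs, to eyeball the raw/canonical/English triples before generating the full cross product below:" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# One seed per template type.\n", "print(build_type_A(APs[0]))\n", "for build in (build_type_B, build_type_C, build_type_D, build_type_E):\n", "    print(build(APs[0], APs[1]))" ] },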
{ "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "translation_seeds = []\n", "for r1 in APs:\n", "    translation_seeds.append(build_type_A(r1))\n", "    for r2 in APs:\n", "        if r1 == r2:\n", "            continue\n", "        translation_seeds.append(build_type_B(r1, r2))\n", "        translation_seeds.append(build_type_C(r1, r2))\n", "        translation_seeds.append(build_type_D(r1, r2))\n", "        translation_seeds.append(build_type_E(r1, r2))" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "540\n" ] } ], "source": [ "print(len(translation_seeds))" ] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'G (!(P01 && P02) -> F P03)', ' ! G (! (P02 && X P02))', 'F (P03 && P02)', 'G(P01 -> F P02) -> G F P03', 'F P01 && F P02', 'G (P02 -> X F P01)', 'P03 U (G (F P04))', 'G F P01 || G F P02', '(! P02) U P01', 'G(P01 -> X (X (X P02)))', 'F G ! P04', 'P02 && G (P02 -> X ! P02 && X X ! P02 && X X X ! P02 && X X X X ! P02 && X X X X X P02)', 'G (P01 -> X G ! P02)', 'G (P03 -> F P06)', 'G(!(P02 &&P04)) && G(P02 || P04)', 'G ( P02 -> X P04)', 'G (P03 -> (P04 | X P04))', 'G ( P01 -> (X !P01 | XX !P01 | XXX !P01))', 'P03 -> X ((P02 U P01) || G P02)', 'G(P03 -> P02)', '(F P05) -> (!P05 U (P06 & !P05))', 'G ((P04 <-> P02) -> (P03 <-> P05))', 'G F P01 || X P02', ' G (P03 | P04 | P05)', 'G(!(P01 & P02))', ' G (P01 <-> X P02)', 'G (P01 && X P02 -> X X P04)', 'G( P01 -> F P02 )', 'G (P04)', 'G !(P01 & P03)', 'G P02 && G(P03 -> !P01)', ' ! G (! (P01 && X P01))', 'G (P02 && P01)', ' (P02 U (P02 & ! P01)) | G P02', 'G F P03 -> G F P02', '(P03 U P02) || G P03'}\n" ] } ], "source": [ "seed_ltls = set([t['raw_ltl'] for t in translation_seeds])\n", "print(unique_ltls - seed_ltls)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Save the canonical decoding list" ] },
{ "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "possible_decodings = {}\n", "for seed in translation_seeds:\n", "    canonical = seed['canonical_ltl']\n", "    possible_decodings[canonical] = {\n", "        'formula': canonical,\n", "        'raw': seed['raw_ltl'],\n", "    }" ] },
{ "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "with open(\"canonical.json\", \"w\") as f:\n", "    f.write(json.dumps(possible_decodings, indent=2))" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Save translation seeds for zero-shot" ] },
{ "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "with open(\"train_seed.jsonl\", \"w\") as f:\n", "    for dp in translation_seeds:\n", "        better_ltl = dp['canonical_ltl']\n", "        entry = {'canonical': better_ltl, 'formula': better_ltl, 'natural': dp['eng']}\n", "        json.dump(entry, f)\n", "        f.write('\\n')" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Save golden data for evaluation" ] },
{ "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "raw_canonical_mapping = {\n", "    seed['raw_ltl']: seed['canonical_ltl'] for seed in translation_seeds\n", "}" ] },
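{ "cell_type": "markdown", "metadata": {}, "source": [ "Note that the keys of `raw_canonical_mapping` are the seed `raw_ltl` strings, which use uniform spacing and `&`/`~`, while the corpus formulas vary in spacing (`G( P01` vs. `G ( P01`) and use `&&`/`!` (see the set difference above). The helper below is a minimal normalization sketch (`normalize_spacing` and `normalized_mapping` are illustrative, not part of the pipeline): it pads parentheses and collapses whitespace, but deliberately does not rewrite operator aliases such as `&&` vs. `&`." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def normalize_spacing(formula):\n", "    # Pad parentheses, then collapse whitespace runs, so that e.g.\n", "    # 'G( P01 -> F P02 )' becomes 'G ( P01 -> F P02 )'.\n", "    padded = formula.replace(\"(\", \" ( \").replace(\")\", \" ) \")\n", "    return \" \".join(padded.split())\n", "\n", "normalized_mapping = {normalize_spacing(k): v for k, v in raw_canonical_mapping.items()}\n", "print(normalize_spacing('G( P01 -> F P02 )'))" ] },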
"outputs": [ { "data": { "text/plain": [ "{'ltl': 'G( P01 -> F P02 )',\n", " 'eng': 'Every P01 is eventually followed by a P02.'}" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "DPs[0]" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "ename": "KeyError", "evalue": "'G( P01 -> F P02 )'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m/home/shaojun/code/NL2TL_translaters/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb Cell 24\u001b[0m line \u001b[0;36m4\n\u001b[1;32m 1\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mgolden.jsonl\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mw\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mas\u001b[39;00m f:\n\u001b[1;32m 2\u001b[0m \u001b[39mfor\u001b[39;00m dp \u001b[39min\u001b[39;00m DPs:\n\u001b[1;32m 3\u001b[0m entry \u001b[39m=\u001b[39m {\n\u001b[0;32m----> 4\u001b[0m \u001b[39m'\u001b[39m\u001b[39mcanonical\u001b[39m\u001b[39m'\u001b[39m: raw_canonical_mapping[dp[\u001b[39m'\u001b[39;49m\u001b[39mltl\u001b[39;49m\u001b[39m'\u001b[39;49m]],\n\u001b[1;32m 5\u001b[0m \u001b[39m'\u001b[39m\u001b[39mformula\u001b[39m\u001b[39m'\u001b[39m: raw_canonical_mapping[dp[\u001b[39m'\u001b[39m\u001b[39mltl\u001b[39m\u001b[39m'\u001b[39m]],\n\u001b[1;32m 6\u001b[0m \u001b[39m'\u001b[39m\u001b[39mnatural\u001b[39m\u001b[39m'\u001b[39m: dp[\u001b[39m'\u001b[39m\u001b[39meng\u001b[39m\u001b[39m'\u001b[39m],\n\u001b[1;32m 7\u001b[0m \u001b[39m'\u001b[39m\u001b[39mraw_ltl\u001b[39m\u001b[39m'\u001b[39m: dp[\u001b[39m'\u001b[39m\u001b[39mltl\u001b[39m\u001b[39m'\u001b[39m],\n\u001b[1;32m 8\u001b[0m }\n\u001b[1;32m 9\u001b[0m json\u001b[39m.\u001b[39mdump(entry, f)\n\u001b[1;32m 10\u001b[0m f\u001b[39m.\u001b[39mwrite(\u001b[39m'\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m'\u001b[39m)\n", "\u001b[0;31mKeyError\u001b[0m: 'G( P01 -> F P02 )'" ] } ], "source": [ "with open(\"golden.jsonl\", \"w\") as f:\n", " for dp in DPs:\n", " entry = {\n", " 'canonical': raw_canonical_mapping[dp['ltl']],\n", " 'formula': raw_canonical_mapping[dp['ltl']],\n", " 'natural': dp['eng'],\n", " 'raw_ltl': dp['ltl'],\n", " }\n", " json.dump(entry, f)\n", " f.write('\\n')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.9.12 ('GPML')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.17" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "92a84f77637d1d47b588cbbaac9b07f8c628b67f58e672e955ed4902878afbbe" } } }, "nbformat": 4, "nbformat_minor": 2 }