{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"raw/ltl.txt\", \"r\") as f:\n",
    "    raw_ltls = f.read().splitlines()\n",
    "    unique_ltls = set(raw_ltls)\n",
    "with open(\"raw/eng.txt\", 'r') as f:\n",
    "    raw_engs = f.read().splitlines()\n",
    "\n",
    "DPs = []\n",
    "for ltl, eng in zip(raw_ltls, raw_engs):\n",
    "    DPs.append({'ltl': ltl, 'eng': eng})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "G (P04)\n",
      "F G ! P04\n",
      "(! P02) U P01\n",
      "G(P03 -> P02)\n",
      "F (P03 && P02)\n",
      "F P01 && F P02\n",
      "G !(P01 & P03)\n",
      "G (P02 && P01)\n",
      "G(!(P01 & P02))\n",
      "G (P03 -> F P06)\n",
      "G F P01 || X P02\n",
      "P03 U (G (F P04))\n",
      "G ( P02 -> X P04)\n",
      "G( P01 -> F P02 )\n",
      "G (P02 -> X F P01)\n",
      "G F P01 || G F P02\n",
      " G (P01 <-> X P02)\n",
      "G F P03 -> G F P02\n",
      "G (P01 -> X G ! P02)\n",
      " G (P03 | P04 | P05)\n",
      "(P03 U P02) || G P03\n",
      " ! G (! (P02 && X P02))\n",
      "G(P01 -> X (X (X P02)))\n",
      "G P02 && G(P03 -> !P01)\n",
      " ! G (! (P01 && X P01))\n",
      "G (P03 -> (P04 | X P04))\n",
      "G (!(P01 && P02) -> F P03)\n",
      "G(P01 -> F P02) -> G F P03\n",
      "G (P01 && X P02 -> X X P04)\n",
      " (P02 U (P02 & ! P01)) | G P02\n",
      "P03 -> X ((P02 U P01) || G P02)\n",
      "G(!(P02 &&P04)) && G(P02 || P04)\n",
      "(F P05) -> (!P05 U (P06 & !P05))\n",
      "G ((P04 <-> P02) -> (P03 <-> P05))\n",
      "G ( P01 -> (X !P01 | XX !P01 | XXX !P01))\n",
      "P02 && G (P02 -> X ! P02 && X X ! P02 && X X X ! P02 && X X X X ! P02 && X X X X X P02)\n"
     ]
    }
   ],
   "source": [
    "for ltl in sorted(unique_ltls, key=lambda x: len(x)):\n",
    "    print(ltl)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Drone-planning Domain, with augmentation\n",
      "Number of Data Points 756\n",
      "Number of unique LTL expressions: 36\n",
      "Number of unique LTL structures: 5\n"
     ]
    }
   ],
   "source": [
    "print(\"Drone-planning Domain, with augmentation\")\n",
    "print(\"Number of Data Points\", len(DPs))\n",
    "print(\"Number of unique LTL expressions:\", len(unique_ltls))\n",
    "print(\"Number of unique LTL structures:\", 5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "F ( AP )\n",
    "- Go to AP\n",
    "\n",
    "F ( AP1 & F ( AP2 ) )\n",
    "- First go to AP1 and then to AP2\n",
    "\n",
    "\n",
    "F ( AP1 ) & G ( AP2 )\n",
    "- Go to AP1 and Always in AP2\n",
    "\n",
    "\n",
    "F ( AP1 ) & ~ ( AP2 )\n",
    "- Go to AP1 and avoid AP2 \n",
    "\n",
    "~ ( AP1 ) U ( AP2 )\n",
    "- avoid going through AP1 until go through AP2\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Clean up LTL expressions "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "ori_aps = set()\n",
    "for unique_ltl in unique_ltls:\n",
    "    ori_aps.update(unique_ltl.split())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'!',\n",
       " '!(P01',\n",
       " '!P01',\n",
       " '!P01)',\n",
       " '!P01))',\n",
       " '!P05))',\n",
       " '&',\n",
       " '&&',\n",
       " '&&P04))',\n",
       " '(',\n",
       " '(!',\n",
       " '(!(P01',\n",
       " '(!P05',\n",
       " '((P02',\n",
       " '((P04',\n",
       " '(F',\n",
       " '(G',\n",
       " '(P01',\n",
       " '(P02',\n",
       " '(P03',\n",
       " '(P04',\n",
       " '(P04)',\n",
       " '(P06',\n",
       " '(X',\n",
       " ')',\n",
       " '->',\n",
       " '<->',\n",
       " 'F',\n",
       " 'G',\n",
       " 'G(',\n",
       " 'G(!(P01',\n",
       " 'G(!(P02',\n",
       " 'G(P01',\n",
       " 'G(P02',\n",
       " 'G(P03',\n",
       " 'P01',\n",
       " 'P01)',\n",
       " 'P01))',\n",
       " 'P02',\n",
       " 'P02)',\n",
       " 'P02))',\n",
       " 'P02)))',\n",
       " 'P03',\n",
       " 'P03)',\n",
       " 'P04',\n",
       " 'P04)',\n",
       " 'P04))',\n",
       " 'P05)',\n",
       " 'P05))',\n",
       " 'P06)',\n",
       " 'U',\n",
       " 'X',\n",
       " 'XX',\n",
       " 'XXX',\n",
       " '|',\n",
       " '||'}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ori_aps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "APs = [\n",
    "    {'ap': 'blue_room', 'eng': 'the blue room'}, \n",
    "    {'ap': 'green_room', 'eng': 'the green room'},\n",
    "    {'ap': 'orange_room', 'eng': 'the orange room'},\n",
    "    {'ap': 'purple_room', 'eng': 'the purple room'},\n",
    "    {'ap': 'red_room', 'eng': 'the red room'},\n",
    "    {'ap': 'yellow_room', 'eng': 'the yellow room'},\n",
    "    {'ap': 'first_floor', 'eng': 'the first floor'},\n",
    "    {'ap': 'second_floor', 'eng': 'the second floor'},\n",
    "    {'ap': 'third_floor', 'eng': 'the third floor'},\n",
    "    {'ap': 'landmark_1', 'eng': 'landmark 1'},\n",
    "    {'ap': 'landmark_2', 'eng': 'landmark 2'},\n",
    "    {'ap': 'landmark_3', 'eng': 'landmark 3'},\n",
    "]\n",
    "APs = [\n",
    "    {'ap': 'P01', 'eng': 'P01'},\n",
    "    {'ap': 'P02', 'eng': 'P02'},\n",
    "    {'ap': 'P03', 'eng': 'P03'},\n",
    "    {'ap': 'P04', 'eng': 'P04'},\n",
    "    {'ap': 'P05', 'eng': 'P05'},\n",
    "    {'ap': 'P06', 'eng': 'P06'},\n",
    "    {'ap': 'P07', 'eng': 'P07'},\n",
    "    {'ap': 'P08', 'eng': 'P08'},\n",
    "    {'ap': 'P09', 'eng': 'P09'},\n",
    "    {'ap': 'P10', 'eng': 'P10'},\n",
    "    {'ap': 'P11', 'eng': 'P11'},\n",
    "    {'ap': 'P12', 'eng': 'P12'},\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# from the translator\n",
    "def build_type_A(ap1):\n",
    "    raw_ltl = f\"F ( {ap1['ap']} )\"\n",
    "    raw_ltl = f\"F ( {ap1['ap']} )\"\n",
    "    canonical_ltl = f\"finally ( {ap1['eng']} )\"\n",
    "    translated = f\"go to {ap1['eng']}\"\n",
    "    return {\n",
    "        'raw_ltl': raw_ltl,\n",
    "        'canonical_ltl': canonical_ltl,\n",
    "        'eng': translated}\n",
    "\n",
    "def build_type_B(room_1, room_2):\n",
    "    raw_ltl = f\"F ( {room_1['ap']} & F ( {room_2['ap']} ) )\"\n",
    "    canonical_ltl = f\"finally ( and ( {room_1['eng']} , finally ( {room_2['eng']} ) ) )\"\n",
    "    translated = f\"go to {room_1['eng']}, and then {room_2['eng']} at last\"\n",
    "    return {\n",
    "        'raw_ltl': raw_ltl,\n",
    "        'canonical_ltl': canonical_ltl,\n",
    "        'eng': translated}\n",
    "\n",
    "def build_type_C(room_1, room_2):\n",
    "    raw_ltl = f\"F ( {room_1['ap']} ) & G ( {room_2['ap']} )\"\n",
    "    canonical_ltl = f\"and ( finally ( {room_1['eng']} ) , always ( {room_2['eng']} ) )\"\n",
    "    translated = f\"go to {room_1['eng']} at last, and always stay in {room_2['eng']}\"\n",
    "    return {\n",
    "        'raw_ltl': raw_ltl,\n",
    "        'canonical_ltl': canonical_ltl,\n",
    "        'eng': translated}\n",
    "\n",
    "def build_type_D(room_1, room_2):\n",
    "    raw_ltl = f\"F ( {room_1['ap']} & ~ {room_2['ap']} )\"\n",
    "    canonical_ltl = f\"finally ( and ( {room_1['eng']} , not ( {room_2['eng']} ) ) )\"\n",
    "    translated = f\"go to {room_1['eng']} at last, and avoid {room_2['eng']}\"\n",
    "    return {\n",
    "        'raw_ltl': raw_ltl,\n",
    "        'canonical_ltl': canonical_ltl,\n",
    "        'eng': translated}\n",
    "\n",
    "def build_type_E(room_1, room_2):\n",
    "    raw_ltl = f\"~ ( {room_1['ap']} ) U ( {room_2['ap']} )\"\n",
    "    canonical_ltl = f\"until ( not ( {room_1['eng']} ) , {room_2['eng']} )\"\n",
    "    translated = f\"avoid go to {room_1['eng']} until go to {room_2['eng']}\"\n",
    "    return {\n",
    "        'raw_ltl': raw_ltl,\n",
    "        'canonical_ltl': canonical_ltl,\n",
    "        'eng': translated}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "translation_seeds = []\n",
    "for r1 in APs:\n",
    "    translation_seeds.append(build_type_A(r1))\n",
    "    for r2 in APs:\n",
    "        if r1 == r2:\n",
    "            continue\n",
    "        translation_seeds.append(build_type_B(r1, r2))\n",
    "        translation_seeds.append(build_type_C(r1, r2))\n",
    "        translation_seeds.append(build_type_D(r1, r2))\n",
    "        translation_seeds.append(build_type_E(r1, r2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "540\n"
     ]
    }
   ],
   "source": [
    "print(len(translation_seeds))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'G (!(P01 && P02) -> F P03)', ' ! G (! (P02 && X P02))', 'F (P03 && P02)', 'G(P01 -> F P02) -> G F P03', 'F P01 && F P02', 'G (P02 -> X F P01)', 'P03 U (G (F P04))', 'G F P01 || G F P02', '(! P02) U P01', 'G(P01 -> X (X (X P02)))', 'F G ! P04', 'P02 && G (P02 -> X ! P02 && X X ! P02 && X X X ! P02 && X X X X ! P02 && X X X X X P02)', 'G (P01 -> X G ! P02)', 'G (P03 -> F P06)', 'G(!(P02 &&P04)) && G(P02 || P04)', 'G ( P02 -> X P04)', 'G (P03 -> (P04 | X P04))', 'G ( P01 -> (X !P01 | XX !P01 | XXX !P01))', 'P03 -> X ((P02 U P01) || G P02)', 'G(P03 -> P02)', '(F P05) -> (!P05 U (P06 & !P05))', 'G ((P04 <-> P02) -> (P03 <-> P05))', 'G F P01 || X P02', ' G (P03 | P04 | P05)', 'G(!(P01 & P02))', ' G (P01 <-> X P02)', 'G (P01 && X P02 -> X X P04)', 'G( P01 -> F P02 )', 'G (P04)', 'G !(P01 & P03)', 'G P02 && G(P03 -> !P01)', ' ! G (! (P01 && X P01))', 'G (P02 && P01)', ' (P02 U (P02 & ! P01)) | G P02', 'G F P03 -> G F P02', '(P03 U P02) || G P03'}\n"
     ]
    }
   ],
   "source": [
    "seed_ltls = set([t['raw_ltl'] for t in translation_seeds])\n",
    "print(unique_ltls - seed_ltls)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Save the canonical decoding list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "possible_decodings = {}\n",
    "for seed in translation_seeds:\n",
    "    canonical = seed['canonical_ltl']\n",
    "    possible_decodings[canonical] = {\n",
    "        'formula': canonical,\n",
    "        'raw': seed['raw_ltl'],\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"canonical.json\", 'w') as f:\n",
    "    f.write(json.dumps(possible_decodings, indent=2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Save translation seed for zero shot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"train_seed.jsonl\", \"w\") as f:\n",
    "    for dp in translation_seeds:\n",
    "        better_ltl = dp['canonical_ltl']\n",
    "        entry = {'canonical': better_ltl, 'formula': better_ltl, 'natural': dp['eng']}\n",
    "        json.dump(entry, f)\n",
    "        f.write('\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Save golden data for evaluation  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "raw_canonical_mapping = {\n",
    "    seed['raw_ltl']: seed['canonical_ltl'] for seed in translation_seeds\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "756"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(DPs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'ltl': 'G( P01 -> F P02 )',\n",
       " 'eng': 'Every P01 is eventually followed by a P02.'}"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "DPs[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "'G( P01 -> F P02 )'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[1;32m/home/shaojun/code/NL2TL_translaters/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb Cell 24\u001b[0m line \u001b[0;36m4\n\u001b[1;32m      <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a2249434c2d736572766572227d/home/shaojun/code/NL2TL_translaters/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb#X32sdnNjb2RlLXJlbW90ZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mgolden.jsonl\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mw\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mas\u001b[39;00m f:\n\u001b[1;32m      <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a2249434c2d736572766572227d/home/shaojun/code/NL2TL_translaters/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb#X32sdnNjb2RlLXJlbW90ZQ%3D%3D?line=1'>2</a>\u001b[0m     \u001b[39mfor\u001b[39;00m dp \u001b[39min\u001b[39;00m DPs:\n\u001b[1;32m      <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a2249434c2d736572766572227d/home/shaojun/code/NL2TL_translaters/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb#X32sdnNjb2RlLXJlbW90ZQ%3D%3D?line=2'>3</a>\u001b[0m         entry \u001b[39m=\u001b[39m {\n\u001b[0;32m----> <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a2249434c2d736572766572227d/home/shaojun/code/NL2TL_translaters/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb#X32sdnNjb2RlLXJlbW90ZQ%3D%3D?line=3'>4</a>\u001b[0m             \u001b[39m'\u001b[39m\u001b[39mcanonical\u001b[39m\u001b[39m'\u001b[39m: raw_canonical_mapping[dp[\u001b[39m'\u001b[39;49m\u001b[39mltl\u001b[39;49m\u001b[39m'\u001b[39;49m]],\n\u001b[1;32m      <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a2249434c2d736572766572227d/home/shaojun/code/NL2TL_translaters/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb#X32sdnNjb2RlLXJlbW90ZQ%3D%3D?line=4'>5</a>\u001b[0m             \u001b[39m'\u001b[39m\u001b[39mformula\u001b[39m\u001b[39m'\u001b[39m: raw_canonical_mapping[dp[\u001b[39m'\u001b[39m\u001b[39mltl\u001b[39m\u001b[39m'\u001b[39m]],\n\u001b[1;32m      <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a2249434c2d736572766572227d/home/shaojun/code/NL2TL_translaters/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb#X32sdnNjb2RlLXJlbW90ZQ%3D%3D?line=5'>6</a>\u001b[0m             \u001b[39m'\u001b[39m\u001b[39mnatural\u001b[39m\u001b[39m'\u001b[39m: dp[\u001b[39m'\u001b[39m\u001b[39meng\u001b[39m\u001b[39m'\u001b[39m],\n\u001b[1;32m      <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a2249434c2d736572766572227d/home/shaojun/code/NL2TL_translaters/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb#X32sdnNjb2RlLXJlbW90ZQ%3D%3D?line=6'>7</a>\u001b[0m             \u001b[39m'\u001b[39m\u001b[39mraw_ltl\u001b[39m\u001b[39m'\u001b[39m: dp[\u001b[39m'\u001b[39m\u001b[39mltl\u001b[39m\u001b[39m'\u001b[39m],\n\u001b[1;32m      <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a2249434c2d736572766572227d/home/shaojun/code/NL2TL_translaters/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb#X32sdnNjb2RlLXJlbW90ZQ%3D%3D?line=7'>8</a>\u001b[0m         }\n\u001b[1;32m      <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a2249434c2d736572766572227d/home/shaojun/code/NL2TL_translaters/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb#X32sdnNjb2RlLXJlbW90ZQ%3D%3D?line=8'>9</a>\u001b[0m         json\u001b[39m.\u001b[39mdump(entry, f)\n\u001b[1;32m     <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a2249434c2d736572766572227d/home/shaojun/code/NL2TL_translaters/NL2TL-dataset/datasets-nl2spec/preprocess.ipynb#X32sdnNjb2RlLXJlbW90ZQ%3D%3D?line=9'>10</a>\u001b[0m         f\u001b[39m.\u001b[39mwrite(\u001b[39m'\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m'\u001b[39m)\n",
      "\u001b[0;31mKeyError\u001b[0m: 'G( P01 -> F P02 )'"
     ]
    }
   ],
   "source": [
    "with open(\"golden.jsonl\", \"w\") as f:\n",
    "    for dp in DPs:\n",
    "        entry = {\n",
    "            'canonical': raw_canonical_mapping[dp['ltl']],\n",
    "            'formula': raw_canonical_mapping[dp['ltl']],\n",
    "            'natural': dp['eng'],\n",
    "            'raw_ltl': dp['ltl'],\n",
    "        }\n",
    "        json.dump(entry, f)\n",
    "        f.write('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.9.12 ('GPML')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.17"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "92a84f77637d1d47b588cbbaac9b07f8c628b67f58e672e955ed4902878afbbe"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}