`_.
+
+.. note::
+
+ This project is under active development.
+
+Contents
+--------
+
+.. toctree::
+ :maxdepth: 2
+
+ usage
+ api
+ tutorials
diff --git a/BrowserGym/docs/src/tutorials.rst b/BrowserGym/docs/src/tutorials.rst
new file mode 100644
index 0000000000000000000000000000000000000000..01f5bcdb67769cd04b4d1e7c01e11fa9d0741bf6
--- /dev/null
+++ b/BrowserGym/docs/src/tutorials.rst
@@ -0,0 +1,26 @@
+Tutorials
+=========
+
+This section provides tutorials to help build new environments and tasks.
+
+.. grid:: 2
+ :gutter: 2
+
+ .. grid-item-card:: Walkthrough
+ :link: examples/walkthrough.html
+
+ :bdg-primary:`Getting started`
+
+ .. grid-item-card:: Create a custom task
+ :link: examples/create_custom_task.html
+
+ :bdg-primary:`Custom task`
+
+
+
+.. toctree::
+ :maxdepth: 1
+ :hidden:
+
+ examples/walkthrough.rst
+ examples/create_custom_task.rst
diff --git a/BrowserGym/docs/src/usage.rst b/BrowserGym/docs/src/usage.rst
new file mode 100644
index 0000000000000000000000000000000000000000..038ca6d93508754bea95f4f7d8c7ddabbfc8e3f6
--- /dev/null
+++ b/BrowserGym/docs/src/usage.rst
@@ -0,0 +1,42 @@
+Usage
+=====
+
+.. _installation:
+
+Installation
+------------
+
+To use BrowserGym, first install it using pip:
+
+.. code-block:: console
+
+ pip install browsergym
+
+Then, a required step is to set up Playwright by running:
+
+.. code-block:: console
+
+ playwright install chromium
+
+Example code
+------------
+
+Boilerplate code to run an agent on an interactive, open-ended task:
+
+.. code-block:: python
+
+ import gymnasium as gym
+ import browsergym.core # register the openended task as a gym environment
+
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": "https://www.google.com/"}, # starting URL
+ wait_for_user_message=True, # wait for a user message after each agent message sent to the chat
+ )
+
+ obs, info = env.reset()
+ done = False
+ while not done:
+ action = ... # implement your agent here
+ obs, reward, terminated, truncated, info = env.step(action)
+ done = terminated or truncated
diff --git a/BrowserGym/pyproject.toml b/BrowserGym/pyproject.toml
new file mode 100644
index 0000000000000000000000000000000000000000..68b016a511ba078a6a0614b47af3d6026d04f83c
--- /dev/null
+++ b/BrowserGym/pyproject.toml
@@ -0,0 +1,33 @@
+[project]
+name = "browsergym-meta"
+description = "BrowserGym: a gym environment for web task automation in the Chromium browser"
+dynamic = ["version"]
+[tool.setuptools]
+packages = [] # meta distribution, packages are included as dependencies
+[tool.black]
+line-length = 100
+include = '\.pyi?$'
+exclude = '''
+/(
+ \.eggs
+ | \.git
+ | \.hg
+ | \.mypy_cache
+ | \.nox
+ | \.tox
+ | \.venv
+ | _build
+ | buck-out
+ | build
+ | dist
+)/
+'''
+
+[tool.pytest.ini_options]
+filterwarnings = [
+ 'ignore::UserWarning:gymnasium.*:', # too many "The obs is not within the observation space." warnings.
+]
+markers = [
+ "slow: marks tests as slow (deselect with '-m \"not slow\"')",
+ "serial: marks tests to be run sequentially (deselect with '-m \"not serial\"')"
+]
diff --git a/BrowserGym/tests/__init__.py b/BrowserGym/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/BrowserGym/tests/assistantbench/__init__.py b/BrowserGym/tests/assistantbench/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/BrowserGym/tests/assistantbench/data/fallback_gpt4_seeplanact_predictions.jsonl b/BrowserGym/tests/assistantbench/data/fallback_gpt4_seeplanact_predictions.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..04c1bf8931e9aca6493316f7f4703ba2f4c95d92
--- /dev/null
+++ b/BrowserGym/tests/assistantbench/data/fallback_gpt4_seeplanact_predictions.jsonl
@@ -0,0 +1,33 @@
+{"id": "2aa5dd83fbcd0dce9a3dd4592106e5b5edf738008d932e357d477bba80e59ccf", "answer": "\\( \\frac{2}{7} \\times 100 \\approx 28.57 \\)", "gold_answer": "14.2", "score": 0, "has_ans": 1.0}
+{"id": "2ddae3b7a208e3c25f14d82d7a1faaaa1832fbf950b4dac345e755c4c361f294", "answer": 800000.0, "gold_answer": "1010000", "score": 0.7669061178326222, "has_ans": 1.0}
+{"id": "4e615af6f0348597b4133cc1ec5418bb3f35328e3d95e23a275027cee97b5e09", "answer": [], "gold_answer": "Adrenalinpark K\u00f6ln", "score": 0.0, "has_ans": 0}
+{"id": "c7afe00869f98cf363fd83677ac41757ed5e57f03eacc3d1304feb0a92084bd1", "answer": "Knives Out", "gold_answer": "Glass Onion: A Knives Out Mystery", "score": 0.5714285714285715, "has_ans": 1.0}
+{"id": "57d9dc6935e8a40b02e7f8ec81768fe70e68a0c05f6866927c9fda38db38a486", "answer": "-$108", "gold_answer": "45", "score": 0, "has_ans": 1.0}
+{"id": "748899d9d70c09beb3bd48ac8a3658bdcfd2f9114fe6dc4c4b8d2f9541ef4607", "answer": [{"sender": "dhl", "price (usd)": "50"}, {"sender": "fedex", "price (usd)": "60"}], "gold_answer": "{\"sender\": \"DHL\", \"price (usd)\": \"55-70\"}\n{\"sender\": \"Fedex\", \"price (usd)\": \"62-95\"}\n{\"sender\": \"USPS\", \"price (usd)\": \"73.4-78.15\"}", "score": 0.3333333333333333, "has_ans": 1.0}
+{"id": "9e31099fffa6a3891c94934fd4fc2f3f522d51c1904ff3561f3a10e4bf245821", "answer": "oshrat binyamin", "gold_answer": "Shiran Nawi, Yoni Osherov, Daniel Lereya", "score": 0.0, "has_ans": 1.0}
+{"id": "291b53e665b4dd4365cde995042db4a6f6fecef3fe3a6f4482f23d61bd673918", "answer": "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/gcf_002288925.1_asm228892v2/gcf_002288925.1_asm228892v2_genomic.gff.gz", "gold_answer": "https://ftp.ensembl.org/pub/release-101/gff3/delphinapterus_leucas/Delphinapterus_leucas.ASM228892v3.101.gff3.gz", "score": 0.0, "has_ans": 1.0}
+{"id": "8fa42360185068216f2919935148d4e1ad28ddc18da0abd0f4bb0b6b6f84b127", "answer": "vgt", "gold_answer": "VGT", "score": 1.0, "has_ans": 1.0}
+{"id": "3af8028c2a59e28ca88baff0e6d91f2a9f170c5ef91003f1c8406755a2760ad4", "answer": "Oko, Thief of Crowns", "gold_answer": "Oko, Thief of Crowns", "score": 1.0, "has_ans": 1.0}
+{"id": "6b06d186921b8b390c65aebd0d16f09f60a47d2f1288ebe36953f734e84c0a3c", "answer": "", "gold_answer": "1148 sqft", "score": 0.0, "has_ans": 0.0}
+{"id": "9bdca8677af1e25cb7b0c7992dc62670c3e58e4afcd5ae60bcaa2483556bba00", "answer": ["'{\"sender\": \"usps\", \"price (usd)\": 25}'"], "gold_answer": "{\"sender\": \"USPS\", \"price (usd)\": \"41.75\"}", "score": 0, "has_ans": 1.0}
+{"id": "557e78eceec08ca8b0da5f9fdaca6e1c7ec6140a8ce600983ee716327dab005e", "answer": "Wolly Mammoth", "gold_answer": "For Pete's Sake", "score": 0.0, "has_ans": 1.0}
+{"id": "fb9ba3ab6a13d0adc677f993e90d54914a5cdf211305a1bba6bf16ec4ccb9b7c", "answer": "Instagram", "gold_answer": "Linkedin", "score": 0.0, "has_ans": 1.0}
+{"id": "52f7224e9c79431e7926afe317782711a0028750693e7456cde22ef6f4bd8bd5", "answer": "Nosferatu the Vampyre", "gold_answer": "Nosferatu the Vampyre", "score": 1.0, "has_ans": 1.0}
+{"id": "0ec4371851b96837b0a81b3dd3df401415061bb532fbafeb4609f3337c358508", "answer": ["anytime fitness", "point pleasant wellness center"], "gold_answer": "The Root Sports & Fitness Center\nMuscle Headz Gym", "score": 0.16666666666666666, "has_ans": 1.0}
+{"id": "6f224e7730ed027cbac73aebb1aea7f954053082041b02b19f4ff126a0a8a208", "answer": "Gina DiGioia", "gold_answer": "Gina DiGioia", "score": 1.0, "has_ans": 1.0}
+{"id": "99da66d8af02491f98b98c56b26c709e773b5a2ad945fb280375951ba600de09", "answer": 250.0, "gold_answer": "395", "score": 0.5425751529611245, "has_ans": 1.0}
+{"id": "ccec2229ced20a4b0cb4897e3a99120a3017ea030903e01c9bda6b13d40b0b14", "answer": "", "gold_answer": "McDonald's", "score": 0.0, "has_ans": 0.0}
+{"id": "9baaa267c95f9d8b75741ee9169c50563d297cfa592c20deaffd30dbc5984c74", "answer": 16.67, "gold_answer": "31.67", "score": 0.3582408362121543, "has_ans": 1.0}
+{"id": "6e3be83d1949fa52cba03fb1ce4b5b3bf7e37a83fd7d67694b10b2e439d90cf8", "answer": "wall street boxing & fitness", "gold_answer": "Renzo Gracie Jiu-Jitsu Wall Street", "score": 0.4, "has_ans": 1.0}
+{"id": "e2dc3a6b10b762e8aba7fa4d4e70f757f6d04dcbc8b56c48fc53fd9928d31d07", "answer": 40.0, "gold_answer": "30", "score": 0.7123179275482192, "has_ans": 1.0}
+{"id": "f88066d274e265edd6cd9d61cd80a41accb3a14baf2297652fdd05cdf716d455", "answer": "lower yosemite fall trail", "gold_answer": "Yosemite Falls\nBridalveil Fall", "score": 0.16666666666666666, "has_ans": 1.0}
+{"id": "e6bc98089608217e45b6956a46518fe3cce64a799b3ac43c6974c449ae14c408", "answer": 2140000.0, "gold_answer": "3080000", "score": 0.635876232048277, "has_ans": 1.0}
+{"id": "8ad84bd6fe38481ba49e7ad1f6fbd43219a999074e5c6fc940003281f55ec65b", "answer": ["trader joe's", "whole foods market", "aldi"], "gold_answer": "Potash Markets - Clark Street", "score": 0.0, "has_ans": 1.0}
+{"id": "55f4258484c5b398956133128a50462a767da211f8f72aa5ac5bbffb9bcbba1a", "answer": "Becker", "gold_answer": "CSI: Cyber", "score": 0.0, "has_ans": 1.0}
+{"id": "4dbedc5e1a0205e14b7ff3ba89bce3060dab15d0ada3b7e1351a6f2aa8287aec", "answer": 95.0, "gold_answer": "$55", "score": 0.4534562936319301, "has_ans": 1.0}
+{"id": "929b45f34805280d77c61d1e093e3d4e551d77ddb6ecd73552b12b1af286388d", "answer": "http://hgdownload.soe.ucsc.edu/goldenpath/canfam3/bigzips/", "gold_answer": "ftp://ftp.broadinstitute.org/distribution/assemblies/mammals/dog/canFam3.1/", "score": 0.0, "has_ans": 1.0}
+{"id": "cca4776df3c73e7f9430a2e624aafad056b14322a0b7ca6c0c22b7e7f3f0890a", "answer": "monica c. lozano", "gold_answer": "Wanda Austin\nRonald D. Sugar\nSue Wagner", "score": 0.0, "has_ans": 1.0}
+{"id": "efc0f3a47e9ed2ecdbcc037c2093865fe6e39f4d413a5d1ccdc7357160a4606b", "answer": "fidelity emerging asia fund (fseax)", "gold_answer": "Fidelity\u00ae Emerging Markets Index Fund (FPADX)", "score": 0.3636363636363636, "has_ans": 1.0}
+{"id": "b36ef2d8f2643b80e74a44ce3403f674ecb2aed7fd36afeaa289061a59feef92", "answer": "crunch fitness - east village", "gold_answer": "CrossFit East River\nAvea Pilates", "score": 0.14285714285714288, "has_ans": 1.0}
+{"id": "a9074997e698f912b9e751779ea19c1e92fa148404e90e0ae997acea3f9559b0", "answer": ["uncle tom's trail", "mount washburn", "fairy falls"], "gold_answer": "Trout lake trail\nArtist Point\nFountain Paint Pot\nLone Star Geyser\nStorm Point Trail", "score": 0.06666666666666667, "has_ans": 1.0}
+{"id": "797f7a5b65ca28b7e7156e7db1e9f117bd4a021de0cd512bfdbb0be897d89eab", "answer": ["red bamboo", "quantum leap"], "gold_answer": "Shanghai villa", "score": 0.0, "has_ans": 1.0}
\ No newline at end of file
diff --git a/BrowserGym/tests/assistantbench/test_env_general.py b/BrowserGym/tests/assistantbench/test_env_general.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a2c2e44f3849ad2b49e36893f6c50a705a34595
--- /dev/null
+++ b/BrowserGym/tests/assistantbench/test_env_general.py
@@ -0,0 +1,58 @@
+import logging
+import os
+import random
+
+import gymnasium as gym
+import playwright.sync_api
+import pytest
+from tenacity import retry, retry_if_exception_type, stop_after_attempt
+
+# register gym environments
+import browsergym.assistantbench
+
+# When DISPLAY_BROWSER is set: non-headless with slow_mo=1000; otherwise headless, no slow-mo.
+__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
+__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
+
+
+from browsergym.assistantbench import TEST_AB_TASK_IDS, VALID_AB_TASK_IDS
+
+# Seeded generator, so the sampled task ids do not change between runs.
+rng = random.Random(1)
+valid_task_ids = rng.sample(VALID_AB_TASK_IDS, 10)
+test_task_ids = rng.sample(TEST_AB_TASK_IDS, 10)
+
+
+@retry(
+    stop=stop_after_attempt(5),
+    retry=retry_if_exception_type(playwright.sync_api.TimeoutError),
+    reraise=True,
+    before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."),
+)
+@pytest.mark.parametrize("task_id", valid_task_ids + test_task_ids)
+@pytest.mark.slow
+def test_valid_env(task_id):
+    """Check that a task environment resets, accepts a no-op, and ends on a user message.
+
+    The whole test is re-run (up to 5 attempts) when Playwright raises a TimeoutError.
+    """
+    env = gym.make(
+        f"browsergym/{task_id}",
+        headless=__HEADLESS,
+        slow_mo=__SLOW_MO,
+    )
+    try:
+        obs, info = env.reset()
+        assert not obs["last_action_error"]
+
+        obs, reward, terminated, truncated, info = env.step("noop(0)")
+        assert not obs["last_action_error"]
+        assert not (terminated or truncated)
+
+        obs, reward, terminated, truncated, info = env.step('send_msg_to_user("something")')
+        assert not obs["last_action_error"]
+        assert terminated
+    finally:
+        # Close the environment even when an assertion or a (retried) timeout
+        # aborts the test, so no environment is left open between attempts.
+        env.close()
diff --git a/BrowserGym/tests/assistantbench/test_evaluation.py b/BrowserGym/tests/assistantbench/test_evaluation.py
new file mode 100644
index 0000000000000000000000000000000000000000..4973d7158f780b0397ff669f7051e44fcfd8d0a5
--- /dev/null
+++ b/BrowserGym/tests/assistantbench/test_evaluation.py
@@ -0,0 +1,87 @@
+import json
+import pathlib
+
+import gymnasium as gym
+import pytest
+
+from browsergym.assistantbench.evaluation.evaluator import question_scorer
+from browsergym.experiments.benchmark.metadata.utils import (
+    task_list_from_metadata,
+    task_metadata,
+)
+
+__DATA_DIR = pathlib.Path(__file__).resolve().parent / "data"
+
+metadata = task_metadata("assistantbench")
+file_path = __DATA_DIR / "fallback_gpt4_seeplanact_predictions.jsonl"
+
+# Recorded predictions, keyed by the original id stored in the JSONL file
+data_points = {}
+
+# Open the JSONL file and read each non-blank line as a JSON object
+with open(file_path, "r", encoding="utf-8") as f:
+    for line in f:
+        if not line.strip():
+            # json.loads() rejects empty input, so skip blank lines
+            continue
+        data_point = json.loads(line)
+
+        original_id = data_point["id"]
+        answer = data_point["answer"]
+        gold_answer = data_point["gold_answer"]
+        score = data_point["score"]
+        has_ans = data_point["has_ans"]
+
+        data_points[original_id] = {
+            "task_id": task_list_from_metadata(metadata, {"original_id": original_id})[0],
+            "answer": answer,
+            "gold_answer": gold_answer,
+            "score": score,
+            "has_ans": has_ans,
+        }
+
+
+@pytest.mark.parametrize("original_id", list(data_points.keys()))
+def test_evaluate(original_id: str):
+    """Check that question_scorer reproduces the recorded score and has_ans of a prediction."""
+    answer = data_points[original_id]["answer"]
+    gold_answer = data_points[original_id]["gold_answer"]
+    expected_score = data_points[original_id]["score"]
+    expected_has_ans = data_points[original_id]["has_ans"]
+
+    score, has_ans = question_scorer(answer, gold_answer)
+
+    # Fail if the computed results do not match the recorded ones
+    assert score == expected_score
+    assert has_ans == expected_has_ans
+
+
+@pytest.mark.parametrize(
+    "original_id",
+    [key for key, point in data_points.items() if isinstance(point["answer"], (str, float, int))],
+)
+@pytest.mark.slow
+def test_evaluate_within_env(original_id: str):
+    """Check that sending a recorded answer to the task environment yields the recorded score.
+
+    Only data points whose answer is a str, float or int are covered.
+    """
+    task_id = data_points[original_id]["task_id"]
+    answer = data_points[original_id]["answer"]
+    expected_score = data_points[original_id]["score"]
+
+    env = gym.make(
+        f"browsergym/{task_id}",
+    )
+    try:
+        obs, info = env.reset()
+        assert not obs["last_action_error"]
+
+        action = f"send_msg_to_user({repr(str(answer))})"
+        obs, reward, terminated, truncated, info = env.step(action)
+        assert not obs["last_action_error"]
+        assert terminated
+        assert reward == expected_score
+    finally:
+        # Close the environment even when an assertion fails
+        env.close()
diff --git a/BrowserGym/tests/core/__init__.py b/BrowserGym/tests/core/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..75f09d6fbde51609da41e1041eb3fb8125d808cb
--- /dev/null
+++ b/BrowserGym/tests/core/__init__.py
@@ -0,0 +1,2 @@
+# bugfix: use same playwright instance in browsergym and pytest
+from ..utils import setup_playwright
diff --git a/BrowserGym/tests/core/data/basic_iframe_site/basic_iframe.html b/BrowserGym/tests/core/data/basic_iframe_site/basic_iframe.html
new file mode 100644
index 0000000000000000000000000000000000000000..e2e61c694f20f358274a32f62c0cb74b6a63286b
--- /dev/null
+++ b/BrowserGym/tests/core/data/basic_iframe_site/basic_iframe.html
@@ -0,0 +1,37 @@
+
+
+
+ Iframe Example
+
+
+
+
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/basic_iframe_site/basic_iframe_2.html b/BrowserGym/tests/core/data/basic_iframe_site/basic_iframe_2.html
new file mode 100644
index 0000000000000000000000000000000000000000..d8e51b6ce1a4b8deebfd02868dd44e42e3a12158
--- /dev/null
+++ b/BrowserGym/tests/core/data/basic_iframe_site/basic_iframe_2.html
@@ -0,0 +1,12 @@
+
+
+
+ Simple Website
+
+
+
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/basic_iframe_site/inner-iframe.html b/BrowserGym/tests/core/data/basic_iframe_site/inner-iframe.html
new file mode 100644
index 0000000000000000000000000000000000000000..6cb49db9ca79b79111698aa23d975a1900296298
--- /dev/null
+++ b/BrowserGym/tests/core/data/basic_iframe_site/inner-iframe.html
@@ -0,0 +1,23 @@
+
+
+
+
+ Inner Iframe
+
+
+
+
+ Iframe Level 2
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/basic_iframe_site/outer-iframe.html b/BrowserGym/tests/core/data/basic_iframe_site/outer-iframe.html
new file mode 100644
index 0000000000000000000000000000000000000000..b71a077f2b374005894c2804aa9bf827e139d213
--- /dev/null
+++ b/BrowserGym/tests/core/data/basic_iframe_site/outer-iframe.html
@@ -0,0 +1,30 @@
+
+
+
+ Shadow DOM Example
+
+
+
+
+
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/basic_shadow_dom_site/basic_shadow_dom.html b/BrowserGym/tests/core/data/basic_shadow_dom_site/basic_shadow_dom.html
new file mode 100644
index 0000000000000000000000000000000000000000..242678f9696f448afffe5e5523aa36704fe6ec95
--- /dev/null
+++ b/BrowserGym/tests/core/data/basic_shadow_dom_site/basic_shadow_dom.html
@@ -0,0 +1,52 @@
+
+
+
+ Unit Test with Complex Nested Shadow DOM
+
+
+
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/basic_shadow_dom_site/simple_shadow_dom.html b/BrowserGym/tests/core/data/basic_shadow_dom_site/simple_shadow_dom.html
new file mode 100644
index 0000000000000000000000000000000000000000..fdcc8ceca07f897be41996144dd2a895d1a02229
--- /dev/null
+++ b/BrowserGym/tests/core/data/basic_shadow_dom_site/simple_shadow_dom.html
@@ -0,0 +1,22 @@
+
+
+
+ Unit Test with Complex Nested Shadow DOM
+
+
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/basic_shadow_iframe_site/basic_iframe.html b/BrowserGym/tests/core/data/basic_shadow_iframe_site/basic_iframe.html
new file mode 100644
index 0000000000000000000000000000000000000000..e2e61c694f20f358274a32f62c0cb74b6a63286b
--- /dev/null
+++ b/BrowserGym/tests/core/data/basic_shadow_iframe_site/basic_iframe.html
@@ -0,0 +1,37 @@
+
+
+
+ Iframe Example
+
+
+
+
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/basic_shadow_iframe_site/basic_iframe_2.html b/BrowserGym/tests/core/data/basic_shadow_iframe_site/basic_iframe_2.html
new file mode 100644
index 0000000000000000000000000000000000000000..dbcd6756822e81b68bdee21ec36944613b682826
--- /dev/null
+++ b/BrowserGym/tests/core/data/basic_shadow_iframe_site/basic_iframe_2.html
@@ -0,0 +1,12 @@
+
+
+
+ Simple Website
+
+
+
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/basic_shadow_iframe_site/inner-iframe.html b/BrowserGym/tests/core/data/basic_shadow_iframe_site/inner-iframe.html
new file mode 100644
index 0000000000000000000000000000000000000000..0d480d6701adc7d034f3e05c03b899b206b9f949
--- /dev/null
+++ b/BrowserGym/tests/core/data/basic_shadow_iframe_site/inner-iframe.html
@@ -0,0 +1,12 @@
+
+
+
+ Inner Iframe
+
+
+ Iframe Level 2
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/basic_shadow_iframe_site/outer-iframe.html b/BrowserGym/tests/core/data/basic_shadow_iframe_site/outer-iframe.html
new file mode 100644
index 0000000000000000000000000000000000000000..eed22ca03938bded8c1408df0a698515fa5068e9
--- /dev/null
+++ b/BrowserGym/tests/core/data/basic_shadow_iframe_site/outer-iframe.html
@@ -0,0 +1,40 @@
+
+
+
+ Shadow DOM Example
+
+
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/example.html b/BrowserGym/tests/core/data/example.html
new file mode 100644
index 0000000000000000000000000000000000000000..13552a70b0edc84663a94433a7da6ed525561e65
--- /dev/null
+++ b/BrowserGym/tests/core/data/example.html
@@ -0,0 +1,52 @@
+
+
+
+
+ Example Domain
+
+
+
+
+
+
+
+
+
+
Example Domain
+
This domain is for use in illustrative examples in documents. You may use this
+ domain in literature without prior coordination or asking for permission.
+
More information...
+
+
+
+
diff --git a/BrowserGym/tests/core/data/hover.html b/BrowserGym/tests/core/data/hover.html
new file mode 100644
index 0000000000000000000000000000000000000000..385bf2dc97085ea2a06eefdc22aa1af159bbe077
--- /dev/null
+++ b/BrowserGym/tests/core/data/hover.html
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/button_input.html b/BrowserGym/tests/core/data/input_type/button_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..9d6e6493c7594a0a9cd86cbd3f04fcfbea415c93
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/button_input.html
@@ -0,0 +1,10 @@
+
+
+
+
+Input Button
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/checkbox_input.html b/BrowserGym/tests/core/data/input_type/checkbox_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..ada1f2ff25cc66ed14281a96ca60021da9d173c4
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/checkbox_input.html
@@ -0,0 +1,19 @@
+
+
+
+
+Checkboxes
+The input type="checkbox" defines a checkbox:
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/color_picker_input.html b/BrowserGym/tests/core/data/input_type/color_picker_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..e33b957dc62cb351ad6f2af5e4b2b55af5967acf
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/color_picker_input.html
@@ -0,0 +1,18 @@
+
+
+
+
+Show a Color Picker
+
+The input type="color" is used for input fields that should contain a color.
+
+
+
+Note: type="color" is not supported in Internet Explorer 11 or Safari 9.1 (or earlier).
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/date_input.html b/BrowserGym/tests/core/data/input_type/date_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..0e2d6a3fe1155b35651896483e9a072685f2c34d
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/date_input.html
@@ -0,0 +1,18 @@
+
+
+
+
+Date Field
+
+The input type="date" is used for input fields that should contain a date.
+
+
+
+Note: type="date" is not supported in Internet Explorer 11 or prior Safari 14.1.
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/date_min_max_input.html b/BrowserGym/tests/core/data/input_type/date_min_max_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..f519df9c130708a26e71c474496538c60d9930f4
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/date_min_max_input.html
@@ -0,0 +1,22 @@
+
+
+
+
+Date Field Restrictions
+
+Use the min and max attributes to add restrictions to dates:
+
+
+
+Note: type="date" is not supported in Internet Explorer 11 or prior Safari 14.1.
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/date_time_local_input.html b/BrowserGym/tests/core/data/input_type/date_time_local_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..cc34237bebfa0704a8cc6d1553d5b490fed9dd58
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/date_time_local_input.html
@@ -0,0 +1,18 @@
+
+
+
+
+Local Date Field
+
+The input type="datetime-local" specifies a date and time input field, with no time zone.
+
+
+
+Note: type="datetime-local" is not supported in Internet Explorer 11 or prior Safari 14.1.
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/email_input.html b/BrowserGym/tests/core/data/input_type/email_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..0e2f6c3b5db4022e32eef4eb8ac5c0aa79a8ba40
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/email_input.html
@@ -0,0 +1,16 @@
+
+
+
+
+Email Field
+
+The input type="email" is used for input fields that should contain an e-mail address:
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/file_input.html b/BrowserGym/tests/core/data/input_type/file_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..5a026e729276c425c23e546cee19a7900cbae84d
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/file_input.html
@@ -0,0 +1,15 @@
+
+
+
+
+File upload
+
+Show a file-select field which allows a file to be chosen for upload:
+
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/hidden_field_input.html b/BrowserGym/tests/core/data/input_type/hidden_field_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..af16596e12dfde14ecc4f2d3daac9006f0cfb26a
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/hidden_field_input.html
@@ -0,0 +1,17 @@
+
+
+
+
+A Hidden Field (look in source code)
+
+
+
+Note: The hidden field is not shown to the user, but the data is sent when the form is submitted.
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/image_input.html b/BrowserGym/tests/core/data/input_type/image_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..502fd2990500a8a21b2ef030db2017a90e0bd02f
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/image_input.html
@@ -0,0 +1,18 @@
+
+
+
+
+Display an Image as the Submit button
+
+
+
+Note: The input type="image" sends the X and Y coordinates of the click that activated the image button.
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/img_submit.gif b/BrowserGym/tests/core/data/input_type/img_submit.gif
new file mode 100644
index 0000000000000000000000000000000000000000..dae3a9afcf1fb2cd5022b400df8e77dc6c486835
Binary files /dev/null and b/BrowserGym/tests/core/data/input_type/img_submit.gif differ
diff --git a/BrowserGym/tests/core/data/input_type/number_input.html b/BrowserGym/tests/core/data/input_type/number_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..1158e2baaab595f7ba1f8381fd95811b2fdf9be8
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/number_input.html
@@ -0,0 +1,18 @@
+
+
+
+
+Number Field
+
+The input type="number" defines a numeric input field.
+
+You can use the min and max attributes to add numeric restrictions in the input field:
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/number_step_input.html b/BrowserGym/tests/core/data/input_type/number_step_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..8d68505c3c8005bbc6a055bca0210db276120050
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/number_step_input.html
@@ -0,0 +1,16 @@
+
+
+
+
+Numeric Steps
+
+Depending on browser support: Fixed steps will apply in the input field.
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/password_input.html b/BrowserGym/tests/core/data/input_type/password_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..66eb78622aec4c406d77b49bb61e8f7e99503e41
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/password_input.html
@@ -0,0 +1,20 @@
+
+
+
+
+Password field
+
+The input type="password" defines a password field:
+
+
+
+The characters in a password field are masked (shown as asterisks or circles).
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/radio_input.html b/BrowserGym/tests/core/data/input_type/radio_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..125d68f4df1bd3c28d1954519c01b40ff24daa68
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/radio_input.html
@@ -0,0 +1,19 @@
+
+
+
+
+Radio Buttons
+
+The input type="radio" defines a radio button:
+
+Choose your favorite Web language:
+
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/range_input.html b/BrowserGym/tests/core/data/input_type/range_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..d96b9791994a7546b71c4808ffd1b111546d3323
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/range_input.html
@@ -0,0 +1,16 @@
+
+
+
+
+Range Field
+
+Depending on browser support: The input type "range" can be displayed as a slider control.
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/reset_input.html b/BrowserGym/tests/core/data/input_type/reset_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..d7710c1a8078eceef1f0c3b70503c7aed35ac2da
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/reset_input.html
@@ -0,0 +1,21 @@
+
+
+
+
+Reset Button
+
+The input type="reset" defines a reset button that resets all form values to their default values:
+
+
+
+If you change the input values and then click the "Reset" button, the form-data will be reset to the default values.
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/search_input.html b/BrowserGym/tests/core/data/input_type/search_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..db8ab66ed15a758c76d9f9fca7344c91b378dd10
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/search_input.html
@@ -0,0 +1,15 @@
+
+
+
+
+Search Field
+The input type="search" is used for search fields (behaves like a regular text field):
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/submit_input.html b/BrowserGym/tests/core/data/input_type/submit_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..257ebdda7f922079c4cd2648564e6adcd8be4c58
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/submit_input.html
@@ -0,0 +1,20 @@
+
+
+
+
+Submit Button
+
+The input type="submit" defines a button for submitting form data to a form-handler:
+
+
+
+If you click "Submit", the form-data will be sent to a page called "https://www.w3schools.com/action_page.php".
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/submit_nn_input.html b/BrowserGym/tests/core/data/input_type/submit_nn_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..da04e9d3a5413e6d5ca8f08c0c705d3156161569
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/submit_nn_input.html
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/telephone_input.html b/BrowserGym/tests/core/data/input_type/telephone_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..12a0c8a59a1da62b4e578dca976d167dbf1d8dfa
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/telephone_input.html
@@ -0,0 +1,17 @@
+
+
+
+
+Telephone Field
+
+The input type="tel" is used for input fields that should contain a telephone number:
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/text_input.html b/BrowserGym/tests/core/data/input_type/text_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..811753a26fd5325d481b2246051dc1d2d153a540
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/text_input.html
@@ -0,0 +1,20 @@
+
+
+
+
+Text field
+The input type="text" defines a one-line text input field:
+
+
+
+Note that the form itself is not visible.
+Also note that the default width of a text field is 20 characters.
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/time_input.html b/BrowserGym/tests/core/data/input_type/time_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..8ca605580af8ee58f86aecb20acf5d2d8fa9c263
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/time_input.html
@@ -0,0 +1,20 @@
+
+
+
+
+Show a Time Input Control
+
+The input type="time" allows the user to select a time (no time zone):
+
+If the browser supports it, a time picker pops up when entering the input field.
+
+
+
+Note: type="time" is not supported in Internet Explorer 11 or prior Safari 14.1.
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/url_input.html b/BrowserGym/tests/core/data/input_type/url_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..1f6bdf641d746e582d0ce3dad5f04e483ab7bef4
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/url_input.html
@@ -0,0 +1,16 @@
+
+
+
+
+Display a URL Input Field
+
+The input type="url" is used for input fields that should contain a URL address:
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/input_type/week_input.html b/BrowserGym/tests/core/data/input_type/week_input.html
new file mode 100644
index 0000000000000000000000000000000000000000..1f6bdf641d746e582d0ce3dad5f04e483ab7bef4
--- /dev/null
+++ b/BrowserGym/tests/core/data/input_type/week_input.html
@@ -0,0 +1,16 @@
+
+
+
+
+Display a URL Input Field
+
+The input type="url" is used for input fields that should contain a URL address:
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/long_page.html b/BrowserGym/tests/core/data/long_page.html
new file mode 100644
index 0000000000000000000000000000000000000000..8fd6ca357e35581ea09f2c905b36ee9df439f92f
--- /dev/null
+++ b/BrowserGym/tests/core/data/long_page.html
@@ -0,0 +1,211 @@
+
+
+
+
+
+ This is the top
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is the bottom
+
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/lots_of_iframes.html b/BrowserGym/tests/core/data/lots_of_iframes.html
new file mode 100644
index 0000000000000000000000000000000000000000..ba342a9ced3d48364816b1fcb7888f5518a69001
--- /dev/null
+++ b/BrowserGym/tests/core/data/lots_of_iframes.html
@@ -0,0 +1,21 @@
+
+
+
+
+ Lots of Iframes
+
+
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/obstructed_checkbox_page.html b/BrowserGym/tests/core/data/obstructed_checkbox_page.html
new file mode 100644
index 0000000000000000000000000000000000000000..a3f9ec1f23c7dad374236eb1a6e19e52ceb56cb5
--- /dev/null
+++ b/BrowserGym/tests/core/data/obstructed_checkbox_page.html
@@ -0,0 +1,93 @@
+
+
+
+
+
+ Checkbox with Label Interception
+
+
+
+
+
+
+
+
diff --git a/BrowserGym/tests/core/data/screenshot.png b/BrowserGym/tests/core/data/screenshot.png
new file mode 100644
index 0000000000000000000000000000000000000000..a6140215b3ff376fe91d4ae7bb48edb724ef6f2f
Binary files /dev/null and b/BrowserGym/tests/core/data/screenshot.png differ
diff --git a/BrowserGym/tests/core/data/test_page.html b/BrowserGym/tests/core/data/test_page.html
new file mode 100644
index 0000000000000000000000000000000000000000..cdb46c801b32395364831e3c0a6dc32149bda067
--- /dev/null
+++ b/BrowserGym/tests/core/data/test_page.html
@@ -0,0 +1,29 @@
+
+
+
+ Simple Form
+
+
+ Simple Form
+
+
+
+
diff --git a/BrowserGym/tests/core/data/test_page_2.html b/BrowserGym/tests/core/data/test_page_2.html
new file mode 100644
index 0000000000000000000000000000000000000000..b3b2a5d69c83f74229fb2589f7c0798f960db9bb
--- /dev/null
+++ b/BrowserGym/tests/core/data/test_page_2.html
@@ -0,0 +1,63 @@
+
+
+
+
+ Simple Form
+
+
+
+ Simple Form
+
+
+
+
+ Text within a non-html tag
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Text that should not be visible
+
+
+
diff --git a/BrowserGym/tests/core/data/textbox.html b/BrowserGym/tests/core/data/textbox.html
new file mode 100644
index 0000000000000000000000000000000000000000..c93bd6f7835a9f11860ce6cd2406794c3376a26b
--- /dev/null
+++ b/BrowserGym/tests/core/data/textbox.html
@@ -0,0 +1,13 @@
+
+
+
+
+ Simple HTML Page
+
+
+
+
+
+
+
+
diff --git a/BrowserGym/tests/core/test_actions_highlevel.py b/BrowserGym/tests/core/test_actions_highlevel.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3a4f56c6f9f7d579cccee40f2d747c8c42cbdc9
--- /dev/null
+++ b/BrowserGym/tests/core/test_actions_highlevel.py
@@ -0,0 +1,1256 @@
+import ast
+import os
+import pathlib
+import platform
+import re
+
+import bs4
+import gymnasium as gym
+import pytest
+from pyparsing.exceptions import ParseException
+
+# register openended gym environments
+import browsergym.core
+from browsergym.core.action.highlevel import HighLevelActionSet
+from browsergym.core.action.parsers import NamedArgument, highlevel_action_parser
+from browsergym.core.constants import BROWSERGYM_ID_ATTRIBUTE as BID_ATTR
+from browsergym.utils.obs import flatten_dom_to_str
+
+_IS_MAC_OS = platform.system() == "Darwin"  # picks Meta vs Control keyboard shortcuts in test_key_press
+
+__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None  # slow_mo passed to gym.make; only set when DISPLAY_BROWSER is defined
+__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True  # run headed only when DISPLAY_BROWSER is defined
+__TIMEOUT = 500  # timeout passed to gym.make (presumably milliseconds -- confirm against the env)
+
+__DATA_DIR = pathlib.Path(__file__).resolve().parent / "data"  # HTML fixtures stored next to this module
+
+TEXTBOX_URL = f"file://{__DATA_DIR}/textbox.html"
+EXAMPLE_URL = f"file://{__DATA_DIR}/example.html"
+HOVER_URL = f"file://{__DATA_DIR}/hover.html"
+INEXISTANT_FILE_URL = f"file://{__DATA_DIR}/no_file_here.html"  # presumably a path with no fixture behind it (name kept as-is)
+LONG_PAGE_URL = f"file://{__DATA_DIR}/long_page.html"
+TEXT_INPUT_URL = f"file://{__DATA_DIR}/input_type/text_input.html"
+URL_INPUT_URL = f"file://{__DATA_DIR}/input_type/url_input.html"
+CHECKBOX_URL = f"file://{__DATA_DIR}/input_type/checkbox_input.html"
+MULTI_IFRAME_URL = f"file://{__DATA_DIR}/basic_iframe_site/basic_iframe_2.html"
+OBSTRUCTED_CHECKBOX_URL = f"file://{__DATA_DIR}/obstructed_checkbox_page.html"
+LOTS_OF_IFRAMES_URL = f"file://{__DATA_DIR}/lots_of_iframes.html"
+
+
+def test_action_parser():  # grammar checks for highlevel_action_parser (no browser involved)
+    parser = highlevel_action_parser
+
+    # an empty action is a parse error; parse_string() raises, so there is no result to inspect
+    with pytest.raises(ParseException):
+        parser.parse_string("", parseAll=True)
+
+    function_calls = parser.parse_string("a()", parseAll=True)
+    assert len(function_calls) == 1
+
+    function_calls = parser.parse_string(" a ( ) \n\n\t", parseAll=True)
+    assert len(function_calls) == 1
+
+    function_calls = parser.parse_string(" a ( ) b() \n \tc()", parseAll=True)
+    assert [function_name for function_name, _ in function_calls] == ["a", "b", "c"]
+
+    function_calls = parser.parse_string('a(12, 12.2, "text", (1, 2, 3), ["a", 23])', parseAll=True)
+    _, function_args = function_calls[0]
+    assert function_args == [12, 12.2, "text", (1, 2, 3), ["a", 23]]
+
+    function_calls = parser.parse_string('a(x=12, y = 12.2, other = "text")', parseAll=True)
+    _, function_args = function_calls[0]
+    assert function_args == [
+        NamedArgument(name="x", value=12),
+        NamedArgument(name="y", value=12.2),
+        NamedArgument(name="other", value="text"),
+    ]
+
+    function_calls = parser.parse_string('a(12, y = 12.2, other = "text")', parseAll=True)
+    _, function_args = function_calls[0]
+    assert function_args == [
+        12,
+        NamedArgument(name="y", value=12.2),
+        NamedArgument(name="other", value="text"),
+    ]
+
+    with pytest.raises(ParseException):  # positional argument after a named one
+        parser.parse_string('a(x = 12, 12.2, other = "text")', parseAll=True)
+
+    with pytest.raises(ParseException):  # argument name is not an identifier
+        parser.parse_string('a(12, 12.2, 1 = "text")', parseAll=True)
+
+    with pytest.raises(ParseException):  # dangling operator
+        parser.parse_string("a(1-)", parseAll=True)
+
+    with pytest.raises(ParseException):  # arithmetic expressions are not literals
+        parser.parse_string("a(1/2)", parseAll=True)
+
+    function_calls = parser.parse_string('a("""\nsome\ntext\\"\\"""")', parseAll=True)
+    _, function_args = function_calls[0]
+    assert function_args == ['\nsome\ntext""']
+
+    function_calls = parser.parse_string("a('\"some\\ntext\"')", parseAll=True)
+    _, function_args = function_calls[0]
+    assert function_args == ['"some\ntext"']
+
+    function_calls = parser.parse_string('#comment\na("# not comment") #comment \n ', parseAll=True)
+    assert len(function_calls) == 1
+    function_name, function_args = function_calls[0]
+    assert function_name == "a"
+    assert function_args == ["# not comment"]
+
+    function_calls = parser.parse_string('fun(12, x="val", y={"aaa": 23})', parseAll=True)
+    function_name, function_args = function_calls[0]
+    assert function_name == "fun"
+    assert function_args == [
+        12,
+        NamedArgument(name="x", value="val"),
+        NamedArgument(name="y", value={"aaa": 23}),
+    ]
+
+
+def test_valid_action():  # click() on the vehicle1 checkbox written as plain, multi-line, markdown and free-text actions
+ action_set = HighLevelActionSet()
+
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": CHECKBOX_URL},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ timeout=__TIMEOUT,
+ action_mapping=action_set.to_python_code,
+ )
+
+ def get_checkbox_elem(obs):  # re-parse the flattened DOM and return the vehicle1 checkbox tag
+ soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
+ checkbox = soup.find("input", attrs={"type": "checkbox", "id": "vehicle1"})
+ return checkbox
+
+ obs, info = env.reset()
+ checkbox = get_checkbox_elem(obs)
+
+ # initial state: no error, box not checked
+ assert not obs["last_action_error"]
+ assert not checkbox.has_attr("checked")
+
+ # typo in action (unescaped double quotes)
+ action = f"""\
+click({repr(checkbox.get(BID_ATTR))}, "17" screen") # typo here
+"""
+ with pytest.raises(ValueError):
+ python_action = action_set.to_python_code(action)
+
+ obs, reward, term, trunc, info = env.step(action)
+ checkbox = get_checkbox_elem(obs)
+
+ # the malformed action is reported as an empty action, and the box stays unchecked
+ assert "Received an empty action." in obs["last_action_error"]
+ assert not checkbox.has_attr("checked")
+
+ # click box 1 time
+ action = f"""\
+click({repr(checkbox.get(BID_ATTR))})
+"""
+ python_action = action_set.to_python_code(action)
+
+ assert python_action.count("\nclick(") == 1  # one click( call in the generated code
+
+ obs, reward, term, trunc, info = env.step(action)
+ checkbox = get_checkbox_elem(obs)
+
+ # box checked
+ assert not obs["last_action_error"]
+ assert checkbox.has_attr("checked")
+
+ # click box 2 times
+ action = f"""\
+click({repr(checkbox.get(BID_ATTR))})
+click({repr(checkbox.get(BID_ATTR))})
+"""
+ python_action = action_set.to_python_code(action)
+
+ assert python_action.count("\nclick(") == 2
+
+ obs, reward, term, trunc, info = env.step(action)
+ checkbox = get_checkbox_elem(obs)
+
+ # box still checked (two toggles cancel out)
+ assert not obs["last_action_error"]
+ assert checkbox.has_attr("checked")
+
+ # click box 3 times
+ action = f"""\
+click({repr(checkbox.get(BID_ATTR))})
+click({repr(checkbox.get(BID_ATTR))})
+click({repr(checkbox.get(BID_ATTR))})
+"""
+ python_action = action_set.to_python_code(action)
+
+ assert python_action.count("\nclick(") == 3
+
+ obs, reward, term, trunc, info = env.step(action)
+ checkbox = get_checkbox_elem(obs)
+
+ # box unchecked
+ assert not obs["last_action_error"]
+ assert not checkbox.has_attr("checked")
+
+ # click box 3 times, same line ops
+ action = f"""\
+click({repr(checkbox.get(BID_ATTR))}) click({repr(checkbox.get(BID_ATTR))}) click({repr(checkbox.get(BID_ATTR))})
+"""
+ python_action = action_set.to_python_code(action)
+
+ assert python_action.count("\nclick(") == 3
+
+ obs, reward, term, trunc, info = env.step(action)
+ checkbox = get_checkbox_elem(obs)
+
+ # box checked
+ assert not obs["last_action_error"]
+ assert checkbox.has_attr("checked")
+
+ # click box 3 times, multi line ops, whitespace, tab, comma in-between args
+ action = f"""\
+ click( {repr(checkbox.get(BID_ATTR))} ) click({repr(checkbox.get(BID_ATTR))})\t
+ noop() noop () noop( )
+ # THIS IS A COMMENT
+ noop() # this is a noop() call
+click({repr(checkbox.get(BID_ATTR))}, )
+#click({repr(checkbox.get(BID_ATTR))})
+"""
+ python_action = action_set.to_python_code(action)
+
+ assert python_action.count("\nclick(") == 3  # the commented-out click is not counted
+
+ obs, reward, term, trunc, info = env.step(action)
+ checkbox = get_checkbox_elem(obs)
+
+ # box unchecked
+ assert not obs["last_action_error"]
+ assert not checkbox.has_attr("checked")
+
+ # click box 3 times, multi line ops, whitespace, tab, comma in-between args, markdown code block
+ action = f"""\
+Below is code
+ ```python
+ click( {repr(checkbox.get(BID_ATTR))} ) click({repr(checkbox.get(BID_ATTR))})\t
+ noop() noop () noop( )
+ # THIS IS A COMMENT
+ noop() # this is a noop() call
+click({repr(checkbox.get(BID_ATTR))}, )
+#click({repr(checkbox.get(BID_ATTR))})
+```
+This is not code, just an explanation
+"""
+ python_action = action_set.to_python_code(action)
+
+ assert python_action.count("\nclick(") == 3
+
+ obs, reward, term, trunc, info = env.step(action)
+ checkbox = get_checkbox_elem(obs)
+
+ # box checked
+ assert not obs["last_action_error"]
+ assert checkbox.has_attr("checked")
+
+ # multiple markdown code blocks
+ action = f"""\
+Below is code
+ ```python
+ noop() noop () noop( )
+ # THIS IS A COMMENT
+ noop() # this is a noop() call
+click({repr(checkbox.get(BID_ATTR))}, )
+#click({repr(checkbox.get(BID_ATTR))})
+```
+This is not code, just an explanation
+Below is more code
+ ```python
+ click( {repr(checkbox.get(BID_ATTR))} ) click({repr(checkbox.get(BID_ATTR))})\t
+ noop() noop () noop( )
+ # THIS IS A COMMENT
+ noop() # this is a noop() call
+#click({repr(checkbox.get(BID_ATTR))})
+```
+This is not code, just an explanation
+"""
+ python_action = action_set.to_python_code(action)
+
+ assert python_action.count("\nclick(") == 3  # one click from the first block, two from the second
+
+ obs, reward, term, trunc, info = env.step(action)
+ checkbox = get_checkbox_elem(obs)
+
+ # box unchecked
+ assert not obs["last_action_error"]
+ assert not checkbox.has_attr("checked")
+
+ # multiple function calls in the middle of text
+ action = f"""\
+Let's do a noop(), then noop () noop( ) then click({repr(checkbox.get(BID_ATTR))}, )
+#click({repr(checkbox.get(BID_ATTR))})
+Now let's do two more
+ click( {repr(checkbox.get(BID_ATTR))} ) click({repr(checkbox.get(BID_ATTR))})\t
+ noop() noop () noop( )
+ # THIS IS A COMMENT
+ noop() # this is a noop() call
+#click({repr(checkbox.get(BID_ATTR))})
+```
+This is not code, just an explanation
+This is garbage
+"""
+ python_action = action_set.to_python_code(action)
+
+ assert python_action.count("\nclick(") == 3
+
+ obs, reward, term, trunc, info = env.step(action)
+ checkbox = get_checkbox_elem(obs)
+
+ # box checked
+ assert not obs["last_action_error"]
+ assert checkbox.has_attr("checked")
+
+ env.close()
+
+
+def test_invalid_action():  # invalid click() arguments surface in last_action_error; unparsable actions raise
+    action_set = HighLevelActionSet()
+
+    env = gym.make(
+        "browsergym/openended",
+        task_kwargs={"start_url": CHECKBOX_URL},
+        headless=__HEADLESS,
+        slow_mo=__SLOW_MO,
+        timeout=__TIMEOUT,
+        action_mapping=action_set.to_python_code,
+    )
+    obs, info = env.reset()
+
+    # click nonexistent bid
+    action = """\
+click("INVALID_BID")
+"""
+
+    obs, reward, term, trunc, info = env.step(action)
+
+    # error
+    assert "ValueError" in obs["last_action_error"]
+
+    # invalid bid value type
+    action = """\
+click(None)
+"""
+
+    obs, reward, term, trunc, info = env.step(action)
+
+    # error
+    assert obs["last_action_error"] == "ValueError: expected a string, got None"
+
+    # invalid bid value type
+    action = """\
+click(42.7)
+"""
+
+    obs, reward, term, trunc, info = env.step(action)
+
+    # error
+    assert obs["last_action_error"] == "ValueError: expected a string, got 42.7"
+
+    # invalid bid value type
+    action = """\
+click([])
+"""
+
+    obs, reward, term, trunc, info = env.step(action)
+
+    # error
+    assert obs["last_action_error"] == "ValueError: expected a string, got []"
+
+    # invalid bid value type
+    action = """\
+click([42, "a", True, None])
+"""
+
+    obs, reward, term, trunc, info = env.step(action)
+
+    # error
+    assert obs["last_action_error"] == "ValueError: expected a string, got [42, 'a', True, None]"
+
+    # invalid bid value type
+    action = """\
+click({})
+"""
+
+    obs, reward, term, trunc, info = env.step(action)
+
+    # error
+    assert obs["last_action_error"] == "ValueError: expected a string, got {}"
+
+    # invalid bid value type
+    action = """\
+click({"k": "aaa"})
+"""
+
+    obs, reward, term, trunc, info = env.step(action)
+
+    # error
+    assert obs["last_action_error"] == "ValueError: expected a string, got {'k': 'aaa'}"
+
+    # invalid action args (too many)
+    action = """\
+click("4", "aa", "bb")
+"""
+
+    obs, reward, term, trunc, info = env.step(action)
+
+    # error
+    assert obs["last_action_error"] == "Error: Locator.click: modifiers: expected array, got string"
+
+    # invalid action args (not enough)
+    action = """\
+click()
+"""
+
+    obs, reward, term, trunc, info = env.step(action)
+
+    # error
+    assert (
+        obs["last_action_error"]
+        == "TypeError: click() missing 1 required positional argument: 'bid'"
+    )
+
+    # invalid action args (not enough), repeated: the same error is reported again
+    action = """\
+click()
+"""
+
+    obs, reward, term, trunc, info = env.step(action)
+
+    # error
+    assert (
+        obs["last_action_error"]
+        == "TypeError: click() missing 1 required positional argument: 'bid'"
+    )
+
+    # invalid action name
+    with pytest.raises(NameError):
+        action_set.to_python_code(
+            """\
+not_a_valid_action()
+"""
+        )
+
+    # forbidden fill action (not part of the "coord" subset)
+    with pytest.raises(NameError):
+        HighLevelActionSet(subsets=["coord"]).to_python_code(
+            """\
+fill("INVALID_BID", "some text")
+"""
+        )
+
+    # forbidden import
+    with pytest.raises(ValueError):
+        action_set.to_python_code(
+            """\
+import numpy as np
+"""
+        )
+
+    # invalid expression, results in empty action
+    with pytest.raises(ValueError):
+        action_set.to_python_code(
+            """\
+[
+"""
+        )
+
+    # invalid expression, results in empty action
+    with pytest.raises(ValueError):
+        action_set.to_python_code(
+            """\
+click
+"""
+        )
+
+    env.close()
+
+
+def test_click_through_frames():  # click() by bid unchecks the checkbox_2 box on the multi-iframe page
+ action_set = HighLevelActionSet()
+
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": MULTI_IFRAME_URL},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ timeout=__TIMEOUT,
+ action_mapping=action_set.to_python_code,
+ )
+
+ obs, info = env.reset()
+
+ soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
+ checkbox = soup.find("input", attrs={"type": "checkbox", "id": "checkbox_2"})
+
+ # box starts out checked
+ assert checkbox.has_attr("checked")
+
+ # click box
+ action = f"""\
+click({repr(checkbox.get(BID_ATTR))})
+"""
+ python_action = action_set.to_python_code(action)  # result unused; only checks that the action converts without raising
+
+ obs, reward, term, trunc, info = env.step(action)
+
+ # no error
+ assert not obs["last_action_error"]
+
+ soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
+ checkbox = soup.find("input", attrs={"type": "checkbox", "id": "checkbox_2"})
+
+ # box not checked
+ assert not checkbox.has_attr("checked")
+
+ env.close()
+
+
+def test_fill_through_iframe():  # fill() by bid sets the value of the text input found on the multi-iframe page
+ action_set = HighLevelActionSet()
+
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": MULTI_IFRAME_URL},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ timeout=__TIMEOUT,
+ action_mapping=action_set.to_python_code,
+ )
+
+ obs, info = env.reset()
+
+ soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
+ text_input = soup.find(
+ "input", attrs={"type": "text", "placeholder": "Enter text here in iframe"}
+ )
+
+ # empty input
+ assert text_input.get("value") == ""
+
+ # fill with some text
+ action = f"""\
+fill({repr(text_input.get(BID_ATTR))}, "This is a test value.")
+"""
+ python_action = action_set.to_python_code(action)  # result unused; only checks that the action converts without raising
+
+ obs, reward, term, trunc, info = env.step(action)
+
+ # no error
+ assert not obs["last_action_error"]
+
+ soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
+ text_input = soup.find(
+ "input", attrs={"type": "text", "placeholder": "Enter text here in iframe"}
+ )
+
+ # input filled to desired value
+ assert text_input.get("value") == "This is a test value."
+
+ env.close()
+
+
+def test_click():  # click() by bid toggles the vehicle1 checkbox: once, once more, then twice in one action
+ action_set = HighLevelActionSet()
+
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": CHECKBOX_URL},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ timeout=__TIMEOUT,
+ action_mapping=action_set.to_python_code,
+ )
+
+ def get_checkbox_elem(obs):  # re-parse the flattened DOM and return the vehicle1 checkbox tag
+ soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
+ checkbox = soup.find("input", attrs={"type": "checkbox", "id": "vehicle1"})
+ return checkbox
+
+ obs, info = env.reset()
+ checkbox = get_checkbox_elem(obs)
+
+ # box not checked
+ assert not checkbox.has_attr("checked")
+
+ # click box
+ action = f"""
+click({repr(checkbox.get(BID_ATTR))})
+"""
+ python_action = action_set.to_python_code(action)  # result unused; only checks that the action converts without raising
+
+ obs, reward, terminated, truncated, info = env.step(action)
+ checkbox = get_checkbox_elem(obs)
+
+ # no error
+ assert not obs["last_action_error"]
+
+ # box checked
+ assert checkbox.has_attr("checked")
+
+ # click box again
+ action = f"""\
+click({repr(checkbox.get(BID_ATTR))})
+"""
+ python_action = action_set.to_python_code(action)
+
+ obs, reward, term, trunc, info = env.step(action)
+ checkbox = get_checkbox_elem(obs)
+
+ # no error
+ assert not obs["last_action_error"]
+
+ # box unchecked
+ assert not checkbox.has_attr("checked")
+
+ # click box twice
+ action = f"""\
+click({repr(checkbox.get(BID_ATTR))})
+click({repr(checkbox.get(BID_ATTR))})
+"""
+ python_action = action_set.to_python_code(action)
+
+ obs, reward, term, trunc, info = env.step(action)
+ checkbox = get_checkbox_elem(obs)
+
+ # no error
+ assert not obs["last_action_error"]
+
+ # box still unchecked (two toggles cancel out)
+ assert not checkbox.has_attr("checked")
+
+ env.close()
+
+
+def test_hover():  # hover() by bid changes the button label; mouse_move(0, 0) changes it back
+ action_set = HighLevelActionSet(subsets=["bid", "coord"])
+
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": HOVER_URL},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ timeout=__TIMEOUT,
+ action_mapping=action_set.to_python_code,
+ )
+
+ def get_button_elem(obs):  # re-parse the flattened DOM and return the first button-type input
+ soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
+ button = soup.find("input", attrs={"type": "button"})
+ return button
+
+ obs, info = env.reset()
+ button = get_button_elem(obs)
+
+ assert not obs["last_action_error"]
+ assert button.get("value") == "Hover me"  # label before hovering
+
+ action = f"""
+hover({repr(button.get(BID_ATTR))})
+"""
+
+ obs, reward, terminated, truncated, info = env.step(action)
+ button = get_button_elem(obs)
+
+ assert not obs["last_action_error"]
+ assert button.get("value") == "Hello world!"  # label while hovered
+
+ action = f"""
+mouse_move(0, 0)
+"""
+
+ obs, reward, terminated, truncated, info = env.step(action)
+ button = get_button_elem(obs)
+
+ assert not obs["last_action_error"]
+ assert button.get("value") == "Hover me"  # label after the pointer moved to (0, 0)
+
+ env.close()
+
+
+def test_fill_type_press():  # fill/focus/clear/click combined with keyboard_* actions on the fname and lname inputs
+    action_set = HighLevelActionSet(subsets=["bid", "coord"])
+    env = gym.make(
+        "browsergym/openended",
+        task_kwargs={"start_url": TEXT_INPUT_URL},
+        headless=__HEADLESS,
+        slow_mo=__SLOW_MO,
+        timeout=__TIMEOUT,
+        action_mapping=action_set.to_python_code,
+    )
+
+    def get_fname_lname_elems(obs):  # re-parse the flattened DOM and return the (fname, lname) input tags
+        soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
+        fname = soup.find("input", attrs={"id": "fname"})
+        lname = soup.find("input", attrs={"id": "lname"})
+        return fname, lname
+
+    obs, info = env.reset()
+    fname, lname = get_fname_lname_elems(obs)
+
+    # fill using bid
+    action = f"""
+fill({repr(fname.get(BID_ATTR))}, 'Christian')
+"""
+
+    obs, reward, terminated, truncated, info = env.step(action)
+    fname, lname = get_fname_lname_elems(obs)
+
+    assert not obs["last_action_error"]
+    assert fname.get("value") == "Christian"
+    assert lname.get("value") == ""
+
+    # fill using bid
+    action = f"""
+fill({repr(lname.get(BID_ATTR))}, 'Clavier')
+"""
+
+    obs, reward, terminated, truncated, info = env.step(action)
+    fname, lname = get_fname_lname_elems(obs)
+
+    assert not obs["last_action_error"]
+    assert fname.get("value") == "Christian"
+    assert lname.get("value") == "Clavier"
+
+    # type using focus and keyboard_type (non-ASCII text, appended to the existing value)
+    action = f"""
+focus({repr(fname.get(BID_ATTR))}) keyboard_type('Gérard')
+"""
+
+    obs, reward, terminated, truncated, info = env.step(action)
+    fname, lname = get_fname_lname_elems(obs)
+
+    assert not obs["last_action_error"]
+    assert fname.get("value") == "ChristianGérard"
+    assert lname.get("value") == "Clavier"
+
+    # type using click and keyboard_insert_text
+    action = f"""
+click({repr(lname.get(BID_ATTR))}) keyboard_insert_text('Jugnot')
+"""
+
+    obs, reward, terminated, truncated, info = env.step(action)
+    fname, lname = get_fname_lname_elems(obs)
+
+    assert not obs["last_action_error"]
+    assert fname.get("value") == "ChristianGérard"
+    assert lname.get("value") == "ClavierJugnot"
+
+    # type using clear and keyboard_insert_text
+    action = f"""
+clear({repr(lname.get(BID_ATTR))}) keyboard_insert_text('Jugnot')
+"""
+
+    obs, reward, terminated, truncated, info = env.step(action)
+    fname, lname = get_fname_lname_elems(obs)
+
+    assert not obs["last_action_error"]
+    assert fname.get("value") == "ChristianGérard"
+    assert lname.get("value") == "Jugnot"
+
+    # type using click, manual clear and keyboard_insert_text
+    action = f"""
+click({repr(fname.get(BID_ATTR))})
+# clear the field
+keyboard_press('End')
+keyboard_down('Shift')
+keyboard_press('Home')
+keyboard_up('Shift')
+keyboard_press('Backspace')
+# insert text
+keyboard_insert_text('Gérard')
+"""
+
+    obs, reward, terminated, truncated, info = env.step(action)
+    fname, lname = get_fname_lname_elems(obs)
+
+    assert not obs["last_action_error"]
+    assert fname.get("value") == "Gérard"
+    assert lname.get("value") == "Jugnot"
+
+    # fill empty text
+    action = f"""
+fill({repr(fname.get(BID_ATTR))}, '')
+"""
+
+    obs, reward, terminated, truncated, info = env.step(action)
+    fname, lname = get_fname_lname_elems(obs)
+
+    assert not obs["last_action_error"]
+    assert fname.get("value") == ""
+    assert lname.get("value") == "Jugnot"
+
+    # type in currently focused element
+    action = """
+keyboard_type('Jean')
+"""
+
+    obs, reward, terminated, truncated, info = env.step(action)
+    fname, lname = get_fname_lname_elems(obs)
+
+    assert not obs["last_action_error"]
+    assert fname.get("value") == "Jean"
+    assert lname.get("value") == "Jugnot"
+
+    # de-focus (click 0, 0), then type text
+    action = """
+mouse_click(0, 0)
+"""
+    obs, reward, terminated, truncated, info = env.step(action)
+    fname, lname = get_fname_lname_elems(obs)
+
+    assert not obs["last_action_error"]
+    assert fname.get("value") == "Jean"
+    assert lname.get("value") == "Jugnot"
+
+    action = """
+keyboard_type('Reno')
+"""
+    obs, reward, terminated, truncated, info = env.step(action)
+    fname, lname = get_fname_lname_elems(obs)
+
+    assert not obs["last_action_error"]
+    assert fname.get("value") == "Jean"  # nothing is focused, so neither field changes
+    assert lname.get("value") == "Jugnot"
+
+    env.close()
+
+
+@pytest.mark.skip(reason="Not implemented yet")
+def test_dblclick():  # placeholder: always skipped, the body asserts nothing
+ pass
+
+
+# copy/paste text between the fname and lname inputs using a sequence of keyboard_press actions
+def test_key_press():
+ action_set = HighLevelActionSet(subsets=["bid", "coord"])
+
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": TEXT_INPUT_URL},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ timeout=__TIMEOUT,
+ action_mapping=action_set.to_python_code,
+ )
+
+ obs, info = env.reset()
+
+ def get_fname_lname_elems(obs):  # re-parse the flattened DOM and return the (fname, lname) input tags
+ soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
+ fname = soup.find("input", attrs={"id": "fname"})
+ lname = soup.find("input", attrs={"id": "lname"})
+ return fname, lname
+
+ fname, lname = get_fname_lname_elems(obs)
+
+ action = f"""
+ fill({repr(fname.get(BID_ATTR))}, "Christian")
+ keyboard_press({repr("Meta+a" if _IS_MAC_OS else "Control+a")})
+ keyboard_press({repr("Meta+c" if _IS_MAC_OS else "Control+c")})
+ click({repr(lname.get(BID_ATTR))})
+ keyboard_press({repr("Meta+v" if _IS_MAC_OS else "Control+v")})
+ """
+
+ obs, reward, terminated, truncated, info = env.step(action)
+
+ assert not obs["last_action_error"]
+
+ fname, lname = get_fname_lname_elems(obs)
+
+ assert lname.get("value") == "Christian"  # the text filled into fname was pasted into lname
+
+ env.close()
+
+
+def test_goto():  # goto(), go_back() and go_forward() move obs["url"] between two fixture pages
+ url1 = URL_INPUT_URL
+ url2 = TEXT_INPUT_URL
+
+ env = gym.make(  # no action_mapping is passed here, unlike the other tests in this module
+ "browsergym/openended",
+ task_kwargs={"start_url": url1},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ timeout=__TIMEOUT,
+ )
+
+ obs, info = env.reset()
+
+ assert obs["url"] == url1
+
+ action = f"""
+goto({repr(url2)})
+"""
+
+ obs, reward, terminated, truncated, info = env.step(action)
+
+ assert not obs["last_action_error"]
+
+ assert obs["url"] == url2
+
+ action = """
+go_back()
+"""
+
+ obs, reward, terminated, truncated, info = env.step(action)
+
+ assert not obs["last_action_error"]
+
+ assert obs["url"] == url1  # back on the start page
+
+ action = """
+go_forward()
+"""
+
+ obs, reward, terminated, truncated, info = env.step(action)
+
+ assert not obs["last_action_error"]
+
+ assert obs["url"] == url2  # forward again to the goto() target
+
+ env.close()
+
+
+def test_scroll():  # mouse_click by coordinates plus scroll() on the long page with top/bottom checkboxes
+    action_set = HighLevelActionSet(subsets=["coord"])
+
+    env = gym.make(
+        "browsergym/openended",
+        task_kwargs={"start_url": LONG_PAGE_URL},
+        headless=__HEADLESS,
+        slow_mo=__SLOW_MO,
+        timeout=__TIMEOUT,
+        action_mapping=action_set.to_python_code,
+    )
+
+    def extract_coords_from_elem(elem):  # parse the literal stored in the element's "center" attribute
+        return ast.literal_eval(elem.get("center"))
+
+    def get_top_bottom_elems(obs):  # flatten the DOM with center coordinates and return the (top, bottom) checkboxes
+        soup = bs4.BeautifulSoup(
+            flatten_dom_to_str(
+                obs["dom_object"], obs["extra_element_properties"], with_center_coords=True
+            ),
+            "lxml",
+        )
+        top = soup.find("input", attrs={"type": "checkbox", "id": "top"})
+        bottom = soup.find("input", attrs={"type": "checkbox", "id": "bottom"})
+        return top, bottom
+
+    obs, info = env.reset()
+    top, bottom = get_top_bottom_elems(obs)
+    top_x, top_y = extract_coords_from_elem(top)
+    bottom_x, bottom_y = extract_coords_from_elem(bottom)
+
+    # top not checked
+    assert not top.has_attr("checked")
+    # bottom not checked
+    assert not bottom.has_attr("checked")
+
+    # click top
+    action = f"mouse_click({repr(top_x)}, {repr(top_y)})"
+
+    obs, reward, terminated, truncated, info = env.step(action)
+
+    top, bottom = get_top_bottom_elems(obs)
+    top_x, top_y = extract_coords_from_elem(top)
+    bottom_x, bottom_y = extract_coords_from_elem(bottom)
+
+    # no error
+    assert not obs["last_action_error"]
+    # top checked
+    assert top.has_attr("checked")
+    # bottom not checked
+    assert not bottom.has_attr("checked")
+
+    top, bottom = get_top_bottom_elems(obs)
+    top_x, top_y = extract_coords_from_elem(top)
+    bottom_x, bottom_y = extract_coords_from_elem(bottom)
+
+    # click bottom
+    action = f"mouse_click({repr(bottom_x)}, {repr(bottom_y)})"
+
+    obs, reward, terminated, truncated, info = env.step(action)
+
+    top, bottom = get_top_bottom_elems(obs)
+    top_x, top_y = extract_coords_from_elem(top)
+    bottom_x, bottom_y = extract_coords_from_elem(bottom)
+
+    # no error (click coordinates out of viewport is a silent fail in playwright)
+    assert not obs["last_action_error"]
+    # top checked
+    assert top.has_attr("checked")
+    # bottom not checked (click didn't go through)
+    assert not bottom.has_attr("checked")
+
+    # scroll up
+    action = "scroll(0, -500)"
+
+    obs, reward, terminated, truncated, info = env.step(action)
+
+    top, bottom = get_top_bottom_elems(obs)
+    prev_top_x, prev_top_y = top_x, top_y
+    top_x, top_y = extract_coords_from_elem(top)
+    prev_bottom_x, prev_bottom_y = bottom_x, bottom_y
+    bottom_x, bottom_y = extract_coords_from_elem(bottom)
+
+    # no error
+    assert not obs["last_action_error"]
+
+    # no movement (scrolling up changed neither element's coordinates)
+    assert prev_top_x == top_x and prev_top_y == top_y
+    assert prev_bottom_x == bottom_x and prev_bottom_y == bottom_y
+
+    # scroll down
+    action = "scroll(0, 500)"
+
+    obs, reward, terminated, truncated, info = env.step(action)
+
+    top, bottom = get_top_bottom_elems(obs)
+    prev_top_x, prev_top_y = top_x, top_y
+    top_x, top_y = extract_coords_from_elem(top)
+    prev_bottom_x, prev_bottom_y = bottom_x, bottom_y
+    bottom_x, bottom_y = extract_coords_from_elem(bottom)
+
+    # no error
+    assert not obs["last_action_error"]
+
+    # movement (both elements now report smaller y coordinates, x unchanged)
+    assert prev_top_x == top_x and prev_top_y > top_y
+    assert prev_bottom_x == bottom_x and prev_bottom_y > bottom_y
+
+    env.close()
+
+
+def test_tab_actions():  # new_tab / goto / tab_focus / tab_close keep the open-pages observation fields consistent
+ action_set = HighLevelActionSet(subsets=["tab", "nav"])
+
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": CHECKBOX_URL},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ timeout=__TIMEOUT,
+ action_mapping=action_set.to_python_code,
+ )
+ obs, info = env.reset()
+ assert not obs["last_action_error"]
+ assert len(obs["open_pages_urls"]) == 1
+ assert len(obs["open_pages_titles"]) == 1
+ assert obs["active_page_index"] == 0
+ assert obs["open_pages_urls"][obs["active_page_index"][0]] == obs["url"]  # active_page_index is indexed with [0], so it is a sequence, not a bare int
+
+ obs, reward, terminated, truncated, info = env.step("new_tab()")  # a second page opens and becomes active
+ assert not obs["last_action_error"]
+ assert len(obs["open_pages_urls"]) == 2
+ assert len(obs["open_pages_titles"]) == 2
+ assert obs["active_page_index"] == 1
+ assert obs["open_pages_urls"][obs["active_page_index"][0]] == obs["url"]
+
+ obs, reward, terminated, truncated, info = env.step(f"goto({repr(TEXTBOX_URL)})")  # page count and active index stay the same
+ assert not obs["last_action_error"]
+ assert len(obs["open_pages_urls"]) == 2
+ assert len(obs["open_pages_titles"]) == 2
+ assert obs["active_page_index"] == 1
+ assert obs["open_pages_urls"][obs["active_page_index"][0]] == obs["url"]
+
+ obs, reward, terminated, truncated, info = env.step("tab_focus(0)")  # the first page becomes active again
+ assert not obs["last_action_error"]
+ assert len(obs["open_pages_urls"]) == 2
+ assert len(obs["open_pages_titles"]) == 2
+ assert obs["active_page_index"] == 0
+ assert obs["open_pages_urls"][obs["active_page_index"][0]] == obs["url"]
+
+ obs, reward, terminated, truncated, info = env.step("tab_close()")  # one page remains, at index 0
+ assert not obs["last_action_error"]
+ assert len(obs["open_pages_urls"]) == 1
+ assert len(obs["open_pages_titles"]) == 1
+ assert obs["active_page_index"] == 0
+ assert obs["open_pages_urls"][obs["active_page_index"][0]] == obs["url"]
+
+ env.close()
+
+
+def test_mouse_down_up():
+ action_set = HighLevelActionSet(subsets=["bid", "coord"])
+
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": CHECKBOX_URL},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ timeout=__TIMEOUT,
+ action_mapping=action_set.to_python_code,
+ )
+
+ def get_checkbox_elem(obs):
+ soup = bs4.BeautifulSoup(
+ flatten_dom_to_str(
+ obs["dom_object"], obs["extra_element_properties"], with_center_coords=True
+ ),
+ "lxml",
+ )
+ checkbox = soup.find("input", attrs={"type": "checkbox", "id": "vehicle1"})
+ return checkbox
+
+ obs, info = env.reset()
+ checkbox = get_checkbox_elem(obs)
+
+ # box not checked
+ assert not obs["last_action_error"]
+ assert not checkbox.has_attr("checked")
+
+ # click box 1 time
+ x, y = ast.literal_eval(checkbox.get("center"))
+ action = f"""\
+mouse_click({repr(x)}, {repr(y)})
+"""
+ python_action = action_set.to_python_code(action)
+
+ assert python_action.count("\nmouse_") == 1
+
+ obs, reward, term, trunc, info = env.step(action)
+ checkbox = get_checkbox_elem(obs)
+
+ # box checked
+ assert not obs["last_action_error"]
+ assert checkbox.has_attr("checked")
+
+ # click box 1 time
+ x, y = ast.literal_eval(checkbox.get("center"))
+ action = f"""\
+mouse_move(0, 0)
+mouse_move({repr(x)}, {repr(y)})
+mouse_down({repr(x)}, {repr(y)})
+mouse_up({repr(x)}, {repr(y)})
+"""
+ python_action = action_set.to_python_code(action)
+
+ assert python_action.count("\nmouse_") == 4
+
+ obs, reward, term, trunc, info = env.step(action)
+ checkbox = get_checkbox_elem(obs)
+
+ # box not checked
+ assert not obs["last_action_error"]
+ assert not checkbox.has_attr("checked")
+
+ # click box 2 times
+ x, y = ast.literal_eval(checkbox.get("center"))
+ action = f"""\
+mouse_move(0, 0)
+mouse_move({repr(x)}, {repr(y)})
+mouse_down({repr(x)}, {repr(y)}, button="left")
+mouse_up({repr(x)}, {repr(y)}, "left")
+mouse_down({repr(x)}, {repr(y)})
+mouse_up({repr(x)}, {repr(y)})
+"""
+ python_action = action_set.to_python_code(action)
+
+ assert python_action.count("\nmouse_") == 6
+
+ obs, reward, term, trunc, info = env.step(action)
+ checkbox = get_checkbox_elem(obs)
+
+ # box not checked
+ assert not obs["last_action_error"]
+ assert not checkbox.has_attr("checked")
+
+
+# test that forced action can click an obstructed element
+@pytest.mark.parametrize("retry_with_force", [True, False])
+def test_forced_actions(retry_with_force):
+ """Click the "hobbies-checkbox-1" input with and without `retry_with_force`.
+
+ With `retry_with_force=True` the step must report no action error; with
+ `retry_with_force=False` the step must report an action error. The expected
+ state of the `checked` attribute differs between the two cases (see below).
+ """
+ action_set = HighLevelActionSet(subsets=["bid"], retry_with_force=retry_with_force)
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": OBSTRUCTED_CHECKBOX_URL},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ timeout=__TIMEOUT,
+ action_mapping=action_set.to_python_code,
+ )
+
+ obs, info = env.reset()
+
+ # helper: locate the target checkbox in the flattened DOM of an observation
+ def get_checkbox(obs):
+ soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
+ checkbox = soup.find("input", attrs={"id": "hobbies-checkbox-1"})
+ return checkbox
+
+ checkbox = get_checkbox(obs)
+
+ # the element is addressed through its browsergym id attribute
+ action = f"""
+ click({repr(checkbox.get(BID_ATTR))})
+ """
+
+ obs, reward, terminated, truncated, info = env.step(action)
+ checkbox = get_checkbox(obs)
+ if retry_with_force:
+ # forced click went through: no error, and the "checked" attribute is absent
+ # NOTE(review): presumably the checkbox starts checked on this page and the click unchecks it - confirm
+ assert not obs["last_action_error"]
+ assert checkbox.get("checked", False) == False
+ else:
+ # click failed: an error is reported and the "checked" attribute is still present
+ assert obs["last_action_error"]
+ assert checkbox.has_attr("checked")
+
+ env.close()
+
+
+# TODO investigate why it takes ~1sec to mark each frame, although they are very small, and if we can do something about it
+@pytest.mark.slow
+def test_iframe_bid():
+ """Click checkboxes on the LOTS_OF_IFRAMES_URL page through their browsergym id.
+
+ For each selected checkbox, its bid must consist of the expected iframe
+ prefix followed by digits, the click must not raise an action error, and
+ the checkbox must carry the `checked` attribute afterwards.
+ """
+ action_set = HighLevelActionSet(subsets=["bid"])
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": LOTS_OF_IFRAMES_URL},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ timeout=__TIMEOUT,
+ action_mapping=action_set.to_python_code,
+ )
+
+ obs, info = env.reset()
+
+ # helper: locate the input with id "checkbox<i>" in the flattened DOM
+ def get_checkbox(obs, i):
+ soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
+ checkbox = soup.find("input", attrs={"id": f"checkbox{i}"})
+ return checkbox
+
+ # try to click on checkboxes
+ # each entry is (checkbox number, expected bid prefix of the enclosing iframe)
+ checkboxes = [
+ (0, "a"),
+ # (5, "f"),
+ # (26, "aA"),
+ (29, "aD"),
+ ]
+ # NOTE(review): the loop variable `id` shadows the builtin of the same name
+ for id, iframe_bid in checkboxes:
+
+ # try to click on checkbox
+ checkbox = get_checkbox(obs, id)
+ bid = checkbox.get(BID_ATTR)
+
+ # iframe bid should match
+ assert re.match(f"^{iframe_bid}[0-9]+$", bid)
+
+ action = f"""
+ click({repr(bid)})
+ """
+
+ obs, reward, terminated, truncated, info = env.step(action)
+ assert not obs["last_action_error"]
+
+ # checkbox should get checked
+ checkbox = get_checkbox(obs, id)
+ assert checkbox.has_attr("checked")
+
+ env.close()
diff --git a/BrowserGym/tests/core/test_actions_python.py b/BrowserGym/tests/core/test_actions_python.py
new file mode 100644
index 0000000000000000000000000000000000000000..69cc6237bb1f128709578f7ea84a969cfb33adf8
--- /dev/null
+++ b/BrowserGym/tests/core/test_actions_python.py
@@ -0,0 +1,60 @@
+import pytest
+
+from browsergym.core.action.python import PythonActionSet
+
+
+# (action, expected_code) pairs fed to test_action_cleaning below
+ACTIONS_TO_TEST = [
+ # plain code: expected unchanged
+ (
+ """\
+a = 0
+""",
+ """\
+a = 0
+""",
+ ),
+ # code wrapped in a bare ``` fence: only the fenced content is expected
+ (
+ """\
+```
+a = 0
+```
+""",
+ """\
+a = 0
+""",
+ ),
+ # code wrapped in a ```python fence: only the fenced content is expected
+ (
+ """\
+```python
+a = 0
+```
+""",
+ """\
+a = 0
+""",
+ ),
+ # two fenced blocks with prose around them: both blocks are expected,
+ # separated by an empty line, and the prose is dropped
+ (
+ """\
+```python
+a = 0
+```
+This is an explanation
+```python
+b = 3
+```
+More explanations
+""",
+ """\
+a = 0
+
+b = 3
+""",
+ ),
+]
+
+
+@pytest.mark.parametrize("action,expected_code", ACTIONS_TO_TEST)
+def test_action_cleaning(action, expected_code):
+ action_set = PythonActionSet()
+ code = action_set.to_python_code(action)
+
+ assert code == expected_code
diff --git a/BrowserGym/tests/core/test_gym_envs.py b/BrowserGym/tests/core/test_gym_envs.py
new file mode 100644
index 0000000000000000000000000000000000000000..48fca3a32e119ee0a1e58440565c8e10d1fca2d7
--- /dev/null
+++ b/BrowserGym/tests/core/test_gym_envs.py
@@ -0,0 +1,313 @@
+import os
+import pathlib
+from time import time
+
+import bs4
+import gymnasium as gym
+import pytest
+
+# register openended gym environments
+import browsergym.core
+import browsergym.core.action
+from browsergym.core.action.highlevel import HighLevelActionSet
+from browsergym.core.action.python import PythonActionSet
+from browsergym.core.constants import BROWSERGYM_ID_ATTRIBUTE as BID_ATTR
+from browsergym.utils.obs import flatten_dom_to_str
+
+__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
+__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
+__TIMEOUT = 500
+
+__DATA_DIR = pathlib.Path(__file__).resolve().parent / "data"
+TEST_PAGE = f"file://{__DATA_DIR}/test_page.html"
+BASIC_IFRAME_PAGE = f"file://{__DATA_DIR}/basic_iframe_site/basic_iframe_2.html"
+
+
+def test_gym_env():
+ action_set = PythonActionSet()
+
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": TEST_PAGE},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ timeout=__TIMEOUT,
+ action_mapping=action_set.to_python_code,
+ )
+ obs, info = env.reset()
+
+ assert not obs["last_action_error"]
+
+ obs, reward, term, trunc, info = env.step(
+ f"""\
+page.get_by_label("Name:").click()
+page.get_by_label("Name:").fill("Janice")
+page.get_by_label("Name:").press("Tab")
+page.get_by_label("Email:").fill("janice@mail.com")
+page.get_by_label("Email:").press("Tab")
+page.get_by_label("Age:", exact=True).fill("21")
+page.get_by_label("Age:", exact=True).press("Tab")
+"""
+ )
+
+ assert obs["last_action_error"] == ""
+ assert reward == 0
+ assert term == False
+ assert trunc == False
+
+ obs, reward, term, trunc, info = env.step(
+ f"""\
+page.get_by_label("Message:").fill("Hello")
+page.get_by_label("Message:").press("Tab")
+page.get_by_label("Subscribe to newsletter").check()
+page.get_by_label("Subscribe to newsletter").press("Tab")
+page.get_by_role("button", name="Submit").press("Enter")
+"""
+ )
+
+ assert obs["last_action_error"] == ""
+ assert reward == 0
+ assert term == False
+ assert trunc == False
+
+ obs, reward, term, trunc, info = env.step(
+ f"""\
+page.get_by_label("LABEL DOES NOT EXIST:").fill("Hello")
+page.get_by_role("button", name="Submit").press("Enter")
+"""
+ )
+
+ assert obs["last_action_error"] != ""
+ assert reward == 0
+ assert term == False
+ assert trunc == False
+
+ env.close()
+
+
+def test_max_episode_steps():
+ # no max_steps
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": TEST_PAGE},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ timeout=__TIMEOUT,
+ )
+ obs, info = env.reset()
+
+ obs, reward, term, trunc, info = env.step("")
+
+ assert term == False
+ assert trunc == False
+
+ obs, reward, term, trunc, info = env.step("")
+
+ assert term == False
+ assert trunc == False
+
+ # max_steps = 2
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": TEST_PAGE},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ timeout=__TIMEOUT,
+ max_episode_steps=2,
+ )
+ obs, info = env.reset()
+
+ obs, reward, term, trunc, info = env.step("")
+
+ assert term == False
+ assert trunc == False
+
+ obs, reward, term, trunc, info = env.step("")
+
+ assert term == False
+ assert trunc == True
+
+ env.close()
+
+
+def test_active_page():
+ action_set = PythonActionSet()
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": TEST_PAGE},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ timeout=__TIMEOUT,
+ action_mapping=action_set.to_python_code,
+ )
+ obs, info = env.reset()
+
+ assert len(obs["open_pages_urls"]) == 1
+ assert obs["active_page_index"] == 0
+
+ obs, reward, term, trunc, info = env.step("page.context.new_page()")
+
+ assert len(obs["open_pages_urls"]) == 2
+ assert obs["active_page_index"] == 1
+
+ obs, reward, term, trunc, info = env.step("page.context.pages[0].mouse.click(5, 5)")
+
+ assert len(obs["open_pages_urls"]) == 2
+ assert obs["active_page_index"] == 0
+
+ obs, reward, term, trunc, info = env.step("page.context.pages[1].mouse.click(5, 5)")
+
+ assert len(obs["open_pages_urls"]) == 2
+ assert obs["active_page_index"] == 1
+
+ obs, reward, term, trunc, info = env.step("page.context.pages[1].close()")
+
+ assert len(obs["open_pages_urls"]) == 1
+ assert obs["active_page_index"] == 0
+
+ obs, reward, term, trunc, info = env.step("page.close()")
+
+ assert len(obs["open_pages_urls"]) == 1
+ assert obs["active_page_index"] == 0
+
+ obs, reward, term, trunc, info = env.step("page.context.new_page()")
+
+ assert len(obs["open_pages_urls"]) == 2
+ assert obs["active_page_index"] == 1
+
+ obs, reward, term, trunc, info = env.step("page.close()")
+
+ assert len(obs["open_pages_urls"]) == 1
+ assert obs["active_page_index"] == 0
+
+ env.close()
+
+
+def test_nested_iframes_default_demo_mode():
+ demo_mode = "default"
+ action_set = HighLevelActionSet(demo_mode=demo_mode)
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": BASIC_IFRAME_PAGE},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ timeout=__TIMEOUT,
+ action_mapping=action_set.to_python_code,
+ )
+ obs, info = env.reset()
+ assert not obs["last_action_error"]
+
+ soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
+ inner_checkbox = soup.find("input", attrs={"id": "checkbox_2"})
+
+ assert inner_checkbox.has_attr("checked")
+ # click box
+ action = f"""\
+click({repr(inner_checkbox.get(BID_ATTR))})
+"""
+ click_start = time()
+ obs, _, _, _, _ = env.step(action)
+ click_end = time()
+ # clicking should be slow in demo mode
+ assert click_end - click_start > 1
+
+ soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
+ inner_checkbox = soup.find("input", attrs={"id": "checkbox_2"})
+ # box is not checked; meaning it was clicked by the previous action
+ assert not inner_checkbox.has_attr("checked")
+
+ env.close()
+
+
+@pytest.mark.parametrize("global_demo_mode", [True, False])
+@pytest.mark.parametrize("demo_mode", [None, "off", "default", "only_visible_elements", "all_blue"])
+def test_demo_mode(global_demo_mode: bool, demo_mode: str):
+ action_set = HighLevelActionSet(demo_mode=demo_mode)
+ browsergym.core.action.set_global_demo_mode(global_demo_mode)
+
+ demo_mode_active = (global_demo_mode and demo_mode is None) or (
+ demo_mode is not None and demo_mode != "off"
+ )
+
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": TEST_PAGE},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ timeout=__TIMEOUT,
+ action_mapping=action_set.to_python_code,
+ )
+ obs, info = env.reset()
+ assert not obs["last_action_error"]
+
+ soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
+ email_field = soup.find("input", attrs={"id": "email"})
+ checkbox = soup.find("input", attrs={"id": "subscribe"})
+
+ # check that the email field is empty
+ assert email_field.get("value") == ""
+
+ # check that the box is not checked
+ assert not checkbox.has_attr("checked")
+
+ # click box
+ action = f"""\
+click({repr(checkbox.get(BID_ATTR))})
+"""
+ obs, reward, terminated, truncated, info = env.step(action)
+ assert not obs["last_action_error"]
+
+ soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
+ checkbox = soup.find("input", attrs={"type": "checkbox", "id": "subscribe"})
+
+ # check that the box is checked
+ assert checkbox.has_attr("checked")
+
+ # clicking should be slow (only in demo mode)
+ action_time = info["action_exec_stop"] - info["action_exec_start"]
+ if demo_mode_active:
+ assert action_time > 2
+ else:
+ assert action_time <= 1.5
+
+ # fill box
+ action = f"""\
+fill({repr(email_field.get(BID_ATTR))}, "test@test")
+"""
+ obs, reward, terminated, truncated, info = env.step(action)
+ assert not obs["last_action_error"]
+
+ soup = bs4.BeautifulSoup(flatten_dom_to_str(obs["dom_object"]), "lxml")
+
+ # email field has been filled correctly
+ email_field = soup.find("input", attrs={"id": "email"})
+ assert email_field.get("value") == "test@test"
+
+ # typing should be slow (only in demo mode)
+ action_time = info["action_exec_stop"] - info["action_exec_start"]
+ if demo_mode_active:
+ assert action_time > 2
+ else:
+ assert action_time <= 1.5
+
+ env.close()
+
+
+@pytest.mark.parametrize("resizeable_window", (True, False))
+@pytest.mark.parametrize("size", ((1600, 1200), (800, 800)))
+def test_resizeable_window(resizeable_window, size):
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": TEST_PAGE},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ timeout=__TIMEOUT,
+ viewport={"width": size[0], "height": size[1]},
+ resizeable_window=resizeable_window,
+ )
+ obs, info = env.reset()
+ assert not obs["last_action_error"]
+
+ assert (obs["screenshot"].shape[1], obs["screenshot"].shape[0]) == size
+
+ env.close()
diff --git a/BrowserGym/tests/core/test_observation.py b/BrowserGym/tests/core/test_observation.py
new file mode 100644
index 0000000000000000000000000000000000000000..36bb341937b265d8792199ac4722d404b3199049
--- /dev/null
+++ b/BrowserGym/tests/core/test_observation.py
@@ -0,0 +1,819 @@
+import ast
+import os
+from pathlib import Path
+
+import bs4
+import gymnasium as gym
+import numpy as np
+import pytest
+import regex as re
+
+# register gym environments
+import browsergym.core
+from browsergym.core.constants import BROWSERGYM_ID_ATTRIBUTE as BID_ATTR
+from browsergym.core.observation import (
+ _post_extract,
+ _pre_extract,
+ extract_all_frame_axtrees,
+ extract_dom_snapshot,
+ extract_merged_axtree,
+ extract_screenshot,
+)
+from browsergym.utils.obs import flatten_axtree_to_str, flatten_dom_to_str
+
+__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
+__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
+__TIMEOUT = 500
+__VIEWPORT = {"width": 800, "height": 600}
+
+__DATA_DIR = Path(__file__).resolve().parent / "data"
+
+TEST_PAGE = f"file://{__DATA_DIR}/test_page.html"
+TEST_PAGE_2 = f"file://{__DATA_DIR}/test_page_2.html"
+MULTI_IFRAME_URL = f"file://{__DATA_DIR}/basic_iframe_site/basic_iframe_2.html"
+SHADOW_DOM_URL = f"file://{__DATA_DIR}/basic_shadow_dom_site/basic_shadow_dom.html"
+SIMPLE_SHADOW_DOM_URL = f"file://{__DATA_DIR}/basic_shadow_dom_site/simple_shadow_dom.html"
+BASIC_IFRAME_URL = f"file://{__DATA_DIR}/basic_shadow_iframe_site/basic_iframe.html"
+BASIC_IFRAME_2_URL = f"file://{__DATA_DIR}/basic_shadow_iframe_site/basic_iframe_2.html"
+INNER_IFRAME_URL = f"file://{__DATA_DIR}/basic_shadow_iframe_site/inner-iframe.html"
+OUTER_IFRAME_URL = f"file://{__DATA_DIR}/basic_shadow_iframe_site/outer-iframe.html"
+CUSTOM_PAGE_URL = f"file://{__DATA_DIR}/custom_page/basic_iframe.html"
+MULTI_IFRAME_URL = f"file://{__DATA_DIR}/basic_iframe_site/basic_iframe_2.html"
+
+
+@pytest.mark.skip(reason="TODO: how to get the final viewport size right?")
+def test_extract_screenshot():
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": TEST_PAGE},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ viewport=__VIEWPORT,
+ timeout=__TIMEOUT,
+ )
+ obs, info = env.reset()
+
+ _pre_extract(env.unwrapped.page)
+ screenshot = extract_screenshot(env.unwrapped.page)
+ _post_extract(env.unwrapped.page)
+
+ # 3D array (height, width, rgb) of unsigned bytes (between 0 and 255)
+ assert isinstance(screenshot, np.ndarray)
+ assert len(screenshot.shape) == 3
+ assert screenshot.shape[0] == __VIEWPORT["height"]
+ assert screenshot.shape[1] == __VIEWPORT["width"]
+ assert screenshot.shape[2] == 3 # RGB
+ assert screenshot.dtype == np.uint8
+
+ env.close()
+
+
+def test_extract_axtree_simple():
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": TEST_PAGE},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ viewport=__VIEWPORT,
+ timeout=__TIMEOUT,
+ )
+ obs, info = env.reset()
+
+ _pre_extract(env.unwrapped.page)
+ all_frame_axtrees = extract_all_frame_axtrees(env.unwrapped.page)
+ merged_axtree = extract_merged_axtree(env.unwrapped.page)
+ _post_extract(env.unwrapped.page)
+
+ # single frame
+ assert len(all_frame_axtrees) == 1
+ assert len(next(iter(all_frame_axtrees.values()))["nodes"]) == len(merged_axtree["nodes"])
+
+ env.close()
+
+
+def test_extract_axtree_multi_iframe():
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": MULTI_IFRAME_URL},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ viewport=__VIEWPORT,
+ timeout=__TIMEOUT,
+ )
+ obs, info = env.reset()
+
+ _pre_extract(env.unwrapped.page)
+ all_frame_axtrees = extract_all_frame_axtrees(env.unwrapped.page)
+ merged_axtree = extract_merged_axtree(env.unwrapped.page)
+ _post_extract(env.unwrapped.page)
+
+ # multiple frames
+ assert len(all_frame_axtrees) == 3
+
+ # total number of nodes in merged and individual frame axtrees should be equal
+ n_nodes = 0
+ for frame_id, frame_axtree in all_frame_axtrees.items():
+ n_nodes += len(frame_axtree["nodes"])
+
+ assert n_nodes == len(merged_axtree["nodes"])
+
+ env.close()
+
+
+def test_extract_dom_simple():
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": TEST_PAGE},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ viewport=__VIEWPORT,
+ timeout=__TIMEOUT,
+ )
+ obs, info = env.reset()
+
+ _pre_extract(env.unwrapped.page)
+ dom_snapshot = extract_dom_snapshot(env.unwrapped.page)
+ _post_extract(env.unwrapped.page)
+
+ # single frame
+ assert len(dom_snapshot["documents"]) == 1
+
+ env.close()
+
+
+def test_extract_dom_multi_iframe():
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": MULTI_IFRAME_URL},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ viewport=__VIEWPORT,
+ timeout=__TIMEOUT,
+ )
+ obs, info = env.reset()
+
+ _pre_extract(env.unwrapped.page)
+ dom_snapshot = extract_dom_snapshot(env.unwrapped.page)
+ _post_extract(env.unwrapped.page)
+
+ # multiple frames
+ assert len(dom_snapshot["documents"]) == 3
+
+ env.close()
+
+
+def test_simple_shadowdom():
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": SIMPLE_SHADOW_DOM_URL},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ viewport=__VIEWPORT,
+ timeout=__TIMEOUT,
+ )
+ obs, info = env.reset()
+
+ # retrieve an input element inside the shadowDOM
+ elem = env.unwrapped.page.get_by_placeholder("Level 1.1 Text Field 1")
+ assert elem.count() == 1
+
+ # elem should have a browsergym_id in its BID_ATTR attribute
+ elem_id = elem.get_attribute(BID_ATTR)
+ assert elem_id is not None
+
+ # elem should not have an aria-description (it should have been cleaned)
+ aria_description = elem.get_attribute("aria-description")
+ assert aria_description is None
+
+ # elem should not have an aria-roledescription (it should have been cleaned)
+ aria_roledescription = elem.get_attribute("aria-roledescription")
+ assert aria_roledescription is None
+
+ # check that elem can be retrieved correctly using its browsergym_id
+ elem2 = env.unwrapped.page.get_by_test_id(elem_id)
+ assert elem2.count() == 1
+ assert env.unwrapped.page.evaluate(
+ "([node1, node2]) => {return node1.isEqualNode(node2);}",
+ [elem.element_handle(), elem2.element_handle()],
+ )
+
+ env.close()
+
+
+def test_nested_shadowdom():
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": SHADOW_DOM_URL},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ viewport=__VIEWPORT,
+ timeout=__TIMEOUT,
+ )
+ obs, info = env.reset()
+
+ # retrieve an input element inside the nested shadowDOM
+ elem = env.unwrapped.page.get_by_placeholder("Level 2.4 Text Field 2")
+ assert elem.count() == 1
+
+ # elem should have a browsergym_id in its BID_ATTR attribute
+ elem_id = elem.get_attribute(BID_ATTR)
+ assert elem_id is not None
+
+ # elem should not have an aria-description (it should have been cleaned)
+ aria_description = elem.get_attribute("aria-description")
+ assert aria_description is None
+
+ # elem should not have an aria-roledescription (it should have been cleaned)
+ aria_roledescription = elem.get_attribute("aria-roledescription")
+ assert aria_roledescription is None
+
+ # check that elem can be retrieved correctly using its browsergym_id
+ elem2 = env.unwrapped.page.get_by_test_id(elem_id)
+ assert elem2.count() == 1
+ assert env.unwrapped.page.evaluate(
+ "([node1, node2]) => {return node1.isEqualNode(node2);}",
+ [elem.element_handle(), elem2.element_handle()],
+ )
+
+ env.close()
+
+
+@pytest.mark.parametrize(
+ "url",
+ [
+ TEST_PAGE,
+ MULTI_IFRAME_URL,
+ SIMPLE_SHADOW_DOM_URL,
+ BASIC_IFRAME_URL,
+ BASIC_IFRAME_2_URL,
+ INNER_IFRAME_URL,
+ OUTER_IFRAME_URL,
+ ],
+)
+def test_dom_has_bids_no_aria(url):
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": url},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ viewport=__VIEWPORT,
+ timeout=__TIMEOUT,
+ )
+ obs, info = env.reset()
+
+ # exceptions
+ dom_node_names_without_bid = ["html", "#text", "#document", "#comment"]
+ axtree_roles_without_bid = ["RootWebArea", "none", "generic", "StaticText", "InlineTextBox"]
+
+ # 1. test the DOM snapshot for BID_ATTR, "aria-description" and "aria-roledescription"
+
+ # check all HTML elements in the DOM for unique browsergym id
+ dom = obs["dom_object"]
+ bids = []
+ for doc in dom["documents"]:
+ for node_name_id, attributes in zip(doc["nodes"]["nodeName"], doc["nodes"]["attributes"]):
+ node_name = dom["strings"][node_name_id]
+ # read the node's attributes
+ j = 0
+ bid = None
+ while j < len(attributes):
+ attr_name = dom["strings"][attributes[j]]
+ attr_value = dom["strings"][attributes[j + 1]]
+
+ # print(f"{node_name} {attr_name}: {attr_value}")
+
+ # check that the "aria-roledescription" attribute is absent (this is specific to this test page)
+ assert attr_name != "aria-roledescription"
+
+ # check that the "aria-description" attribute is absent (this is specific to this test page)
+ assert attr_name != "aria-description"
+
+ # extract the browsergym id from the BID_ATTR attribute
+ if attr_name == BID_ATTR:
+ bid = attr_value
+ j += 2
+
+ # check that all elements (with exceptions) have a browsergym id
+ if node_name not in dom_node_names_without_bid:
+ assert bid is not None
+
+ if bid is not None:
+ bids.append(bid)
+
+ # check that all browsergym ids are unique
+ assert len(bids) == len(set(bids))
+
+ # 2. test the AXTree for "browsergym_id" and "description" properties
+ axtree = obs["axtree_object"]
+ bids = []
+ for node in axtree["nodes"]:
+ bid = node.get("browsergym_id", None)
+
+ # check that the "aria-roledescription" attribute is absent (this is specific to this test page)
+ for property in node.get("properties", []):
+ assert property["name"] != "roledescription"
+
+ # check that the "aria-description" attribute is absent (this is specific to this test page)
+ assert "description" not in node
+
+ # check that all elements (with exceptions) have a browsergym id
+ if node["role"]["value"] not in axtree_roles_without_bid:
+ assert bid is not None
+
+ if bid is not None:
+ bids.append(bid)
+
+ # check that all browsergym ids are unique
+ assert len(bids) == len(set(bids))
+
+ env.close()
+
+
+def test_dom_to_text():
+ env = gym.make(
+ "browsergym/openended",
+ task_kwargs={"start_url": TEST_PAGE_2},
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ timeout=__TIMEOUT,
+ action_mapping=None,
+ )
+ obs, info = env.reset()
+
+ dom = flatten_dom_to_str(obs["dom_object"])
+ assert isinstance(dom, str)
+ assert "Subscribe to newsletter" in dom
+ assert "Janice" not in dom
+
+ obs, reward, term, trunc, info = env.step(
+ f"""\
+page.get_by_label("Name:").click()
+page.get_by_label("Name:").fill("Janice")
+page.get_by_label("Name:").press("Tab")
+page.get_by_label("Email:").fill("janice@mail.com")
+page.get_by_label("Email:").press("Tab")
+page.get_by_label("Age:", exact=True).fill("21")
+page.get_by_label("Age:", exact=True).press("Tab")
+"""
+ )
+
+ dom = flatten_dom_to_str(obs["dom_object"])
+ assert "Janice" in dom
+ assert "janice@mail.com" in dom
+
+ dom = flatten_dom_to_str(
+ obs["dom_object"],
+ extra_properties=obs["extra_element_properties"],
+ with_visible=True,
+ with_clickable=True,
+ with_center_coords=True,
+ with_bounding_box_coords=True,
+ with_som=True,
+ )
+ assert 'box="(' in dom
+ assert 'center="(' in dom
+ assert 'clickable="" som="" type="submit" value="Submit" visible=""' in dom
+ assert 'head bid="1">' in dom
+ assert 'clickable="" for="email" visible=""' in dom
+ assert "Text within a non-html tag" in dom
+ assert "Text that should not be visible" in dom
+
+ dom = flatten_dom_to_str(
+ obs["dom_object"], extra_properties=obs["extra_element_properties"], filter_som_only=True
+ )
+ assert 'for="email"' not in dom
+ assert 'type="submit" value="Submit"' in dom
+ assert "Text within a non-html tag" not in dom
+ assert "Text that should not be visible" not in dom
+
+ dom = flatten_dom_to_str(
+ obs["dom_object"],
+ extra_properties=obs["extra_element_properties"],
+ filter_visible_only=True,
+ )
+ assert " None:
+ """
+ Args:
+ seed: random seed.
+ start_url: str, the url for the starting page.
+ goal: str, the initial goal.
+
+ """
+ super().__init__(seed)
+ self.start_url = start_url
+ self.goal = [
+ {"type": "text", "text": "This is a mock task with an image goal."},
+ {
+ "type": "image_url",
+ "image_url": "",
+ },
+ ]
+
+ def setup(self, page: playwright.sync_api.Page) -> tuple[str, dict]:
+ """Navigate `page` to `self.start_url` and return `(self.goal, {})`.
+
+ NOTE(review): the return annotation says `str`, but `self.goal` is assigned
+ a list of dicts in `__init__` - confirm which is intended.
+ """
+ # navigation timeout of 10000, presumably milliseconds - confirm against playwright
+ page.goto(self.start_url, timeout=10000)
+ return self.goal, {}
+
+ def teardown(self) -> None:
+ """Do nothing; this mock task has no cleanup step."""
+ pass
+
+ def validate(
+ self, page: playwright.sync_api.Page, chat_messages: list[str]
+ ) -> Tuple[float, bool, str, dict]:
+ """Return `(reward, done, msg, info)`; `done` is True once a user message "exit" is found.
+
+ The reward is always 0, the message is always empty and the info dict is
+ always empty. `page` is not used.
+
+ NOTE(review): `chat_messages` is annotated as `list[str]`, but each item is
+ indexed with "role" and "message" keys - the annotation looks wrong.
+ """
+ reward, done, msg, info = 0, False, "", {}
+
+ # stop at the first user message whose content is exactly "exit"
+ for message in chat_messages:
+ if message["role"] == "user" and message["message"] == "exit":
+ done = True
+ break
+
+ return reward, done, msg, info
+
+
+def test_mock_image_goal_task():
+ env = BrowserEnv(MockImageGoalTask)
+ obs, _ = env.reset()
+
+ assert "goal_object" in obs
+ assert len(obs["goal_object"]) == 2
+ assert obs["goal_object"][0]["type"] == "text"
+ assert obs["goal_object"][0]["text"] == "This is a mock task with an image goal."
+ assert obs["goal_object"][1]["type"] == "image_url"
+
+ env.chat.add_message("user", "exit")
+ obs, reward, terminated, _, _ = env.step("send_msg_to_user('bye')")
+
+ assert reward == 0
+ assert terminated is True
+
+ env.close()
+
+
+# allow running this test directly as a script, without pytest
+if __name__ == "__main__":
+ test_mock_image_goal_task()
diff --git a/BrowserGym/tests/experiments/__init__.py b/BrowserGym/tests/experiments/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..75f09d6fbde51609da41e1041eb3fb8125d808cb
--- /dev/null
+++ b/BrowserGym/tests/experiments/__init__.py
@@ -0,0 +1,2 @@
+# bugfix: use same playwright instance in browsergym and pytest
+from ..utils import setup_playwright
diff --git a/BrowserGym/tests/experiments/test_benchmark.py b/BrowserGym/tests/experiments/test_benchmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..9222be11c98c628551499aab0d43cf218a0fcc30
--- /dev/null
+++ b/BrowserGym/tests/experiments/test_benchmark.py
@@ -0,0 +1,351 @@
+import dataclasses
+import os
+import random
+import re
+import tempfile
+
+import numpy as np
+import pytest
+
+from browsergym.core.action.base import AbstractActionSet
+from browsergym.experiments.agent import Agent
+from browsergym.experiments.benchmark import Benchmark, HighLevelActionSetArgs
+from browsergym.experiments.benchmark.configs import DEFAULT_BENCHMARKS
+from browsergym.experiments.benchmark.utils import make_env_args_list_from_fixed_seeds
+from browsergym.experiments.loop import AbstractAgentArgs, ExpArgs, get_exp_result
+from browsergym.utils.obs import flatten_axtree_to_str
+
+
+class MiniwobTestAgent(Agent):
+    """Scripted agent that clicks the first button listed in the flattened AXTree."""
+
+    def __init__(self, action_set: AbstractActionSet):
+        self.action_set = action_set
+
+    def obs_preprocessor(self, obs: dict):
+        return {"axtree_txt": flatten_axtree_to_str(obs["axtree_object"])}
+
+    def get_action(self, obs: dict) -> tuple[str, dict]:
+        # first line shaped like "[<digits>] ... button" (case-insensitive); the digits are the bid
+        button_line = re.search(
+            r"^\s*\[(\d+)\].*button", obs["axtree_txt"], re.MULTILINE | re.IGNORECASE
+        )
+        if button_line is None:
+            raise Exception("Can't find the button's bid")
+        bid = button_line.group(1)
+        return f'click("{bid}")', dict(think="I'm clicking the button as requested.")
+
+
+@dataclasses.dataclass
+class MiniwobTestAgentArgs(AbstractAgentArgs):
+ high_level_action_set: HighLevelActionSetArgs = None  # must be set before make_agent(): the None default has no make_action_set()
+
+ def make_agent(self):
+ return MiniwobTestAgent(action_set=self.high_level_action_set.make_action_set())  # agent receives the action set returned by make_action_set()
+
+
+def test_build_benchmarks():
+    """Every default benchmark builds with the expected size and survives a JSON round trip."""
+    expected_bench_size = {
+        "miniwob": 125 * 5,
+        "miniwob_tiny_test": 2 * 2,
+        "webarena": 812,
+        "webarena_tiny": 6,
+        "visualwebarena": 910,
+        "visualwebarena_tiny": 4,
+        "workarena_l1": 33 * 10,
+        "workarena_l2_agent_curriculum_eval": 235,
+        "workarena_l3_agent_curriculum_eval": 235,
+        "assistantbench": 214,
+        "weblinx": 31586,
+    }
+    for bench_name, build in DEFAULT_BENCHMARKS.items():
+        bench = build()
+        assert bench_name == bench.name
+        assert bench.env_args_list  # non-empty
+        assert bench.task_metadata is not None
+        assert len(bench.env_args_list) == expected_bench_size[bench_name]
+        assert bench.to_dict() == Benchmark.from_json(bench.to_json()).to_dict()
+
+
+def test_benchmark_subset():
+    """Regexp and glob filters select the same tasks and differ only in the subset name."""
+    miniwob: Benchmark = DEFAULT_BENCHMARKS["miniwob"]()
+
+    click_tasks = miniwob.subset_from_regexp(column="task_name", regexp="click")
+    assert len(click_tasks.env_args_list) == 31 * 5
+    assert click_tasks.name == "miniwob[task_name=/click/]"
+
+    # narrow the click subset further, once per filter flavour
+    by_regexp = click_tasks.subset_from_regexp(column="miniwob_category", regexp="original")
+    by_glob = click_tasks.subset_from_glob(column="miniwob_category", glob="original")
+
+    assert by_regexp.name == "miniwob[task_name=/click/][miniwob_category=/original/]"
+    assert by_glob.name == "miniwob[task_name=/click/][miniwob_category=original]"
+
+    # the name records which filter was used, so it is dropped before comparing;
+    # everything else must be identical between the two subsets
+    regexp_dict = by_regexp.to_dict()
+    glob_dict = by_glob.to_dict()
+    regexp_dict.pop("name")
+    glob_dict.pop("name")
+
+    assert regexp_dict == glob_dict
+
+
+def test_benchmark_subset_from_task_ratio():
+    """Ratio-based subsets depend on the seed and never touch the global `random` state.
+
+    A ratio of 0.5 on webarena is expected to keep 812 // 2 tasks.
+    """
+    webarena: Benchmark = DEFAULT_BENCHMARKS["webarena"]()
+
+    # snapshot of the global RNG, compared against after every subsampling call
+    rng_state_before = random.getstate()
+
+    half = webarena.subset_from_task_ratio(ratio=0.5, seed=1)
+    assert len(half.env_args_list) == 812 // 2
+    assert half.name == "webarena[ratio=0.5, seed=1]"
+    # the global RNG must be untouched
+    assert random.getstate() == rng_state_before
+
+    # subsample the subset again, once per seed
+    resampled_seed_1 = half.subset_from_task_ratio(ratio=0.5, seed=1)
+    resampled_seed_2 = half.subset_from_task_ratio(ratio=0.5, seed=2)
+    assert random.getstate() == rng_state_before
+
+    # different seeds must not select the same tasks position by position
+    same_task_at_position = [
+        first.task_name == second.task_name
+        for first, second in zip(resampled_seed_1.env_args_list, resampled_seed_2.env_args_list)
+    ]
+    assert not np.all(same_task_at_position)
+
+    # equal sizes, yet different content even once the name is ignored
+    dict_1 = resampled_seed_1.to_dict()
+    dict_2 = resampled_seed_2.to_dict()
+    dict_1.pop("name")
+    dict_2.pop("name")
+    assert len(dict_1["env_args_list"]) == len(dict_2["env_args_list"])
+    assert dict_1 != dict_2
+
+
+def test_prepare_backend_miniwob():
+ MINIWOB_URL = os.environ["MINIWOB_URL"]  # saved for the finally block; raises KeyError when the variable is unset
+ try:
+ benchmark: Benchmark = DEFAULT_BENCHMARKS["miniwob"]()
+ # with the variable set, backend preparation is expected not to raise
+ benchmark.prepare_backends()
+ # with the variable removed, it must raise
+ del os.environ["MINIWOB_URL"]
+ with pytest.raises(Exception):
+ benchmark.prepare_backends()
+ # an empty value must raise as well
+ os.environ["MINIWOB_URL"] = ""
+ with pytest.raises(Exception):
+ benchmark.prepare_backends()
+ finally:
+ os.environ["MINIWOB_URL"] = MINIWOB_URL  # put the original value back whatever happened above
+
+
+def test_prepare_backend_assistantbench():
+ benchmark: Benchmark = DEFAULT_BENCHMARKS["assistantbench"]()
+ benchmark.prepare_backends()  # passes as long as no exception is raised; nothing else is asserted
+
+
+@pytest.mark.skip
+def test_prepare_backend_webarena():
+ WA_FULL_RESET = os.environ["WA_FULL_RESET"]  # saved for the finally block; raises KeyError when the variable is unset
+ try:
+ benchmark: Benchmark = DEFAULT_BENCHMARKS["webarena"]()
+ # with the variable set, backend preparation is expected not to raise
+ benchmark.prepare_backends()
+ # with the variable removed, it must raise
+ del os.environ["WA_FULL_RESET"]
+ with pytest.raises(Exception):
+ benchmark.prepare_backends()
+ # a reset URL on localhost:12345 must raise too (presumably nothing listens there — confirm)
+ os.environ["WA_FULL_RESET"] = "http://localhost:12345/reset"
+ with pytest.raises(Exception):
+ benchmark.prepare_backends()
+ finally:
+ os.environ["WA_FULL_RESET"] = WA_FULL_RESET  # put the original value back whatever happened above
+
+
+@pytest.mark.skip
+def test_prepare_backend_visualwebarena():
+ VWA_FULL_RESET = os.environ["VWA_FULL_RESET"]  # saved for the finally block; raises KeyError when the variable is unset
+ try:
+ benchmark: Benchmark = DEFAULT_BENCHMARKS["visualwebarena"]()
+ # with the variable set, backend preparation is expected not to raise
+ benchmark.prepare_backends()
+ # with the variable removed, it must raise
+ del os.environ["VWA_FULL_RESET"]
+ with pytest.raises(Exception):
+ benchmark.prepare_backends()
+ # a reset URL on localhost:12345 must raise too (presumably nothing listens there — confirm)
+ os.environ["VWA_FULL_RESET"] = "http://localhost:12345/reset"
+ with pytest.raises(Exception):
+ benchmark.prepare_backends()
+ finally:
+ os.environ["VWA_FULL_RESET"] = VWA_FULL_RESET  # put the original value back whatever happened above
+
+
+@pytest.mark.skip
+def test_prepare_backend_weblinx():
+ BROWSERGYM_WEBLINX_CACHE_DIR = os.environ["BROWSERGYM_WEBLINX_CACHE_DIR"]  # saved for the finally block; KeyError when unset
+ try:
+ benchmark: Benchmark = DEFAULT_BENCHMARKS["weblinx"]()
+ # with the variable set, backend preparation is expected not to raise
+ benchmark.prepare_backends()
+ # with the variable removed, it must raise
+ del os.environ["BROWSERGYM_WEBLINX_CACHE_DIR"]
+ with pytest.raises(Exception):
+ benchmark.prepare_backends()
+
+ finally:
+ os.environ["BROWSERGYM_WEBLINX_CACHE_DIR"] = BROWSERGYM_WEBLINX_CACHE_DIR  # put the original value back
+
+
+def test_run_mock_benchmark():
+ benchmark = Benchmark(
+ name="miniwob_click_test",
+ high_level_action_set_args=HighLevelActionSetArgs(
+ subsets=["bid"],
+ multiaction=False,
+ strict=False,
+ retry_with_force=True,
+ demo_mode="off",
+ ),
+ is_multi_tab=False,
+ supports_parallel_seeds=True,
+ backends=["miniwob"],
+ env_args_list=make_env_args_list_from_fixed_seeds(
+ task_list=["miniwob.click-test"],
+ max_steps=5,
+ fixed_seeds=[0, 1],
+ ),
+ )
+ # run the scripted agent once per entry of env_args_list (one task, seeds 0 and 1)
+ for env_args in benchmark.env_args_list:
+ agent_args = MiniwobTestAgentArgs(
+ high_level_action_set=benchmark.high_level_action_set_args
+ )
+ exp_args = ExpArgs(
+ agent_args=agent_args,
+ env_args=env_args,
+ )
+ # the experiment is prepared inside a throwaway temporary directory
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ exp_args.prepare(tmp_dir)
+ exp_args.run()
+ exp_result = get_exp_result(exp_args.exp_dir)
+ exp_record = exp_result.get_exp_record()
+ # values every run is expected to report in its experiment record
+ target = {
+ "env_args.task_name": "miniwob.click-test",
+ "env_args.headless": True,
+ "env_args.record_video": False,
+ "n_steps": 1,
+ "cum_reward": 1.0,
+ "terminated": True,
+ "truncated": False,
+ }
+ # NOTE(review): two step records for n_steps == 1 — presumably the initial step plus the click; confirm
+ assert len(exp_result.steps_info) == 2
+
+ for key, target_val in target.items():
+ assert key in exp_record
+ assert exp_record[key] == target_val
+
+
+def test_dependency_graphs():
+ benchmark = Benchmark(
+ name="my_bench",
+ high_level_action_set_args=HighLevelActionSetArgs(
+ subsets=["bid"],
+ multiaction=False,
+ strict=False,
+ retry_with_force=True,
+ demo_mode="off",
+ ),
+ is_multi_tab=False,
+ supports_parallel_seeds=True,
+ backends=["miniwob"],
+ env_args_list=make_env_args_list_from_fixed_seeds(
+ task_list=["miniwob.click-test"],
+ max_steps=5,
+ fixed_seeds=[0, 1],
+ ),
+ )
+
+ # one task, two seeds
+ task_dependencies = benchmark.dependency_graph_over_tasks()
+ assert task_dependencies == {"miniwob.click-test": []}
+ # NOTE(review): the integer keys below look like indices into benchmark.env_args_list — confirm
+ env_args_dependencies = benchmark.dependency_graphs_over_env_args()
+ assert env_args_dependencies == [{0: [], 1: []}]  # a single graph holding both entries
+
+ # change to no parallel seed support
+ benchmark.supports_parallel_seeds = False
+ env_args_dependencies = benchmark.dependency_graphs_over_env_args()
+ assert env_args_dependencies == [{0: []}, {1: []}]  # now two graphs with one entry each
+
+ # webarena, 3 tasks x 1 seed
+ benchmark = DEFAULT_BENCHMARKS["webarena"]().subset_from_regexp(
+ column="task_name", regexp=r"^webarena\.[012]$"
+ )
+
+ task_dependencies = benchmark.dependency_graph_over_tasks()
+ assert task_dependencies == {
+ "webarena.0": [],
+ "webarena.1": ["webarena.0"],
+ "webarena.2": ["webarena.1"],
+ }
+
+ env_args_dependencies = benchmark.dependency_graphs_over_env_args()
+ assert env_args_dependencies == [{0: [], 1: [0], 2: [1]}]
+
+ # workarena L2, 2 task x (2 seeds, 1 seed)
+ benchmark = DEFAULT_BENCHMARKS["workarena_l2_agent_curriculum_eval"]().subset_from_regexp(
+ column="task_name",
+ regexp=r"^workarena\.servicenow\.workload-balancing-small-l2$|^workarena\.servicenow\.easy-expense-management-small-l2$",
+ )
+
+ task_dependencies = benchmark.dependency_graph_over_tasks()
+ assert task_dependencies == {
+ "workarena.servicenow.workload-balancing-small-l2": [],
+ "workarena.servicenow.easy-expense-management-small-l2": [],
+ }
+
+ env_args_dependencies = benchmark.dependency_graphs_over_env_args()
+ assert env_args_dependencies == [{0: [], 1: [], 2: []}]
+
+ # change to no parallel seed support
+ benchmark.supports_parallel_seeds = False
+ env_args_dependencies = benchmark.dependency_graphs_over_env_args()
+ assert env_args_dependencies == [{0: [], 2: []}, {1: []}]
+
+ # webarena, 6 dependent tasks x 1 seed
+ benchmark = DEFAULT_BENCHMARKS["webarena"]().subset_from_regexp(
+ column="task_name",
+ regexp=r"^webarena\.533$|^webarena\.537$|^webarena\.552$|^webarena\.410$|^webarena\.561$|^webarena\.562$",
+ )
+ # dependency lists are compared as sets, so their order does not matter here
+ task_dependencies = benchmark.dependency_graph_over_tasks()
+ assert {k: set(v) for k, v in task_dependencies.items()} == {
+ k: set(v)
+ for k, v in {
+ "webarena.410": [],
+ "webarena.533": [],
+ "webarena.537": ["webarena.533"],
+ "webarena.552": ["webarena.410", "webarena.537"],
+ "webarena.561": ["webarena.552"],
+ "webarena.562": ["webarena.552", "webarena.561"],
+ }.items()
+ }
+
+ env_args_dependencies = benchmark.dependency_graphs_over_env_args()
+ assert [{k: set(v) for k, v in deps.items()} for deps in env_args_dependencies] == [
+ {k: set(v) for k, v in {0: [], 1: [], 2: [1], 3: [0, 2], 4: [3], 5: [3, 4]}.items()}
+ ]
diff --git a/BrowserGym/tests/experiments/test_bgym.py b/BrowserGym/tests/experiments/test_bgym.py
new file mode 100644
index 0000000000000000000000000000000000000000..193822caa4dd95bc7774b96ab390514c96a30f21
--- /dev/null
+++ b/BrowserGym/tests/experiments/test_bgym.py
@@ -0,0 +1,9 @@
+import bgym
+import pytest
+
+
+def test_classes():
+ bgym.EnvArgs(task_name="something")  # construction alone must not raise
+ bgym.HighLevelActionSet()  # default arguments must be accepted
+ with pytest.raises(TypeError):
+ bgym.Agent()  # presumably abstract, hence the TypeError on direct instantiation — TODO confirm
diff --git a/BrowserGym/tests/experiments/test_exp_loop.py b/BrowserGym/tests/experiments/test_exp_loop.py
new file mode 100644
index 0000000000000000000000000000000000000000..a954f9b7f5e1fedcfd413c4490c762ff23d4aa9a
--- /dev/null
+++ b/BrowserGym/tests/experiments/test_exp_loop.py
@@ -0,0 +1,72 @@
+import re
+import tempfile
+import logging
+import dataclasses
+
+from browsergym.core.action.highlevel import HighLevelActionSet
+from browsergym.experiments.agent import Agent
+from browsergym.experiments.loop import AbstractAgentArgs, EnvArgs, ExpArgs, get_exp_result
+from browsergym.utils.obs import flatten_axtree_to_str
+
+
+class MiniwobTestAgent(Agent):
+    """Scripted agent that clicks the first button listed in the flattened AXTree."""
+
+    action_set = HighLevelActionSet(subsets="bid")
+
+    def obs_preprocessor(self, obs: dict):
+        return {"axtree_txt": flatten_axtree_to_str(obs["axtree_object"])}
+
+    def get_action(self, obs: dict) -> tuple[str, dict]:
+        # first line shaped like "[<digits>] ... button" (case-insensitive); the digits are the bid
+        button_line = re.search(
+            r"^\s*\[(\d+)\].*button", obs["axtree_txt"], re.MULTILINE | re.IGNORECASE
+        )
+        if button_line is None:
+            raise Exception("Can't find the button's bid")
+        bid = button_line.group(1)
+        return f'click("{bid}")', dict(think="I'm clicking the button as requested.")
+
+
+@dataclasses.dataclass
+class MiniwobTestAgentArgs(AbstractAgentArgs):  # declares no fields of its own
+ def make_agent(self):
+ return MiniwobTestAgent()  # the agent carries its action set as a class attribute, so no arguments are needed
+
+
+def test_run_exp():
+ exp_args = ExpArgs(
+ agent_args=MiniwobTestAgentArgs(),
+ env_args=EnvArgs(task_name="miniwob.click-test", task_seed=42),
+ )
+ # the experiment is prepared inside a throwaway temporary directory
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ exp_args.prepare(tmp_dir)
+ exp_args.run()
+ exp_result = get_exp_result(exp_args.exp_dir)
+ exp_record = exp_result.get_exp_record()
+ # values the run is expected to report in its experiment record
+ target = {
+ "env_args.task_name": "miniwob.click-test",
+ "env_args.task_seed": 42,
+ "env_args.headless": True,
+ "env_args.record_video": False,
+ "n_steps": 1,
+ "cum_reward": 1.0,
+ "terminated": True,
+ "truncated": False,
+ }
+ # NOTE(review): two step records for n_steps == 1 — presumably the initial step plus the click; confirm
+ assert len(exp_result.steps_info) == 2
+
+ for key, target_val in target.items():
+ assert key in exp_record
+ assert exp_record[key] == target_val
+
+ # TODO investigate why it's taking almost 5 seconds to solve
+ assert exp_record["stats.cum_step_elapsed"] < 5  # hard limit: the test fails at 5 or more
+ if exp_record["stats.cum_step_elapsed"] > 3:  # soft limit: only logs a warning
+ t = exp_record["stats.cum_step_elapsed"]
+ logging.warning(
+ f"miniwob.click-test is taking {t:.2f}s (> 3s) to solve with an oracle."
+ )
diff --git a/BrowserGym/tests/miniwob/__init__.py b/BrowserGym/tests/miniwob/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..75f09d6fbde51609da41e1041eb3fb8125d808cb
--- /dev/null
+++ b/BrowserGym/tests/miniwob/__init__.py
@@ -0,0 +1,2 @@
+# bugfix: use same playwright instance in browsergym and pytest
+from ..utils import setup_playwright
diff --git a/BrowserGym/tests/miniwob/test_base.py b/BrowserGym/tests/miniwob/test_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe0fdf330f7e08c148206ecbed9ad8692135ea2f
--- /dev/null
+++ b/BrowserGym/tests/miniwob/test_base.py
@@ -0,0 +1,196 @@
+import os
+import pytest
+import time
+import gymnasium as gym
+
+# register gym environments
+import browsergym.miniwob
+
+from browsergym.miniwob.all import (
+ ClickButtonTask,
+ ClickOptionTask,
+ DrawLineTask,
+ LoginUserTask,
+)
+
+__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
+__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
+
+TASKS = [ClickButtonTask, ClickOptionTask, DrawLineTask, LoginUserTask]
+
+
+@pytest.mark.parametrize("task_cls", TASKS)
+def test_validate_teardown(task_cls):
+    """A freshly set-up task validates as not done, then goes through teardown."""
+    pw_instance = browsergym.core._get_global_playwright()
+    browser = pw_instance.chromium.launch(headless=__HEADLESS, slow_mo=__SLOW_MO)
+    context = browser.new_context()
+    page = context.new_page()
+
+    task = task_cls(seed=42)
+    task.setup(page=page)
+
+    # validated right after setup, with an empty chat history
+    _, done, _, _ = task.validate(page, [])
+    assert done is False
+
+    task.teardown()
+
+    context.close()
+    browser.close()
+
+
+@pytest.mark.parametrize("task_cls", TASKS)
+def test_episode_max_time(task_cls):
+    """Once episode_max_time has elapsed, validate() reports done with zero reward."""
+    pw_instance = browsergym.core._get_global_playwright()
+    browser = pw_instance.chromium.launch(headless=__HEADLESS, slow_mo=__SLOW_MO)
+    context = browser.new_context()
+    page = context.new_page()
+
+    task = task_cls(seed=42, episode_max_time=0.2)
+    task.setup(page=page)
+
+    # sleep for longer than the 0.2 passed as episode_max_time
+    time.sleep(0.5)
+
+    reward, done, _, _ = task.validate(page, [])
+    assert done is True
+    assert reward == 0
+
+    task.teardown()
+
+    context.close()
+    browser.close()
+
+
+@pytest.mark.parametrize("task_cls", TASKS)
+def test_remove_human_display(task_cls):
+    """remove_human_display=True drops the human-facing elements and empties the query text."""
+    pw = browsergym.core._get_global_playwright()
+    browser = pw.chromium.launch(headless=__HEADLESS, slow_mo=__SLOW_MO)
+
+    # remove display
+
+    context = browser.new_context()
+    page = context.new_page()
+
+    task = task_cls(seed=42, remove_human_display=True)
+    task.setup(page=page)
+
+    for element_id in ["reward-display", "click-canvas", "sync-task-cover"]:
+        element_in_dom = page.evaluate(f"!!document.getElementById('{element_id}')")
+        assert not element_in_dom
+
+    assert page.evaluate("document.getElementById('query').innerHTML") == ""
+
+    for element_id in ["wrap", "area"]:
+        element_in_dom = page.evaluate(f"!!document.getElementById('{element_id}')")
+        assert element_in_dom
+
+    task.teardown()
+
+    context.close()
+
+    # keep display
+
+    context = browser.new_context()
+    page = context.new_page()
+
+    task = task_cls(seed=42, remove_human_display=False)
+    task.setup(page=page)
+
+    for element_id in ["reward-display", "click-canvas", "sync-task-cover"]:
+        element_in_dom = page.evaluate(f"!!document.getElementById('{element_id}')")
+        assert element_in_dom
+
+    assert page.evaluate("document.getElementById('query').innerHTML") != ""
+
+    for element_id in ["wrap", "area"]:
+        element_in_dom = page.evaluate(f"!!document.getElementById('{element_id}')")
+        assert element_in_dom
+
+    task.teardown()
+
+    context.close()
+    browser.close()
+
+
+@pytest.mark.skip(reason="TODO: how to get the final viewport size right?")
+@pytest.mark.parametrize("task_cls", TASKS)
+def test_viewport(task_cls):
+    """The screenshot observation is expected to have the 320x500 RGB shape of the Miniwob viewport."""
+    env = gym.make(
+        f"browsergym/{task_cls.get_task_id()}",
+        headless=__HEADLESS,
+        slow_mo=__SLOW_MO,
+    )
+    obs, _ = env.reset(seed=42)
+
+    # 3D array (height, width, rgb) of unsigned bytes (between 0 and 255)
+    # Miniwob viewport should be (320x500)
+    shape = obs["screenshot"].shape
+    assert shape[0] == 320
+    assert shape[1] == 500
+    assert shape[2] == 3  # RGB
+
+    env.close()
+
+
+@pytest.mark.parametrize("task_cls", TASKS)
+def test_forbidden_navigation(task_cls):
+    """Navigating the task page to another site ends the episode with zero reward."""
+    pw_instance = browsergym.core._get_global_playwright()
+    browser = pw_instance.chromium.launch(headless=__HEADLESS, slow_mo=__SLOW_MO)
+    context = browser.new_context()
+    page = context.new_page()
+
+    task = task_cls(seed=42)
+    task.setup(page=page)
+
+    # still on the task page: no reward yet, episode running
+    reward, done, _, _ = task.validate(page, [])
+    assert reward == 0.0
+    assert done == False
+
+    # after leaving for an external site: still no reward, but the episode is over
+    page.goto("http://www.google.com")
+    reward, done, _, _ = task.validate(page, [])
+    assert reward == 0.0
+    assert done == True
+
+    task.teardown()
+    context.close()
+    browser.close()
+
+
+@pytest.mark.parametrize("task_cls", TASKS)
+def test_forbidden_navigation_2(task_cls):
+    """Validating against a second tab on an external site ends the episode; the task tab stays valid."""
+    pw_instance = browsergym.core._get_global_playwright()
+    browser = pw_instance.chromium.launch(headless=__HEADLESS, slow_mo=__SLOW_MO)
+    context = browser.new_context()
+    page = context.new_page()
+
+    task = task_cls(seed=42)
+    task.setup(page=page)
+
+    reward, done, _, _ = task.validate(page, [])
+    assert reward == 0.0
+    assert done == False
+
+    # open an extra tab in the same context and send it to an external site
+    other_page = context.new_page()
+    other_page.goto("http://www.google.com")
+    # validated against the task tab: episode still running
+    reward, done, _, _ = task.validate(page, [])
+    assert reward == 0.0
+    assert done == False
+
+    # validated against the external tab: episode over, no reward
+    reward, done, _, _ = task.validate(other_page, [])
+    assert reward == 0.0
+    assert done == True
+    task.teardown()
+    context.close()
+    browser.close()
diff --git a/BrowserGym/tests/miniwob/test_click-menu-2.py b/BrowserGym/tests/miniwob/test_click-menu-2.py
new file mode 100644
index 0000000000000000000000000000000000000000..8296da1cecff67321892a9b94aca1ba58febbd12
--- /dev/null
+++ b/BrowserGym/tests/miniwob/test_click-menu-2.py
@@ -0,0 +1,81 @@
+import os
+import gymnasium as gym
+import re
+import pytest
+
+# register gym environments
+import browsergym.miniwob
+
+__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
+__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
+
+
+@pytest.mark.parametrize("seed", range(5))
+def test_cheat(seed):
+ env = gym.make(
+ "browsergym/miniwob.click-menu-2",
+ headless=__HEADLESS,
+ slow_mo=__SLOW_MO,
+ action_mapping=None,
+ )
+ obs, info = env.reset(seed=seed)
+
+ assert obs["last_action_error"] == ""
+ # the goal names the target item either by its label (match1) or by its icon class (match2)
+ match1 = re.match(
+ 'Click the "Menu" button, and then find and click on the item labeled "(.+)".', obs["goal"]
+ )
+ match2 = re.match(
+ 'Click the "Menu" button, and then find and click on the item with the "(.+)" icon.',
+ obs["goal"],
+ )
+
+ assert match1 or match2
+
+ if match1:
+ item_label = match1.groups()[0]
+ item_classname = {  # translate the label into the icon class used to locate the item
+ "Save": "ui-icon-disk",
+ "Prev": "ui-icon-seek-start",
+ "Stop": "ui-icon-stop",
+ "Play": "ui-icon-play",
+ "Next": "ui-icon-seek-end",
+ "Zoom In": "ui-icon-zoomin",
+ "Zoom Out": "ui-icon-zoomout",
+ }[item_label]
+ else:
+ item_classname = match2.groups()[0]
+ # step 1: click "Menu"; no reward and no termination expected yet
+ action = f"""\
+page.get_by_text("Menu").click()
+"""
+
+ obs, reward, term, trunc, info = env.step(action)
+
+ assert obs["last_action_error"] == ""
+ assert reward == 0
+ assert term == False
+ # step 2, only for these four icon classes: click "Playback" first (presumably a submenu — confirm)
+ if item_classname in ("ui-icon-seek-start", "ui-icon-stop", "ui-icon-play", "ui-icon-seek-end"):
+
+ action = f"""\
+page.get_by_text("Playback").click()
+"""
+
+ obs, reward, term, trunc, info = env.step(action)
+
+ assert obs["last_action_error"] == ""
+ assert reward == 0
+ assert term == False
+ # final step: click the element carrying the target icon class
+ action = f"""\
+page.locator(".{item_classname}").click()
+"""
+
+ obs, reward, term, trunc, info = env.step(action)
+
+ assert obs["last_action_error"] == ""
+ assert reward == 1
+ assert term == True
+
+ env.close()
diff --git a/BrowserGym/tests/miniwob/test_click-scroll-list.py b/BrowserGym/tests/miniwob/test_click-scroll-list.py
new file mode 100644
index 0000000000000000000000000000000000000000..8f16cd7c2fd14fcf364abc5025d10067689ec8ee
--- /dev/null
+++ b/BrowserGym/tests/miniwob/test_click-scroll-list.py
@@ -0,0 +1,42 @@
+import os
+import gymnasium as gym
+import re
+import pytest
+
+# register gym environments
+import browsergym.miniwob
+
+__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
+__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
+
+
+@pytest.mark.parametrize("seed", range(5))
+def test_cheat(seed):
+    """Solve click-scroll-list by selecting the options named in the goal, then clicking Submit."""
+    env = gym.make(
+        "browsergym/miniwob.click-scroll-list",
+        headless=__HEADLESS,
+        slow_mo=__SLOW_MO,
+        action_mapping=None,
+    )
+    obs, _ = env.reset(seed=seed)
+    assert obs["last_action_error"] == ""
+
+    # the goal lists the options to select, separated by ", "
+    goal_match = re.match("Select (.+) from the scroll list and click Submit.", obs["goal"])
+    assert goal_match
+
+    # re-join the option names so they drop into the quoted list literal of the action below
+    options = '", "'.join(goal_match.groups()[0].split(", "))
+    action = f"""\
+page.locator("#options").select_option(["{options}"])
+page.get_by_role("button", name="Submit").click()
+"""
+
+    obs, reward, term, _, _ = env.step(action)
+
+    assert obs["last_action_error"] == ""
+    assert reward == 1
+    assert term == True
+
+    env.close()
diff --git a/BrowserGym/tests/miniwob/test_use-colorwheel-2.py b/BrowserGym/tests/miniwob/test_use-colorwheel-2.py
new file mode 100644
index 0000000000000000000000000000000000000000..45d660d431bc97aa152f658f63306a8e14f611b7
--- /dev/null
+++ b/BrowserGym/tests/miniwob/test_use-colorwheel-2.py
@@ -0,0 +1,44 @@
+import os
+import gymnasium as gym
+import re
+import pytest
+
+# register gym environments
+import browsergym.miniwob
+
+__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
+__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
+
+
+@pytest.mark.parametrize("seed", range(5))
+def test_cheat(seed):
+    """Solve use-colorwheel-2 by typing the goal's color into the #col input and submitting."""
+    env = gym.make(
+        "browsergym/miniwob.use-colorwheel-2",
+        headless=__HEADLESS,
+        slow_mo=__SLOW_MO,
+        action_mapping=None,
+    )
+    obs, info = env.reset(seed=seed)  # forward the parametrized seed so each case is a distinct episode
+
+    assert obs["last_action_error"] == ""
+
+    match = re.match(
+        "Select the following color #(.+) with the color picker and hit Submit.", obs["goal"]
+    )
+    assert match
+
+    color = match.groups()[0].upper()
+
+    obs, reward, term, trunc, info = env.step(
+        f"""\
+page.locator("#col").fill("{color}")
+page.get_by_role("button", name="Submit").click()
+"""
+    )
+
+    assert obs["last_action_error"] == ""
+    assert reward == 1
+    assert term == True
+
+    env.close()
diff --git a/BrowserGym/tests/utils.py b/BrowserGym/tests/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..48595751f2a9aa8070bfebe6ae57142ff3d0d653
--- /dev/null
+++ b/BrowserGym/tests/utils.py
@@ -0,0 +1,13 @@
+import browsergym.core
+import logging
+import playwright.sync_api
+import pytest
+
+
+# session-scoped autouse fixture: setup code, executed ahead of the first test
+@pytest.fixture(scope="session", autouse=True)
+def setup_playwright(playwright: playwright.sync_api.Playwright):
+ # bugfix: re-use pytest-playwright's playwright instance in browsergym
+ # https://github.com/microsoft/playwright-python/issues/2053
+ browsergym.core._set_global_playwright(playwright)  # `playwright` here is the fixture argument, which shadows the module name
+ logging.info("Browsergym is using the playwright instance provided by pytest-playwright.")
diff --git a/BrowserGym/tests/visualwebarena/__init__.py b/BrowserGym/tests/visualwebarena/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..75f09d6fbde51609da41e1041eb3fb8125d808cb
--- /dev/null
+++ b/BrowserGym/tests/visualwebarena/__init__.py
@@ -0,0 +1,2 @@
+# bugfix: use same playwright instance in browsergym and pytest
+from ..utils import setup_playwright
diff --git a/BrowserGym/tests/visualwebarena/test_vwa_domains.py b/BrowserGym/tests/visualwebarena/test_vwa_domains.py
new file mode 100644
index 0000000000000000000000000000000000000000..80d4a4256e8bb523dc447904fd8561b748d75b0e
--- /dev/null
+++ b/BrowserGym/tests/visualwebarena/test_vwa_domains.py
@@ -0,0 +1,25 @@
+import pytest
+import playwright.sync_api
+
+from browsergym.visualwebarena.instance import VisualWebArenaInstance
+
+
+def test_is_reachable():
+    """check_status() passes with the default URLs and raises RuntimeError for an unreachable one."""
+    instance = VisualWebArenaInstance()
+    instance.check_status()
+
+    # unreachable URL: prepare the instance outside the raises block so only check_status() can satisfy it
+    instance = VisualWebArenaInstance()
+    instance.urls["reddit"] = "https://invalid.url"
+    with pytest.raises(RuntimeError):
+        instance.check_status()
+
+
+@pytest.mark.parametrize("site", ["reddit", "shopping", "wikipedia", "classifieds"])
+def test_credentials(page: playwright.sync_api.Page, site: str):
+ # default URLs and credentials
+ instance = VisualWebArenaInstance()
+ instance.ui_login(site=site, page=page)  # only checks that the login call does not raise
+
+ # TODO: test this more thoroughly
diff --git a/BrowserGym/tests/visualwebarena/test_vwa_tasks_with_reset.py b/BrowserGym/tests/visualwebarena/test_vwa_tasks_with_reset.py
new file mode 100644
index 0000000000000000000000000000000000000000..e586d2a777934063c196903e30c7beeb503cb6fb
--- /dev/null
+++ b/BrowserGym/tests/visualwebarena/test_vwa_tasks_with_reset.py
@@ -0,0 +1,40 @@
+import logging
+import os
+import random
+
+import gymnasium as gym
+import playwright.sync_api
+import pytest
+from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
+
+# register gym environments
+import browsergym.visualwebarena
+
+__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
+__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
+
+
+from browsergym.visualwebarena import VISUALWEBARENA_TASK_IDS_WITH_RESET
+
+rng = random.Random(1)  # dedicated generator with a fixed seed, so the global random state is not used
+task_ids = rng.sample(VISUALWEBARENA_TASK_IDS_WITH_RESET, 10)  # 10 distinct task ids (sample() draws without replacement)
+
+
+@retry(
+    stop=stop_after_attempt(5),
+    retry=retry_if_exception_type(playwright.sync_api.TimeoutError),
+    wait=wait_fixed(2),
+    reraise=True,
+    before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."),
+)
+@pytest.mark.parametrize("task_id", task_ids)
+@pytest.mark.slow
+@pytest.mark.serial
+def test_env_generic(task_id):
+    """Smoke test: the environment of a sampled task can be created and reset."""
+    env = gym.make(f"browsergym/{task_id}", headless=__HEADLESS, slow_mo=__SLOW_MO)
+    try:
+        env.reset()
+    finally:
+        # close even when reset() raises, so a retried attempt does not leave an environment open
+        env.close()
diff --git a/BrowserGym/tests/visualwebarena/test_vwa_tasks_without_reset.py b/BrowserGym/tests/visualwebarena/test_vwa_tasks_without_reset.py
new file mode 100644
index 0000000000000000000000000000000000000000..b3fad322381d8ceed88dae3ae3449d6da02fb197
--- /dev/null
+++ b/BrowserGym/tests/visualwebarena/test_vwa_tasks_without_reset.py
@@ -0,0 +1,74 @@
+import logging
+import os
+import random
+
+import gymnasium as gym
+import playwright.sync_api
+import pytest
+from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
+
+# register gym environments
+import browsergym.visualwebarena
+
+__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
+__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
+
+
+from browsergym.visualwebarena import VISUALWEBARENA_TASK_IDS_WITHOUT_RESET
+
+rng = random.Random(1)  # dedicated generator with a fixed seed, so the global random state is not used
+task_ids = rng.sample(VISUALWEBARENA_TASK_IDS_WITHOUT_RESET, 25)  # 25 distinct task ids (sample() draws without replacement)
+
+
+@retry(
+    stop=stop_after_attempt(5),
+    retry=retry_if_exception_type(playwright.sync_api.TimeoutError),
+    wait=wait_fixed(2),
+    reraise=True,
+    before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."),
+)
+@pytest.mark.parametrize("task_id", task_ids)
+@pytest.mark.slow
+def test_env_generic(task_id):
+    """Smoke test: the environment of a sampled task can be created and reset."""
+    env = gym.make(f"browsergym/{task_id}", headless=__HEADLESS, slow_mo=__SLOW_MO)
+    try:
+        env.reset()
+    finally:
+        # close even when reset() raises, so a retried attempt does not leave an environment open
+        env.close()
+
+
+@retry(
+    stop=stop_after_attempt(5),
+    retry=retry_if_exception_type(playwright.sync_api.TimeoutError),
+    wait=wait_fixed(2),
+    reraise=True,
+    before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."),
+)
+def test_domain_safeguard():
+    """Tab actions keep the episode running; a goto() to google.com terminates it."""
+    env = gym.make("browsergym/visualwebarena.398", headless=__HEADLESS, slow_mo=__SLOW_MO)
+    try:
+        obs, info = env.reset()
+        assert not obs["last_action_error"]
+
+        obs, reward, terminated, truncated, info = env.step("new_tab()")
+        assert not obs["last_action_error"]
+        assert not (terminated or truncated)
+
+        obs, reward, terminated, truncated, info = env.step("tab_close()")
+        assert not obs["last_action_error"]
+        assert not (terminated or truncated)
+
+        obs, reward, terminated, truncated, info = env.step("tab_focus(0)")
+        assert not obs["last_action_error"]
+        assert not (terminated or truncated)
+
+        # the action itself must succeed, yet the episode has to end
+        obs, reward, terminated, truncated, info = env.step('goto("http://www.google.com")')
+        assert not obs["last_action_error"]
+        assert terminated
+    finally:
+        # close on failed assertions and on retried timeouts alike
+        env.close()
diff --git a/BrowserGym/tests/webarena/__init__.py b/BrowserGym/tests/webarena/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..75f09d6fbde51609da41e1041eb3fb8125d808cb
--- /dev/null
+++ b/BrowserGym/tests/webarena/__init__.py
@@ -0,0 +1,2 @@
+# bugfix: use same playwright instance in browsergym and pytest
+from ..utils import setup_playwright
diff --git a/BrowserGym/tests/webarena/test_env_general.py b/BrowserGym/tests/webarena/test_env_general.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4a81b23a7a34d376ed8048e71b31606d91f589d
--- /dev/null
+++ b/BrowserGym/tests/webarena/test_env_general.py
@@ -0,0 +1,40 @@
+import gymnasium as gym
+import logging
+import os
+import playwright.sync_api
+import pytest
+import random
+
+from tenacity import retry, stop_after_attempt, retry_if_exception_type
+
+# register gym environments
+import browsergym.webarena
+
+
+__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None
+__HEADLESS = False if "DISPLAY_BROWSER" in os.environ else True
+
+
+from browsergym.webarena import ALL_WEBARENA_TASK_IDS
+
+rng = random.Random(1)  # dedicated generator with a fixed seed, so the global random state is not used
+task_ids = rng.sample(ALL_WEBARENA_TASK_IDS, 25)  # 25 distinct task ids (sample() draws without replacement)
+
+
+@retry(
+    stop=stop_after_attempt(5),
+    retry=retry_if_exception_type(playwright.sync_api.TimeoutError),
+    reraise=True,
+    before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."),
+)
+@pytest.mark.parametrize("task_id", task_ids)
+@pytest.mark.slow
+def test_env_generic(task_id):
+    """Smoke test: the environment of a sampled task can be created and reset."""
+    env = gym.make(f"browsergym/{task_id}", headless=__HEADLESS, slow_mo=__SLOW_MO)
+    try:
+        env.reset()
+    finally:
+        # close even when reset() raises: the retry decorator re-runs the whole test,
+        # and every attempt creates a new environment
+        env.close()
diff --git a/BrowserGym/tests/webarena/test_infeasible.py b/BrowserGym/tests/webarena/test_infeasible.py
new file mode 100644
index 0000000000000000000000000000000000000000..044b5c404558739529e159d9dd5c357156c90ec8
--- /dev/null
+++ b/BrowserGym/tests/webarena/test_infeasible.py
@@ -0,0 +1,50 @@
+import gymnasium as gym
+import logging
+import os
+import playwright.sync_api
+import pytest
+
+from tenacity import retry, stop_after_attempt, retry_if_exception_type
+
+# register gym environments
+import browsergym.webarena
+
+
+__SLOW_MO = 1000 if "DISPLAY_BROWSER" in os.environ else None  # slow_mo passed to gym.make; only set with DISPLAY_BROWSER
+__HEADLESS = "DISPLAY_BROWSER" not in os.environ  # headless unless DISPLAY_BROWSER is set
+
+INFEAS_TASK_IDS = [101, 115, 166]  # report_infeasible is expected to earn reward 1.0 on these
+FEAS_TASK_IDS = [165, 187, 199]  # report_infeasible is expected to earn reward 0.0 on these
+
+
+@retry(
+    stop=stop_after_attempt(5),
+    retry=retry_if_exception_type(playwright.sync_api.TimeoutError),
+    reraise=True,
+    before_sleep=lambda _: logging.info("Retrying due to a TimeoutError..."),
+)
+@pytest.mark.parametrize(
+    "task_id,infeasible",
+    [(task_id, True) for task_id in INFEAS_TASK_IDS]
+    + [(task_id, False) for task_id in FEAS_TASK_IDS],
+)
+@pytest.mark.slow
+def test_infeasible(task_id, infeasible):
+    """Check the reward given when the agent reports a task as infeasible.
+
+    Reporting infeasibility must end the episode in both cases; the reward is
+    expected to be 1.0 for tasks listed as infeasible and 0.0 for feasible ones.
+    """
+    env = gym.make(
+        f"browsergym/webarena.{task_id}",
+        headless=__HEADLESS,
+        slow_mo=__SLOW_MO,
+    )
+    try:
+        env.reset()
+        _, reward, term, _, _ = env.step('report_infeasible("Unachievable task.")')
+
+        assert term
+        assert reward == (1.0 if infeasible else 0.0)
+    finally:
+        env.close()  # always release the environment, even when reset/step/asserts fail
diff --git a/BrowserGym/tests/webarena/test_instance.py b/BrowserGym/tests/webarena/test_instance.py
new file mode 100644
index 0000000000000000000000000000000000000000..a538a53f97c7372f72a99445b62843ce30d0c9e7
--- /dev/null
+++ b/BrowserGym/tests/webarena/test_instance.py
@@ -0,0 +1,27 @@
+import pytest
+import playwright.sync_api
+
+from browsergym.webarena.instance import WebArenaInstance
+
+
+def test_is_reachable():
+    """check_status() must succeed with the default URLs and raise RuntimeError for a bad one."""
+    # default URLs
+    WebArenaInstance().check_status()
+
+    # unreachable URL: only check_status() itself is allowed to raise
+    instance = WebArenaInstance()
+    instance.urls["reddit"] = "https://invalid.url"
+    with pytest.raises(RuntimeError):
+        instance.check_status()
+
+
+@pytest.mark.parametrize(
+    "site", ["reddit", "shopping", "shopping_admin", "gitlab", "wikipedia", "map"]
+)
+def test_credentials(page: playwright.sync_api.Page, site: str):
+    """Log in to `site` through the UI using a default-configured WebArenaInstance."""
+    # only checks that ui_login() does not raise
+    WebArenaInstance().ui_login(site=site, page=page)
+
+    # TODO: test this more thoroughly
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..ff0ceb3b8813ce542604461b9575ef06ff32ffff
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,16 @@
+FROM mcr.microsoft.com/playwright:v1.41.2-jammy
+
+WORKDIR /code
+
+#COPY package.json ./package.json
+# Node packages are installed one by one; playwright is pinned to the version in the base image tag
+RUN npm i playwright@1.41.2
+RUN npm i web-locks
+RUN npm i ws
+RUN npm i user-agents
+RUN npm i uuid
+
+COPY . .
+# Editable install of the BrowserGym/ directory copied above
+RUN pip install -e BrowserGym/
+CMD [ "node", "index.mjs" ]
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..09d8be7dd45caa28cba1fb903c6edb2941dc96d7
--- /dev/null
+++ b/app.py
@@ -0,0 +1,454 @@
+import os
+import json
+import base64
+import io
+import argparse
+import logging
+import gradio as gr
+import openai
+import gymnasium as gym
+import browsergym.core
+from PIL import Image
+import numpy as np
+from browsergym.core.action.highlevel import HighLevelActionSet
+from browsergym.utils.obs import flatten_axtree_to_str, flatten_dom_to_str, prune_html
+from browsergym.experiments import Agent
+from dotenv import load_dotenv
+import cv2
+
+# Configure logging: records go to the console and to browser_agent.log
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    handlers=[logging.StreamHandler(), logging.FileHandler('browser_agent.log')],
+)
+# Module logger; main() overrides its level from --log_level
+logger = logging.getLogger(__name__)
+
+# Load variables from a .env file, if present, before the API key is read below
+load_dotenv()
+
+# Set your OpenAI API key
+openai.api_key = os.environ.get("OPENAI_API_KEY")
+
+# Example instructions to display
+# (rendered by the gr.Examples widget in main())
+EXAMPLES = [
+    "Search for the latest AI news on Google",
+    "Go to Wikipedia and find the population of Seoul",
+    "Open YouTube and play the top trending video",
+]
+
+def str2bool(v):
+    """Parse a command-line boolean; bools pass through, unrecognized text raises ArgumentTypeError."""
+    if isinstance(v, bool):
+        return v
+    truthy, falsy = ("yes", "true", "t", "y", "1"), ("no", "false", "f", "n", "0")
+    text = v.lower()
+    if text in truthy or text in falsy:
+        return text in truthy
+    raise argparse.ArgumentTypeError("Boolean value expected.")
+
+def parse_args():
+    """Build the command-line parser for the demo and parse the process arguments.
+
+    Options: --model_name, --start_url, --visual_effects, --use_html,
+    --use_axtree, --use_screenshot and --log_level. The four boolean
+    options take an explicit value that is converted by ``str2bool``.
+
+    Returns:
+        argparse.Namespace: the parsed options, one attribute per option.
+
+    Raises:
+        SystemExit: raised by argparse on invalid arguments or ``--help``.
+    """
+    parser = argparse.ArgumentParser(description="Run BrowserGym web agent.")
+    parser.add_argument(
+        "--model_name",
+        type=str,
+        default="gpt-4o",
+        help="OpenAI model name.",
+    )
+    parser.add_argument(
+        "--start_url",
+        type=str,
+        default="https://www.duckduckgo.com",
+        help="Starting URL for the openended task.",
+    )
+
+    # Boolean switches share the same shape: (flag, default, help text).
+    # They are registered in this order, between --start_url and --log_level.
+    boolean_flags = (
+        ("--visual_effects", True, "Add visual effects when the agent performs actions."),
+        ("--use_html", False, "Use HTML in the agent's observation space."),
+        ("--use_axtree", True, "Use AXTree in the agent's observation space."),
+        ("--use_screenshot", False, "Use screenshot in the agent's observation space."),
+    )
+    for flag, default, help_text in boolean_flags:
+        parser.add_argument(flag, type=str2bool, default=default, help=help_text)
+
+    parser.add_argument(
+        "--log_level",
+        type=str,
+        default="INFO",
+        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
+        help="Set the logging level.",
+    )
+    return parser.parse_args()
+
+def image_to_jpg_base64_url(image: np.ndarray | Image.Image):
+    """Encode an image (numpy array or PIL image) as a base64 JPEG ``data:`` URL string."""
+    pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image
+    # modes with an alpha channel are converted to RGB before saving as JPEG
+    if pil_image.mode in ("RGBA", "LA"):
+        pil_image = pil_image.convert("RGB")
+
+    with io.BytesIO() as jpeg_buffer:
+        pil_image.save(jpeg_buffer, format="JPEG")
+        payload = base64.b64encode(jpeg_buffer.getvalue())
+
+    return "data:image/jpeg;base64," + payload.decode()
+
+class BrowserAgent(Agent):
+    """Agent that asks an OpenAI chat model for the next browser action, one reply per call."""
+    def obs_preprocessor(self, obs: dict) -> dict:
+        """Select a subset of observation fields and add text renderings of the AXTree and the DOM."""
+        return {
+            "chat_messages": obs["chat_messages"],
+            "screenshot": obs["screenshot"],
+            "goal_object": obs["goal_object"],
+            "last_action": obs["last_action"],
+            "last_action_error": obs["last_action_error"],
+            "open_pages_urls": obs["open_pages_urls"],
+            "open_pages_titles": obs["open_pages_titles"],
+            "active_page_index": obs["active_page_index"],
+            "axtree_txt": flatten_axtree_to_str(obs["axtree_object"], filter_visible_only=True, extra_properties=obs['extra_element_properties']),
+            "pruned_html": prune_html(flatten_dom_to_str(obs["dom_object"])),
+        }
+
+    def __init__(self, model_name: str = "gpt-4o", use_html: bool = False, use_axtree: bool = True, use_screenshot: bool = False):
+        """Store the options, create the OpenAI client and action set; ValueError unless HTML or AXTree is on."""
+        super().__init__()
+        logger.info(f"Initializing BrowserAgent with model: {model_name}")
+        logger.info(f"Observation space: HTML={use_html}, AXTree={use_axtree}, Screenshot={use_screenshot}")
+        self.model_name = model_name
+        self.use_html = use_html
+        self.use_axtree = use_axtree
+        self.use_screenshot = use_screenshot
+        if not (use_html or use_axtree):
+            raise ValueError("Either use_html or use_axtree must be set to True.")
+
+        self.openai_client = openai.OpenAI()
+
+        self.action_set = HighLevelActionSet(
+            subsets=["chat", "tab", "nav", "bid", "infeas"],
+            strict=False,
+            multiaction=False,
+            demo_mode="default"
+        )
+        self.action_history = []  # model replies from earlier get_action calls, oldest first
+
+    def get_action(self, obs: dict) -> tuple[str, dict]:
+        """Build the prompt from a preprocessed observation, query the model, return (its reply text, {})."""
+        logger.debug("Preparing action request")
+        system_msgs = [{
+            "type": "text",
+            "text": """\
+# Instructions
+
+You are a UI Assistant, your goal is to help the user perform tasks using a web browser. You can
+communicate with the user via a chat, to which the user gives you instructions and to which you
+can send back messages. You have access to a web browser that both you and the user can see,
+and with which only you can interact via specific commands.
+
+Review the instructions from the user, the current state of the page and all other information
+to find the best possible next action to accomplish your goal. Your answer will be interpreted
+and executed by a program, make sure to follow the formatting instructions.
+"""
+        }]
+
+        user_msgs = []  # content parts of the single user message, in prompt order
+        # Add chat messages
+        user_msgs.append({
+            "type": "text",
+            "text": "# Chat Messages\n"
+        })
+        for msg in obs["chat_messages"]:
+            if msg["role"] in ("user", "assistant", "infeasible"):
+                user_msgs.append({
+                    "type": "text",
+                    "text": f"- [{msg['role']}] {msg['message']}\n"
+                })
+                logger.debug(f"Added chat message: [{msg['role']}] {msg['message']}")
+            elif msg["role"] == "user_image":
+                user_msgs.append({"type": "image_url", "image_url": msg["message"]})
+                logger.debug("Added user image message")
+
+        # Add open tabs info
+        user_msgs.append({
+            "type": "text",
+            "text": "# Currently open tabs\n"
+        })
+        for page_index, (page_url, page_title) in enumerate(
+            zip(obs["open_pages_urls"], obs["open_pages_titles"])
+        ):
+            user_msgs.append({
+                "type": "text",
+                "text": f"""\
+Tab {page_index}{" (active tab)" if page_index == obs["active_page_index"] else ""}
+ Title: {page_title}
+ URL: {page_url}
+"""
+            })
+            logger.debug(f"Added tab info: {page_title} ({page_url})")
+
+        # Add accessibility tree if enabled
+        if self.use_axtree:
+            user_msgs.append({
+                "type": "text",
+                "text": f"""\
+# Current page Accessibility Tree
+
+{obs["axtree_txt"]}
+
+"""
+            })
+            logger.debug("Added accessibility tree")
+
+        # Add HTML if enabled
+        if self.use_html:
+            user_msgs.append({
+                "type": "text",
+                "text": f"""\
+# Current page DOM
+
+{obs["pruned_html"]}
+
+"""
+            })
+            logger.debug("Added HTML DOM")
+
+        # Add screenshot if enabled
+        if self.use_screenshot:
+            user_msgs.append({
+                "type": "text",
+                "text": "# Current page Screenshot\n"
+            })
+            user_msgs.append({
+                "type": "image_url",
+                "image_url": {
+                    "url": image_to_jpg_base64_url(obs["screenshot"]),
+                    "detail": "auto"
+                }
+            })
+            logger.debug("Added screenshot")
+
+        # Add action space description
+        user_msgs.append({
+            "type": "text",
+            "text": f"""\
+# Action Space
+
+{self.action_set.describe(with_long_description=False, with_examples=True)}
+
+Here are examples of actions with chain-of-thought reasoning:
+
+I now need to click on the Submit button to send the form. I will use the click action on the button, which has bid 12.
+```click("12")```
+
+I found the information requested by the user, I will send it to the chat.
+```send_msg_to_user("The price for a 15\\" laptop is 1499 USD.")```
+
+"""
+        })
+
+        # Add action history and errors
+        if self.action_history:
+            user_msgs.append({
+                "type": "text",
+                "text": "# History of past actions\n"
+            })
+            for action in self.action_history:
+                user_msgs.append({
+                    "type": "text",
+                    "text": f"\n{action}\n"
+                })
+                logger.debug(f"Added past action: {action}")
+
+        if obs["last_action_error"]:
+            user_msgs.append({
+                "type": "text",
+                "text": f"""\
+# Error message from last action
+
+{obs["last_action_error"]}
+
+"""
+            })
+            logger.warning(f"Last action error: {obs['last_action_error']}")
+
+        # Ask for next action
+        user_msgs.append({
+            "type": "text",
+            "text": """\
+# Next action
+
+You will now think step by step and produce your next best action. Reflect on your past actions, any resulting error message, and the current state of the page before deciding on your next action.
+"""
+        })
+
+        # Log the full prompt for debugging
+        prompt_text_strings = []
+        for message in system_msgs + user_msgs:
+            match message["type"]:
+                case "text":
+                    prompt_text_strings.append(message["text"])
+                case "image_url":
+                    image_url = message["image_url"]  # either a plain URL string or a {"url": ...} dict
+                    if isinstance(message["image_url"], dict):
+                        image_url = image_url["url"]
+                    if image_url.startswith("data:image"):  # inline data URL: log only a short prefix
+                        prompt_text_strings.append(
+                            "image_url: " + image_url[:30] + "... (truncated)"
+                        )
+                    else:
+                        prompt_text_strings.append("image_url: " + image_url)
+                case _:
+                    raise ValueError(
+                        f"Unknown message type {repr(message['type'])} in the task goal."
+                    )
+        full_prompt_txt = "\n".join(prompt_text_strings)
+        logger.debug(full_prompt_txt)
+
+        # Query OpenAI model
+        logger.info("Sending request to OpenAI")
+        response = self.openai_client.chat.completions.create(
+            model=self.model_name,
+            messages=[
+                {"role": "system", "content": system_msgs},
+                {"role": "user", "content": user_msgs}
+            ]
+        )
+        action = response.choices[0].message.content
+        logger.info(f"Received action from OpenAI: {action}")
+        self.action_history.append(action)  # the whole reply is replayed under "History of past actions" next time
+        return action, {}
+
+def run_agent(instruction: str, model_name: str = "gpt-4o", start_url: str = "https://www.duckduckgo.com",
+              use_html: bool = False, use_axtree: bool = True, use_screenshot: bool = False):
+    """Run the agent on `instruction`, yielding the browser view after every step.
+
+    Generator: each item is ``(screenshot, trajectory)``, ``trajectory`` being a copy of the
+    ``(screenshot, caption)`` pairs so far. Stops on ``terminated``/``truncated``. The
+    environment is closed on every exit path, including a failed ``reset()`` or first step.
+    """
+    logger.info(f"Starting agent with instruction: {instruction}")
+    logger.info(f"Configuration: model={model_name}, start_url={start_url}")
+
+    trajectory = []
+    agent = BrowserAgent(
+        model_name=model_name, use_html=use_html, use_axtree=use_axtree, use_screenshot=use_screenshot
+    )
+
+    # Initialize BrowserGym environment
+    logger.info("Initializing BrowserGym environment")
+    env = gym.make(
+        "browsergym/openended",
+        task_kwargs={
+            "start_url": start_url,
+            "task": "openended",  # Required task parameter
+            "goal": instruction,
+        },
+        wait_for_user_message=True
+    )
+    try:
+        obs, info = env.reset()
+        logger.info("Environment initialized")
+
+        # Send user instruction to the environment
+        # NOTE(review): this action is a dict, while later steps pass the model's text reply - confirm env.step accepts both.
+        logger.info("Sending user instruction to environment")
+        obs, reward, terminated, truncated, info = env.step({"type": "send_msg_to_user", "message": instruction})
+        processed_obs = agent.obs_preprocessor(obs)
+        logger.info(f"Obs: {processed_obs.keys()}")
+        logger.info(f"axtree_txt: {processed_obs['axtree_txt']}")
+
+        # Yield the initial state
+        trajectory.append((obs['screenshot'], "Initial state"))
+        yield obs['screenshot'], trajectory.copy()
+
+        step_count = 0
+        while True:
+            logger.info(f"Step {step_count}: Getting next action")
+            # Get next action from agent
+            action, _ = agent.get_action(processed_obs)
+
+            # Execute action
+            logger.info(f"Step {step_count}: Executing action: {action}")
+            obs, reward, terminated, truncated, info = env.step(action)
+            processed_obs = agent.obs_preprocessor(obs)
+
+            # Store the screenshot directly in the trajectory (no conversion)
+            trajectory.append((obs['screenshot'], action))
+            logger.info(f"Step {step_count}: Saved screenshot and updated trajectory")
+            step_count += 1
+
+            # Yield after every step
+            yield obs['screenshot'], trajectory.copy()
+
+            if terminated or truncated:
+                logger.info(f"Episode ended: terminated={terminated}, truncated={truncated}")
+                break
+    finally:
+        logger.info("Closing environment")
+        env.close()
+
+def main():
+    """Parse the command-line options, build the Gradio demo UI and launch it."""
+    args = parse_args()
+
+    # Set logging level from command line argument
+    logger.setLevel(getattr(logging, args.log_level))
+    logger.info("Starting BrowserGym web agent")
+    logger.info(f"Arguments: {args}")
+
+    def run_with_cli_options(instruction_text, selected_model):
+        # Forward the CLI options that have no widget in the UI. `yield from` keeps this a
+        # generator function, so each step yielded by run_agent still reaches the UI.
+        yield from run_agent(instruction_text, selected_model, start_url=args.start_url,
+                             use_html=args.use_html, use_axtree=args.use_axtree,
+                             use_screenshot=args.use_screenshot)
+
+    with gr.Blocks(title="🎯 Web Agent Demo with BrowserGym & OpenAI") as demo:
+        gr.Markdown("# Web Agent Demo (BrowserGym + OpenAI)")
+        # Created unrendered first so the examples can fill it; placed in its column below.
+        instruction = gr.Textbox(
+            label="Enter your instruction here",
+            placeholder="E.g., 'Search for AI then click #result-stats'",
+            lines=2,
+            render=False,
+        )
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown("## Examples")
+                gr.Examples(examples=[[e] for e in EXAMPLES], inputs=[instruction], cache_examples=False)
+            with gr.Column(scale=2):
+                instruction.render()
+                model_name = gr.Dropdown(label="Model", choices=["gpt-4o", "gpt-4o-mini"], value=args.model_name)
+                run_btn = gr.Button("Run Agent")
+                browser_view = gr.Image(label="Browser View")
+            with gr.Column(scale=2):
+                gr.Markdown("## Trajectory History")
+                trajectory_gallery = gr.Gallery(label="Action & State", columns=2)
+
+        run_btn.click(
+            fn=run_with_cli_options, inputs=[instruction, model_name],
+            outputs=[browser_view, trajectory_gallery],
+            api_name="run_agent", show_progress=True, concurrency_limit=1,
+        )
+
+    logger.info("Launching Gradio interface")
+    demo.launch()
+
+if __name__ == "__main__":  # start the demo only when run as a script, not on import
+    main()