ar08's picture
Upload 1040 files
246d201 verified
import os
from openhands.core.exceptions import BrowserUnavailableException
from openhands.core.schema import ActionType
from openhands.events.action import BrowseInteractiveAction, BrowseURLAction
from openhands.events.observation import BrowserOutputObservation
from openhands.runtime.browser.browser_env import BrowserEnv
from openhands.utils.async_utils import call_sync_from_async
async def browse(
action: BrowseURLAction | BrowseInteractiveAction, browser: BrowserEnv | None
) -> BrowserOutputObservation:
if browser is None:
raise BrowserUnavailableException()
if isinstance(action, BrowseURLAction):
# legacy BrowseURLAction
asked_url = action.url
if not asked_url.startswith('http'):
asked_url = os.path.abspath(os.curdir) + action.url
action_str = f'goto("{asked_url}")'
elif isinstance(action, BrowseInteractiveAction):
# new BrowseInteractiveAction, supports full featured BrowserGym actions
# action in BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/action/functions.py
action_str = action.browser_actions
else:
raise ValueError(f'Invalid action type: {action.action}')
try:
# obs provided by BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/env.py#L396
obs = await call_sync_from_async(browser.step, action_str)
return BrowserOutputObservation(
content=obs['text_content'], # text content of the page
url=obs.get('url', ''), # URL of the page
screenshot=obs.get('screenshot', None), # base64-encoded screenshot, png
open_pages_urls=obs.get('open_pages_urls', []), # list of open pages
active_page_index=obs.get(
'active_page_index', -1
), # index of the active page
dom_object=obs.get('dom_object', {}), # DOM object
axtree_object=obs.get('axtree_object', {}), # accessibility tree object
extra_element_properties=obs.get('extra_element_properties', {}),
focused_element_bid=obs.get(
'focused_element_bid', None
), # focused element bid
last_browser_action=obs.get(
'last_action', ''
), # last browser env action performed
last_browser_action_error=obs.get('last_action_error', ''),
error=True if obs.get('last_action_error', '') else False, # error flag
trigger_by_action=action.action,
)
except Exception as e:
return BrowserOutputObservation(
content=str(e),
screenshot='',
error=True,
last_browser_action_error=str(e),
url=asked_url if action.action == ActionType.BROWSE else '',
trigger_by_action=action.action,
)