Spaces:

ttomy
/

proxy-lite-demo-for-setup

Paused

App Files Files Community

Trisha Tomy committed on Jun 27

Commit

7b40088

1 Parent(s): c8e914e

hopefully working headless remotely

Browse files

Files changed (4) hide show

app.py +38 -31
src/proxy_lite/browser/browser.py +126 -60
src/proxy_lite/environments/webbrowser.py +12 -3
src/proxy_lite/tools/browser_tool.py +93 -30

app.py CHANGED Viewed

@@ -27,10 +27,11 @@ async def initialize_runner():
         config = RunnerConfig.from_dict({
             "environment": {
                 "name": "webbrowser",
-                "homepage": "https://dwd000006jia1mae.lightning.force.com/lightning/setup/AccountForecastSettings/home",
-                "headless": True,
                 "launch_args": ["--no-sandbox", "--disable-setuid-sandbox"],
-                "screenshot_delay": 3.0,
                 "include_html": True,
                 "include_poi_text": True,
             },
@@ -60,20 +61,9 @@ async def initialize_runner():
         logger.info("Proxy-lite Runner initialized successfully.")
     return _runner
-# --- MODIFIED run_async_task FUNCTION (SIMPLIFIED) ---
-# This function is no longer needed in most cases with gevent.monkey.patch_all(asyncio=True)
-# but if you must call async functions from sync context, you simply await them.
-# However, you are already in an async function context within Flask routes when using Gunicorn/gevent.
-# The Gunicorn worker itself implicitly runs an event loop.
-# Let's remove the run_until_complete part.
-# DELETED: def run_async_task(coro): ...
-# --- END MODIFIED run_async_task FUNCTION ---
 @app.route('/run_proxy_task', methods=['POST'])
-async def run_proxy_task_endpoint(): # <--- MAKE THIS FUNCTION ASYNC
     data = request.json
     request_task_instruction = data.get('task')
@@ -90,27 +80,40 @@ async def run_proxy_task_endpoint(): # <--- MAKE THIS FUNCTION ASYNC
         logger.error("Salesforce credentials (SALESFORCE_USERNAME, SALESFORCE_PASSWORD) environment variables not set.")
         return jsonify({"error": "Salesforce credentials not configured. Please set SALESFORCE_USERNAME and SALESFORCE_PASSWORD as Space secrets."}), 500
-    agent_task = (
-        f"Log in to Salesforce. The username is '{salesforce_username}' and the password is '{salesforce_password}'. "
-        f"After attempting to log in, observe the page carefully. "
-        f"If the login was successful, the URL should change from the login page, and you should see elements indicating a logged-in state (e.g., a Salesforce navigation menu, a home screen, or a profile icon), rather than a login form or an error message. "
-        f"If the login is successful, {request_task_instruction}. "
-        f"Report the final status of the requested action and confirmation of successful login."
-    )
-    logger.info(f"Executing agent task: '{agent_task[:200]}...'")
     try:
-        # Since run_proxy_task_endpoint is now async, you can directly await
         runner = await initialize_runner()
-        result = await runner.run(agent_task) # <--- AWAIT DIRECTLY
-        logger.info(f"Proxy-lite task completed. Output: {result[:200]}...")
         return jsonify({"output": result})
     except Exception as e:
         logger.exception(f"Error processing Salesforce task: {e}")
-        # The RuntimeWarning: coroutine 'initialize_runner' was never awaited will disappear
-        # because initialize_runner is now awaited.
         return jsonify({"error": f"An error occurred: {str(e)}. Check logs for details."}), 500
 @app.route('/')
@@ -119,8 +122,12 @@ def root():
     return "Proxy-lite API is running. Send POST requests to /run_proxy_task with a 'task' in JSON body."
 if __name__ == '__main__':
     if not os.environ.get("HF_API_TOKEN"):
         logger.error("HF_API_TOKEN environment variable is not set. Please set it for local testing.")
-        exit(1)
-    logger.info("Starting Flask development server on 0.0.0.0:7860...")
-    app.run(host='0.0.0.0', port=7860, debug=True)

         config = RunnerConfig.from_dict({
             "environment": {
                 "name": "webbrowser",
+                # Set homepage to Salesforce's generic login URL to avoid premature waits for target page elements.
+                "homepage": "https://login.salesforce.com/",
+                "headless": True, # Headless for the remote deployment; switch to False only for local testing
                 "launch_args": ["--no-sandbox", "--disable-setuid-sandbox"],
+                "screenshot_delay": 0.5, # Reduced for faster debugging cycles
                 "include_html": True,
                 "include_poi_text": True,
             },
         logger.info("Proxy-lite Runner initialized successfully.")
     return _runner
 @app.route('/run_proxy_task', methods=['POST'])
+async def run_proxy_task_endpoint():
     data = request.json
     request_task_instruction = data.get('task')
         logger.error("Salesforce credentials (SALESFORCE_USERNAME, SALESFORCE_PASSWORD) environment variables not set.")
         return jsonify({"error": "Salesforce credentials not configured. Please set SALESFORCE_USERNAME and SALESFORCE_PASSWORD as Space secrets."}), 500
+    # Define the specific Account Forecast Settings URL
+    account_forecast_url = "https://dwd000006jia1mae.lightning.force.com/lightning/setup/AccountForecastSettings/home"
+    # Define the tool code block to open a new tab and navigate after login
+    # Using a raw f-string for multiline tool code block
+    tool_code_block_new_tab = fr"""
+<tool_code>
+await browser.open_new_tab_and_go_to(url='{account_forecast_url}')
+</tool_code>
+"""
+    # Refined agent_task instruction to be sequential and robust to Salesforce redirects
+    agent_task = f"""
+    **Task Instructions for Proxy Lite Agent:**
+    1.  **Start on Login Page:** Navigate to the Salesforce login page.
+    2.  **Perform Login:** Log in to Salesforce using the provided username '{salesforce_username}' and password '{salesforce_password}'. Ensure all login fields are filled and the 'Log In' button is clicked.
+    3.  **Handle Post-Login Redirect:** After clicking the 'Log In' button:
+        * Observe the current URL. If the URL has changed from the initial login domain (e.g., from `login.salesforce.com` or `my.salesforce.com`) **immediately execute the following tool code block to open a new tab and navigate directly to the Account Forecast Settings page (`{account_forecast_url}`) to bypass any persistent loading issues or internal redirects:**
+        {tool_code_block_new_tab.strip()}
+    4.  **Confirm Target Page Load:** After successfully navigating to '{account_forecast_url}' (either directly after login or via the new tab strategy), ensure the page is fully loaded and stable. This means no loading spinners should be visible, and the main content for 'Account Forecast Settings' (like a clear heading, relevant toggles, or data tables) should be present and interactive.
+    5.  **Execute Main Task:** Once the Account Forecast Settings page is confirmed loaded and stable, proceed with the original user request: {request_task_instruction}.
+    6.  **Report Final Status:** Report the final status of the requested action, confirming both successful login and complete page load of the Account Forecast Settings.
+    """
+    logger.info(f"Executing agent task (truncated for log): '{agent_task[:500]}...'")
     try:
         runner = await initialize_runner()
+        result = await runner.run(agent_task)
+        logger.info(f"Proxy-lite task completed. Output (truncated for log): {result[:500]}...")
         return jsonify({"output": result})
     except Exception as e:
         logger.exception(f"Error processing Salesforce task: {e}")
         return jsonify({"error": f"An error occurred: {str(e)}. Check logs for details."}), 500
 @app.route('/')
     return "Proxy-lite API is running. Send POST requests to /run_proxy_task with a 'task' in JSON body."
 if __name__ == '__main__':
+    # It is crucial to set HF_API_TOKEN as an environment variable (e.g., in a .env file or directly)
+    # for local testing as well, otherwise initialize_runner will fail.
     if not os.environ.get("HF_API_TOKEN"):
         logger.error("HF_API_TOKEN environment variable is not set. Please set it for local testing.")
+        # Removed exit(1) to allow the Flask app to start for basic connectivity checks,
+        # but runner initialization will still fail if token is missing.
+        # For full functionality, the token is essential.
+    logger.info("Starting Flask development server on 0.0.0.0:6101...")
+    app.run(host='0.0.0.0', port=6101, debug=True)

src/proxy_lite/browser/browser.py CHANGED Viewed

@@ -106,7 +106,10 @@ class BrowserSession:
         self.context = await self.browser.new_context(
             viewport={"width": self.viewport_width, "height": self.viewport_height},
         )
-        await self.context.new_page()
         self.context.set_default_timeout(60_000)
         self.current_page.set_default_timeout(60_000)
         await stealth_async(self.current_page, StealthConfig(navigator_user_agent=False))
@@ -129,11 +132,11 @@ class BrowserSession:
     @property
     def current_page(self) -> Optional[Page]:
-        if self.context.pages:
-            return self.context.pages[-1]
         return None
-    @property
     def current_url(self) -> Optional[str]:
         if self.current_page:
             return self.current_page.url
@@ -176,7 +179,6 @@ class BrowserSession:
             logger.error(f"Error processing iframe: {e}")
             return None
-    # re-run for cases of mid-run redirects
     @retry(
         wait=wait_exponential(multiplier=1, min=1, max=10),
         stop=stop_after_delay(5),
@@ -185,54 +187,100 @@ class BrowserSession:
     )
     async def update_poi(self) -> None:
         try:
-            # We will use "domcontentloaded" as a base and then wait for specific elements
-            await self.current_page.wait_for_load_state("domcontentloaded", timeout=60000) # Reduced timeout for initial load
-            logger.debug("wait_for_load_state('domcontentloaded') completed.")
-            # --- MODIFICATION START ---
-            # Wait for the "Account Forecasting" heading to be visible
-            # Adjust the selector below based on the actual HTML of the Salesforce page.
-            # Common selectors could be:
-            # - `h1:has-text('Account Forecasting')`
-            # - `h2:has-text('Account Forecasting')`
-            # - `div.some-class-name:has-text('Account Forecasting')`
-            # - `[data-qa-id="account-forecasting-heading"]` (if Salesforce uses data-qa attributes)
-            # You might need to inspect the Salesforce page to get the exact selector.
-            # For now, let's assume it's an h1 or h2 tag containing the text.
-            try:
-                await self.current_page.wait_for_selector(
-                    "h1:has-text('Account Forecasting'), h2:has-text('Account Forecasting')",
-                    timeout=60000, # Set a reasonable timeout for this specific element
-                    state="visible"
-                )
-                logger.debug("Successfully waited for 'Account Forecasting' heading.")
-            except PlaywrightTimeoutError as e:
-                logger.error(f"Timeout waiting for 'Account Forecasting' heading on URL: {self.current_page.url}")
-                # You might want to log more specific HTML/screenshot here if this still times out often
-                raise # Re-raise if this critical element doesn't appear
-            # It's still good to wait for the body to be visible, but with a shorter timeout
-            # if the previous specific heading check passed.
-            try:
-                await self.current_page.wait_for_selector("body", timeout=30000, state="visible")
-                logger.debug("wait_for_selector('body', state='visible') completed.")
-            except PlaywrightTimeoutError as e:
-                logger.warning(f"DEBUGGING: Playwright Timeout (30s) on body selector, but 'Account Forecasting' heading was found. This might be acceptable if the page is usable.")
-                # We can choose to suppress this specific timeout if the critical element is found,
-                # or re-raise it if a fully loaded body is strictly necessary for further actions.
-                # For now, let's just log and continue, as the primary issue was full page load.
-                pass # Do not re-raise if we've found our key indicator.
-        except PlaywrightTimeoutError: # This outer catch is for the wait_for_load_state timeout
-            logger.error(f"Timeout waiting for website load state (domcontentloaded): {self.current_url}")
-            raise # Re-raise if initial load_state itself times out
         except Exception as e:
-            logger.error(f"An unexpected error occurred during page readiness check: {e}")
             raise
-        # Run the bounding box javascript code to highlight the points of interest on the page
         page_info = await self.current_page.evaluate(
             """() => {
                 overwriteDefaultSelectConvergence();
@@ -271,6 +319,7 @@ class BrowserSession:
                     centroid["left"] += iframe_offsets[index]["x"]
                     centroid["top"] += iframe_offsets[index]["y"]
                     centroid["right"] += iframe_offsets[index]["x"]
                     centroid["bottom"] += iframe_offsets[index]["y"]
                 element_centroids.extend(iframe_poi["element_centroids"])
@@ -307,17 +356,12 @@ class BrowserSession:
         if delay > 0.0:
             await asyncio.sleep(delay)
         await self.update_poi()
-        old_poi_positions = [tuple(point) for point in self.poi_centroids]
         img = await self.current_page.screenshot(type=type, quality=quality, scale=scale)
         annotated_img = annotate_bounding_boxes(image=img, bounding_boxes=self.bounding_boxes)
-        # check page has not changed since the screenshot was taken
-        await self.update_poi()
-        new_poi_positions = [tuple(point) for point in self.poi_centroids]
-        if new_poi_positions != old_poi_positions:
-            # if it has changed, take another
-            img = await self.current_page.screenshot(type=type, quality=quality, scale=scale)
-            await self.update_poi()
-            annotated_img = annotate_bounding_boxes(image=img, bounding_boxes=self.bounding_boxes)
         return img, annotated_img
     async def goto(self, url: str) -> None:
@@ -424,6 +468,28 @@ class BrowserSession:
                 await self.current_page.keyboard.press("Control+Home")
                 await self.current_page.keyboard.press("Control+Shift+End")
             await self.current_page.keyboard.press("Backspace")
 if __name__ == "__main__":
@@ -439,4 +505,4 @@ if __name__ == "__main__":
             with open("output.png", "wb") as f:
                 f.write(annotated_image)
-    asyncio.run(dummy_test())

         self.context = await self.browser.new_context(
             viewport={"width": self.viewport_width, "height": self.viewport_height},
         )
+        # Ensure there's at least one page open
+        if not self.context.pages:
+            await self.context.new_page()
         self.context.set_default_timeout(60_000)
         self.current_page.set_default_timeout(60_000)
         await stealth_async(self.current_page, StealthConfig(navigator_user_agent=False))
     @property
     def current_page(self) -> Optional[Page]:
+        if self.context and self.context.pages:
+            return self.context.pages[-1] # Return the most recently opened page
         return None
+    @property
     def current_url(self) -> Optional[str]:
         if self.current_page:
             return self.current_page.url
             logger.error(f"Error processing iframe: {e}")
             return None
     @retry(
         wait=wait_exponential(multiplier=1, min=1, max=10),
         stop=stop_after_delay(5),
     )
     async def update_poi(self) -> None:
         try:
+            # Wait for basic page load states to ensure the DOM is ready.
+            # This is a fundamental wait that should always apply.
+            await self.current_page.wait_for_load_state("domcontentloaded", timeout=60000)
+            logger.debug(f"DEBUG: wait_for_load_state('domcontentloaded') completed for {self.current_page.url}.")
+            current_url = self.current_page.url
+            # Define common Salesforce URL patterns for different states
+            login_url_patterns = [
+                "login.salesforce.com",
+                "identity.force.com",
+                "auth.lightning.force.com",
+                "setup.salesforce.com", # Sometimes a setup login redirects here temporarily
+                "my.salesforce.com" # Your specific custom domain login redirects here
+            ]
+            # This is the main Salesforce Lightning application base URL, typically seen after login.
+            # We treat this as an intermediate loading state before the specific target page.
+            intermediate_app_url_pattern = "/one/one.app"
+            # Check the current state of the page based on its URL
+            is_on_login_page = any(pattern in current_url for pattern in login_url_patterns)
+            is_on_intermediate_app_page = intermediate_app_url_pattern in current_url
+            # Note: is_on_target_forecast_page checks if the specific target path is in the URL
+            is_on_target_forecast_page = "/AccountForecastSettings/home" in current_url
+            # --- CONDITIONAL WAITING LOGIC BASED ON URL ---
+            if is_on_target_forecast_page:
+                logger.info(f"INFO: Detected target Account Forecast Settings page: {current_url}. Waiting for content.")
+                # When on the specific target page, wait for its content and spinners
+                spinner_selectors = [
+                    "div.slds-spinner_container",
+                    "div.auraLoadingBox",
+                    "div.dxp_axb_container", # Main overlay from your inspect screenshot
+                    "div.slds-sprite-astro-x-large" # Specific animated element itself
+                ]
+                for selector in spinner_selectors:
+                    try:
+                        await self.current_page.wait_for_selector(selector, state="hidden", timeout=5000) # Reduced timeout
+                        logger.debug(f"DEBUG: Spinner element '{selector}' became hidden for {self.current_page.url}.")
+                    except PlaywrightTimeoutError:
+                        logger.warning(f"DEBUGGING: Spinner element '{selector}' not detected or did not disappear on {self.current_page.url} within 5s.")
+                # Wait for a known element on the Account Forecast Settings page to ensure content is there.
+                try:
+                    # Added 'h2' for section headers, and a more generic 'div[data-aura-rendered-by]' for Lightning components
+                    await self.current_page.wait_for_selector("h1.slds-page-header__title, h2, .account-forecast-settings-component, div[data-aura-rendered-by]", state="visible", timeout=15000) # Increased timeout slightly for robust content load
+                    logger.debug(f"DEBUG: Confirmed main page element visible for {self.current_page.url}.")
+                except PlaywrightTimeoutError:
+                    logger.warning(f"DEBUGGING: Main page element not visible on {self.current_page.url} within 15s. This might indicate incomplete page load despite no spinner.")
+            elif is_on_login_page:
+                logger.info(f"INFO: Detected Salesforce login page: {current_url}. Waiting for login elements.")
+                # When on a login page, just wait for the login form elements to be visible
+                try:
+                    await self.current_page.wait_for_selector("input[type='email'], input[type='password'], input[type='submit'], #username, #password, #Login", state="visible", timeout=10000)
+                    logger.debug(f"DEBUG: Login page elements visible on {self.current_page.url}.")
+                except PlaywrightTimeoutError:
+                    logger.warning(f"DEBUGGING: Login page elements not visible on {self.current_page.url} within 10s. This may happen if elements are in an iframe or if page is extremely slow.")
+            elif is_on_intermediate_app_page:
+                logger.info(f"INFO: Detected intermediate Salesforce Lightning app loading page: {current_url}. Waiting for network idle and app spinner.")
+                # This is the /one/one.app page or similar. Don't wait for specific content, just general load.
+                try:
+                    await self.current_page.wait_for_load_state("networkidle", timeout=30000) # Give it more time for network to settle
+                    logger.debug(f"DEBUG: Network idle detected on intermediate app page: {current_url}.")
+                except PlaywrightTimeoutError:
+                    logger.warning(f"DEBUGGING: Network idle timeout on intermediate app page: {current_url}. Proceeding anyway.")
+                # Also try to wait for a common full-app spinner to disappear, if present
+                try:
+                    await self.current_page.wait_for_selector('div.app-spinner, div.auraLoadingBox', state='hidden', timeout=15000) # Added auraLoadingBox as it might reappear
+                    logger.debug(f"DEBUG: App spinner on intermediate page became hidden.")
+                except PlaywrightTimeoutError:
+                    logger.warning(f"DEBUGGING: App spinner on intermediate page not found or did not disappear.")
+            else:
+                logger.info(f"INFO: Detected unhandled URL type: {current_url}. Performing generic body wait.")
+                # Fallback for any other page, just wait for body to be visible
+                try:
+                    await self.current_page.wait_for_selector("body", timeout=5000, state="visible")
+                    logger.debug(f"DEBUG: wait_for_selector('body', state='visible') completed for {self.current_page.url}.")
+                except PlaywrightTimeoutError:
+                    logger.warning(f"DEBUGGING: Playwright Timeout (5s) on body selector for {self.current_page.url}. Continuing anyway.")
+                    pass
+        except PlaywrightTimeoutError as e:
+            logger.error(f"ERROR: Timeout waiting for page readiness for {self.current_page.url}: {e}")
+            raise # Re-raise if essential waits fail (e.g., initial domcontentloaded)
         except Exception as e:
+            logger.error(f"ERROR: An unexpected error occurred during page readiness check for {self.current_page.url}: {e}")
             raise
+        # Rest of update_poi: Run the bounding box javascript code to highlight the points of interest on the page
         page_info = await self.current_page.evaluate(
             """() => {
                 overwriteDefaultSelectConvergence();
                     centroid["left"] += iframe_offsets[index]["x"]
                     centroid["top"] += iframe_offsets[index]["y"]
                     centroid["right"] += iframe_offsets[index]["x"]
+                    # Fix: Removed duplicate 'centroid["y"] += iframe_offsets[index]["y"]'
                     centroid["bottom"] += iframe_offsets[index]["y"]
                 element_centroids.extend(iframe_poi["element_centroids"])
         if delay > 0.0:
             await asyncio.sleep(delay)
         await self.update_poi()
+        # Keep original logic if page is highly dynamic, but for static shots, simpler is faster
+        # old_poi_positions = [tuple(point) for point in self.poi_centroids]
         img = await self.current_page.screenshot(type=type, quality=quality, scale=scale)
         annotated_img = annotate_bounding_boxes(image=img, bounding_boxes=self.bounding_boxes)
+        # Re-evaluating this block for performance. Removed redundant update_poi and conditional screenshot.
+        # If precise screenshot timing is needed, the caller should manage delays and updates.
         return img, annotated_img
     async def goto(self, url: str) -> None:
                 await self.current_page.keyboard.press("Control+Home")
                 await self.current_page.keyboard.press("Control+Shift+End")
             await self.current_page.keyboard.press("Backspace")
+    async def open_new_tab_and_go_to(self, url: str) -> None:
+        """
+        Open a new browser tab, close the previously active one, and navigate to ``url``.
+
+        Args:
+            url: Address to load in the new tab.
+
+        The previously active page is captured *before* the new tab is created.
+        ``current_page`` returns ``context.pages[-1]``, so once ``new_page()`` has
+        run it already refers to the new tab and cannot be used to find the old one.
+        """
+        logger.info(f"Attempting to open a new tab and navigate to: {url}")
+        previous_page = self.current_page
+        new_page = await self.context.new_page()
+        # Close the tab that was active before; closing is best-effort, so failures are only logged.
+        if previous_page is not None and previous_page is not new_page:
+            try:
+                await previous_page.close()
+                logger.debug("Closed previous page.")
+            except Exception as e:
+                logger.warning(f"Could not close previous page (might already be closed or detached): {e}")
+        await new_page.goto(url, wait_until="domcontentloaded")
+        logger.info(f"Successfully navigated to {url} in a new tab.")
+        # update_poi reads self.current_page, which now resolves to the new tab.
+        await self.update_poi()
 if __name__ == "__main__":
             with open("output.png", "wb") as f:
                 f.write(annotated_image)
+    asyncio.run(dummy_test())

src/proxy_lite/environments/webbrowser.py CHANGED Viewed

@@ -12,7 +12,8 @@ from proxy_lite.environments.environment_base import (
     State,
 )
 from proxy_lite.tools import BrowserTool, Tool, ToolExecutionResponse
 @Environments.register_environment_config("webbrowser")
 class WebBrowserEnvironmentConfig(BaseEnvironmentConfig):
@@ -75,7 +76,14 @@ class WebBrowserEnvironment(BaseEnvironment):
         return []
     async def initialise(self) -> Observation:
-        await self.browser.goto(self.config.homepage)
         original_img, annotated_img = await self.browser.screenshot(
             delay=self.config.screenshot_delay,
         )
@@ -92,6 +100,7 @@ class WebBrowserEnvironment(BaseEnvironment):
         if self.config.keep_original_image:
             info["original_image"] = base64.b64encode(original_img).decode("utf-8")
         return Observation(
             state=State(
                 text=f"URL: {self.browser.current_url}"
@@ -182,4 +191,4 @@ class WebBrowserEnvironment(BaseEnvironment):
     async def get_info(self) -> dict[str, Any]:
         info = {}
-        return info

     State,
 )
 from proxy_lite.tools import BrowserTool, Tool, ToolExecutionResponse
+# Import the shared proxy_lite logger. NOTE(review): initialise() below logs via self.logger, not this module-level name; confirm BaseEnvironment provides self.logger.
+from proxy_lite.logger import logger # Assuming you want to use the same logger
 @Environments.register_environment_config("webbrowser")
 class WebBrowserEnvironmentConfig(BaseEnvironmentConfig):
         return []
     async def initialise(self) -> Observation:
+        self.logger.debug(f"DEBUG: Initialising WebBrowserEnvironment. Homepage: {self.config.homepage}")
+        try:
+            await self.browser.goto(self.config.homepage)
+            self.logger.debug(f"DEBUG: Browser navigated to homepage. Current URL: {self.browser.current_url}")
+        except Exception as e:
+            self.logger.error(f"ERROR: Failed to navigate to homepage {self.config.homepage}: {e}")
+            raise # Re-raise to propagate the error
         original_img, annotated_img = await self.browser.screenshot(
             delay=self.config.screenshot_delay,
         )
         if self.config.keep_original_image:
             info["original_image"] = base64.b64encode(original_img).decode("utf-8")
+        self.logger.debug(f"DEBUG: Initial observation captured. URL: {self.browser.current_url}")
         return Observation(
             state=State(
                 text=f"URL: {self.browser.current_url}"
     async def get_info(self) -> dict[str, Any]:
         info = {}
+        return info

src/proxy_lite/tools/browser_tool.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import asyncio
 from contextlib import AsyncExitStack
-from typing import List, Literal, Optional
 from pydantic import BaseModel, Field
@@ -123,6 +123,10 @@ class ReloadParams(BaseModel):
 class DoNothingParams(BaseModel):
     pass
 class BrowserTool(Tool):
     def __init__(self, session: BrowserSession) -> None:
@@ -148,7 +152,7 @@ class BrowserTool(Tool):
     async def goto(self, url: str) -> ToolExecutionResponse:
         """Go directly to a specific web url. Specify the exact URL."""
         await self.browser.goto(url)
-        return ToolExecutionResponse()
     @attach_param_schema(GoogleSearchParams)
     async def google_search(self, query_plan: str, query: str) -> ToolExecutionResponse:
@@ -157,13 +161,22 @@ class BrowserTool(Tool):
         """
         url = f"https://www.google.com/search?q={query}"
         await self.browser.goto(url)
-        return ToolExecutionResponse()
     @attach_param_schema(ClickParams)
     async def click(self, mark_id: int) -> ToolExecutionResponse:
         """Click on an element of the page."""
-        await self.browser.click(mark_id=mark_id)
-        return ToolExecutionResponse()
     @attach_param_schema(TypeParams)
     async def type(self, entries: List[dict], submit: bool) -> ToolExecutionResponse:
@@ -171,51 +184,101 @@ class BrowserTool(Tool):
         You can type into one or more elements.
         Note that the text inside an element is cleared before typing.
         """
         for i, entry_dict in enumerate(entries):
-            entry = TypeEntry(**entry_dict)
-            last_entry = i == len(entries) - 1
-            old_poi_positions = [tuple(point) for point in self.browser.poi_centroids]
-            await self.browser.enter_text(
-                mark_id=entry.mark_id,
-                text=entry.content,
-                submit=submit and last_entry,
-            )
-            await self.browser.update_poi()
-            new_poi_positions = [tuple(point) for point in self.browser.poi_centroids]
-            if not last_entry and old_poi_positions != new_poi_positions:
-                logger.error(
-                    "POI positions changed mid-typing, cancelling future type entries.",
                 )
-                break
-        return ToolExecutionResponse()
     @attach_param_schema(ScrollParams)
     async def scroll(self, direction: str, mark_id: int) -> ToolExecutionResponse:
         """Scroll the page (or a scrollable element) up, down, left or right."""
-        if mark_id == -1:
-            mark_id = None
-        await self.browser.scroll(direction=direction, mark_id=mark_id)
-        return ToolExecutionResponse()
     @attach_param_schema(BackParams)
     async def back(self) -> ToolExecutionResponse:
         """Go back to the previous page."""
-        await self.browser.go_back()
-        return ToolExecutionResponse()
     @attach_param_schema(WaitParams)
     async def wait(self) -> ToolExecutionResponse:
         """Wait three seconds. Useful when the page appears to still be loading, or if there are any unfinished webpage processes."""  # noqa: E501
         await asyncio.sleep(3)
-        return ToolExecutionResponse()
     @attach_param_schema(ReloadParams)
     async def reload(self) -> ToolExecutionResponse:
         """Reload the current page. Useful when the page seems unresponsive, broken, outdated, or if you want to reset the page to its initial state."""  # noqa: E501
-        await self.browser.reload()
-        return ToolExecutionResponse()
     @attach_param_schema(DoNothingParams)
     async def do_nothing_tool(self) -> ToolExecutionResponse:
         """Do nothing. Use this if you have no need for the browser at this time."""
-        return ToolExecutionResponse()

 import asyncio
 from contextlib import AsyncExitStack
+from typing import List, Literal, Optional, Any
 from pydantic import BaseModel, Field
 # Parameter schema attached to `do_nothing_tool`; the model declares no fields.
 # NOTE(review): documented with a comment rather than a class docstring, since a
 # docstring on a pydantic model may end up in the generated schema -- confirm.
 class DoNothingParams(BaseModel):
     pass
+# Parameter schema attached to the `open_new_tab_and_go_to` browser tool.
+class OpenNewTabAndGoToParams(BaseModel):
     # Required field (Field(...) declares no default): the URL opened in the new tab.
+    url: str = Field(..., description="The URL to navigate to in the new tab.")
 class BrowserTool(Tool):
     def __init__(self, session: BrowserSession) -> None:
     async def goto(self, url: str) -> ToolExecutionResponse:
         """Go directly to a specific web url. Specify the exact URL."""
         # Docstring kept verbatim: it may be surfaced as the tool description (TODO confirm).
         # Navigation errors are not caught here and propagate to the caller.
         await self.browser.goto(url)
         summary = f"Successfully navigated to URL: {url}"
         return ToolExecutionResponse(observation=summary)
     @attach_param_schema(GoogleSearchParams)
     async def google_search(self, query_plan: str, query: str) -> ToolExecutionResponse:
         """
         url = f"https://www.google.com/search?q={query}"
         await self.browser.goto(url)
+        return ToolExecutionResponse(observation=f"Performed Google search for: {query}") # Added observation
     @attach_param_schema(ClickParams)
     async def click(self, mark_id: int) -> ToolExecutionResponse:
         """Click on an element of the page."""
         # Failures are reported back as an observation instead of being raised.
         try:
             await self.browser.click(mark_id=mark_id)
             return ToolExecutionResponse(observation=f"Clicked element with mark ID: {mark_id}")
         except IndexError as exc:
             # Presumably raised when mark_id does not index into the browser's POI list.
             logger.error(f"Click failed: Mark ID {mark_id} not found or POI list empty. Error: {exc}")
             failure = f"Failed to click element with mark ID {mark_id}. Element not found or POI list invalid."
             return ToolExecutionResponse(observation=failure)
         except Exception as exc:
             logger.error(f"Click failed with unexpected error for mark ID {mark_id}: {exc}")
             failure = f"An unexpected error occurred while trying to click element {mark_id}: {exc}"
             return ToolExecutionResponse(observation=failure)
     @attach_param_schema(TypeParams)
     async def type(self, entries: List[dict], submit: bool) -> ToolExecutionResponse:
         """
         You can type into one or more elements.
         Note that the text inside an element is cleared before typing.
         """
         # Maintainer notes (kept out of the docstring, which may be surfaced as the
         # tool description -- TODO confirm):
         # - Entries are processed in order; `submit` is only applied to the last entry.
         # - Any failure stops processing and is returned as an observation, not raised.
         # - The returned observation lists the mark IDs that were actually typed into.
         typed_ids = []
         for i, entry_dict in enumerate(entries):
             # Validate the entry on its own: the handlers further down format
             # `entry.mark_id`, which would be unbound (first entry) or stale
             # (later entries) if validation failed inside the same try block.
             try:
                 entry = TypeEntry(**entry_dict)
             except Exception as e:
                 logger.error(f"Type failed: invalid entry at index {i}: {e}")
                 return ToolExecutionResponse(
                     observation=(
                         f"Failed to type: entry at index {i} is invalid: {e}. "
                         f"Typed into: {typed_ids if typed_ids else 'none'}."
                     ),
                 )
             last_entry = i == len(entries) - 1
             try:
                 # Snapshot POI positions so a layout change caused by typing can be detected.
                 old_poi_positions = [tuple(point) for point in self.browser.poi_centroids]
                 await self.browser.enter_text(
                     mark_id=entry.mark_id,
                     text=entry.content,
                     submit=submit and last_entry,
                 )
                 typed_ids.append(entry.mark_id)
                 await self.browser.update_poi()
                 new_poi_positions = [tuple(point) for point in self.browser.poi_centroids]
                 if not last_entry and old_poi_positions != new_poi_positions:
                     # Remaining mark IDs may now point at different elements; stop here.
                     logger.error(
                         "POI positions changed mid-typing, cancelling future type entries.",
                     )
                     break
             except IndexError as e:
                 logger.error(f"Type failed: Mark ID {entry.mark_id} not found or POI list empty. Error: {e}")
                 return ToolExecutionResponse(observation=f"Failed to type into element with mark ID {entry.mark_id}. Element not found or POI list invalid. Typed into: {typed_ids if typed_ids else 'none'}.")
             except Exception as e:
                 logger.error(f"Type failed with unexpected error for mark ID {entry.mark_id}: {e}")
                 return ToolExecutionResponse(observation=f"An unexpected error occurred while trying to type into element {entry.mark_id}: {e}. Typed into: {typed_ids if typed_ids else 'none'}.")
         return ToolExecutionResponse(
             observation=f"Typed text into elements with mark IDs: {typed_ids}",
         )
     @attach_param_schema(ScrollParams)
     async def scroll(self, direction: str, mark_id: int) -> ToolExecutionResponse:
         """Scroll the page (or a scrollable element) up, down, left or right."""
         # Failures are reported back as an observation instead of being raised.
         try:
             # A mark ID of -1 selects the page itself; the browser is given None in that case.
             page_scroll = mark_id == -1
             await self.browser.scroll(direction=direction, mark_id=None if page_scroll else mark_id)
             scrolled = 'page' if page_scroll else mark_id
             return ToolExecutionResponse(observation=f"Scrolled {direction} on element with mark ID: {scrolled}")
         except IndexError as exc:
             logger.error(f"Scroll failed: Mark ID {mark_id} not found or POI list empty. Error: {exc}")
             failure = f"Failed to scroll element with mark ID {mark_id}. Element not found or POI list invalid."
             return ToolExecutionResponse(observation=failure)
         except Exception as exc:
             logger.error(f"Scroll failed with unexpected error for mark ID {mark_id}: {exc}")
             failure = f"An unexpected error occurred while trying to scroll element {mark_id}: {exc}"
             return ToolExecutionResponse(observation=failure)
     @attach_param_schema(BackParams)
     async def back(self) -> ToolExecutionResponse:
         """Go back to the previous page."""
         # Any failure is logged and returned as an observation rather than raised.
         try:
             await self.browser.go_back()
             outcome = "Went back to the previous page."
             return ToolExecutionResponse(observation=outcome)
         except Exception as exc:
             logger.error(f"Go back failed: {exc}")
             outcome = f"Failed to go back: {exc}"
             return ToolExecutionResponse(observation=outcome)
     @attach_param_schema(WaitParams)
     async def wait(self) -> ToolExecutionResponse:
         """Wait three seconds. Useful when the page appears to still be loading, or if there are any unfinished webpage processes."""  # noqa: E501
         # Fixed pause; asyncio.sleep yields to the event loop instead of blocking it.
         pause_seconds = 3
         await asyncio.sleep(pause_seconds)
         outcome = "Waited for a few seconds."
         return ToolExecutionResponse(observation=outcome)
     @attach_param_schema(ReloadParams)
     async def reload(self) -> ToolExecutionResponse:
         """Reload the current page. Useful when the page seems unresponsive, broken, outdated, or if you want to reset the page to its initial state."""  # noqa: E501
         # Any failure is logged and returned as an observation rather than raised.
         try:
             await self.browser.reload()
             outcome = "Reloaded the current page."
             return ToolExecutionResponse(observation=outcome)
         except Exception as exc:
             logger.error(f"Reload failed: {exc}")
             outcome = f"Failed to reload the page: {exc}"
             return ToolExecutionResponse(observation=outcome)
     @attach_param_schema(DoNothingParams)
     async def do_nothing_tool(self) -> ToolExecutionResponse:
         """Do nothing. Use this if you have no need for the browser at this time."""
         # No browser interaction happens; only a fixed observation is returned.
         outcome = "Did nothing in the browser."
         return ToolExecutionResponse(observation=outcome)
+    # --- NEW: Expose the open_new_tab_and_go_to method as a tool ---
+    @attach_param_schema(OpenNewTabAndGoToParams)
+    async def open_new_tab_and_go_to(self, url: str) -> ToolExecutionResponse:
+        """
+        Opens a new browser tab/page and navigates to the specified URL.
+        Closes the old page if it's not the last one remaining.
+        Use this to bypass loading issues by forcing a new navigation.
+        """
+        try:
+            await self.browser.open_new_tab_and_go_to(url)
+            return ToolExecutionResponse(
+                observation=f"Successfully opened new tab and navigated to: {url}",
+            )
+        except Exception as e:
+            logger.error(f"Error opening new tab and navigating to {url}: {e}")
+            return ToolExecutionResponse(observation=f"Failed to open new tab and navigate to {url}: {e}")