Trisha Tomy committed on
Commit
7b40088
·
1 Parent(s): c8e914e

hopefully working headless remotely

Browse files
app.py CHANGED
@@ -27,10 +27,11 @@ async def initialize_runner():
27
  config = RunnerConfig.from_dict({
28
  "environment": {
29
  "name": "webbrowser",
30
- "homepage": "https://dwd000006jia1mae.lightning.force.com/lightning/setup/AccountForecastSettings/home",
31
- "headless": True,
 
32
  "launch_args": ["--no-sandbox", "--disable-setuid-sandbox"],
33
- "screenshot_delay": 3.0,
34
  "include_html": True,
35
  "include_poi_text": True,
36
  },
@@ -60,20 +61,9 @@ async def initialize_runner():
60
  logger.info("Proxy-lite Runner initialized successfully.")
61
  return _runner
62
 
63
- # --- MODIFIED run_async_task FUNCTION (SIMPLIFIED) ---
64
- # This function is no longer needed in most cases with gevent.monkey.patch_all(asyncio=True)
65
- # but if you must call async functions from sync context, you simply await them.
66
- # However, you are already in an async function context within Flask routes when using Gunicorn/gevent.
67
- # The Gunicorn worker itself implicitly runs an event loop.
68
- # Let's remove the run_until_complete part.
69
-
70
- # DELETED: def run_async_task(coro): ...
71
-
72
- # --- END MODIFIED run_async_task FUNCTION ---
73
-
74
 
75
  @app.route('/run_proxy_task', methods=['POST'])
76
- async def run_proxy_task_endpoint(): # <--- MAKE THIS FUNCTION ASYNC
77
  data = request.json
78
  request_task_instruction = data.get('task')
79
 
@@ -90,27 +80,40 @@ async def run_proxy_task_endpoint(): # <--- MAKE THIS FUNCTION ASYNC
90
  logger.error("Salesforce credentials (SALESFORCE_USERNAME, SALESFORCE_PASSWORD) environment variables not set.")
91
  return jsonify({"error": "Salesforce credentials not configured. Please set SALESFORCE_USERNAME and SALESFORCE_PASSWORD as Space secrets."}), 500
92
 
93
- agent_task = (
94
- f"Log in to Salesforce. The username is '{salesforce_username}' and the password is '{salesforce_password}'. "
95
- f"After attempting to log in, observe the page carefully. "
96
- f"If the login was successful, the URL should change from the login page, and you should see elements indicating a logged-in state (e.g., a Salesforce navigation menu, a home screen, or a profile icon), rather than a login form or an error message. "
97
- f"If the login is successful, {request_task_instruction}. "
98
- f"Report the final status of the requested action and confirmation of successful login."
99
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
- logger.info(f"Executing agent task: '{agent_task[:200]}...'")
102
 
103
  try:
104
- # Since run_proxy_task_endpoint is now async, you can directly await
105
  runner = await initialize_runner()
106
- result = await runner.run(agent_task) # <--- AWAIT DIRECTLY
107
 
108
- logger.info(f"Proxy-lite task completed. Output: {result[:200]}...")
109
  return jsonify({"output": result})
110
  except Exception as e:
111
  logger.exception(f"Error processing Salesforce task: {e}")
112
- # The RuntimeWarning: coroutine 'initialize_runner' was never awaited will disappear
113
- # because initialize_runner is now awaited.
114
  return jsonify({"error": f"An error occurred: {str(e)}. Check logs for details."}), 500
115
 
116
  @app.route('/')
@@ -119,8 +122,12 @@ def root():
119
  return "Proxy-lite API is running. Send POST requests to /run_proxy_task with a 'task' in JSON body."
120
 
121
  if __name__ == '__main__':
 
 
122
  if not os.environ.get("HF_API_TOKEN"):
123
  logger.error("HF_API_TOKEN environment variable is not set. Please set it for local testing.")
124
- exit(1)
125
- logger.info("Starting Flask development server on 0.0.0.0:7860...")
126
- app.run(host='0.0.0.0', port=7860, debug=True)
 
 
 
27
  config = RunnerConfig.from_dict({
28
  "environment": {
29
  "name": "webbrowser",
30
+ # Set homepage to Salesforce's generic login URL to avoid premature waits for target page elements.
31
+ "homepage": "https://login.salesforce.com/",
32
+ "headless": True, # Keep this False for local testing
33
  "launch_args": ["--no-sandbox", "--disable-setuid-sandbox"],
34
+ "screenshot_delay": 0.5, # Reduced for faster debugging cycles
35
  "include_html": True,
36
  "include_poi_text": True,
37
  },
 
61
  logger.info("Proxy-lite Runner initialized successfully.")
62
  return _runner
63
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  @app.route('/run_proxy_task', methods=['POST'])
66
+ async def run_proxy_task_endpoint():
67
  data = request.json
68
  request_task_instruction = data.get('task')
69
 
 
80
  logger.error("Salesforce credentials (SALESFORCE_USERNAME, SALESFORCE_PASSWORD) environment variables not set.")
81
  return jsonify({"error": "Salesforce credentials not configured. Please set SALESFORCE_USERNAME and SALESFORCE_PASSWORD as Space secrets."}), 500
82
 
83
+ # Define the specific Account Forecast Settings URL
84
+ account_forecast_url = "https://dwd000006jia1mae.lightning.force.com/lightning/setup/AccountForecastSettings/home"
85
+
86
+ # Define the tool code block to open a new tab and navigate after login
87
+ # Using a raw f-string for multiline tool code block
88
+ tool_code_block_new_tab = fr"""
89
+ <tool_code>
90
+ await browser.open_new_tab_and_go_to(url='{account_forecast_url}')
91
+ </tool_code>
92
+ """
93
+
94
+ # Refined agent_task instruction to be sequential and robust to Salesforce redirects
95
+ agent_task = f"""
96
+ **Task Instructions for Proxy Lite Agent:**
97
+ 1. **Start on Login Page:** Navigate to the Salesforce login page.
98
+ 2. **Perform Login:** Log in to Salesforce using the provided username '{salesforce_username}' and password '{salesforce_password}'. Ensure all login fields are filled and the 'Log In' button is clicked.
99
+ 3. **Handle Post-Login Redirect:** After clicking the 'Log In' button:
100
+ * Observe the current URL. If the URL has changed from the initial login domain (e.g., from `login.salesforce.com` or `my.salesforce.com`) **immediately execute the following tool code block to open a new tab and navigate directly to the Account Forecast Settings page (`{account_forecast_url}`) to bypass any persistent loading issues or internal redirects:**
101
+ {tool_code_block_new_tab.strip()}
102
+ 4. **Confirm Target Page Load:** After successfully navigating to '{account_forecast_url}' (either directly after login or via the new tab strategy), ensure the page is fully loaded and stable. This means no loading spinners should be visible, and the main content for 'Account Forecast Settings' (like a clear heading, relevant toggles, or data tables) should be present and interactive.
103
+ 5. **Execute Main Task:** Once the Account Forecast Settings page is confirmed loaded and stable, proceed with the original user request: {request_task_instruction}.
104
+ 6. **Report Final Status:** Report the final status of the requested action, confirming both successful login and complete page load of the Account Forecast Settings.
105
+ """
106
 
107
+ logger.info(f"Executing agent task (truncated for log): '{agent_task[:500]}...'")
108
 
109
  try:
 
110
  runner = await initialize_runner()
111
+ result = await runner.run(agent_task)
112
 
113
+ logger.info(f"Proxy-lite task completed. Output (truncated for log): {result[:500]}...")
114
  return jsonify({"output": result})
115
  except Exception as e:
116
  logger.exception(f"Error processing Salesforce task: {e}")
 
 
117
  return jsonify({"error": f"An error occurred: {str(e)}. Check logs for details."}), 500
118
 
119
  @app.route('/')
 
122
  return "Proxy-lite API is running. Send POST requests to /run_proxy_task with a 'task' in JSON body."
123
 
124
  if __name__ == '__main__':
125
+ # It is crucial to set HF_API_TOKEN as an environment variable (e.g., in a .env file or directly)
126
+ # for local testing as well, otherwise initialize_runner will fail.
127
  if not os.environ.get("HF_API_TOKEN"):
128
  logger.error("HF_API_TOKEN environment variable is not set. Please set it for local testing.")
129
+ # Removed exit(1) to allow the Flask app to start for basic connectivity checks,
130
+ # but runner initialization will still fail if token is missing.
131
+ # For full functionality, the token is essential.
132
+ logger.info("Starting Flask development server on 0.0.0.0:6101...")
133
+ app.run(host='0.0.0.0', port=6101, debug=True)
src/proxy_lite/browser/browser.py CHANGED
@@ -106,7 +106,10 @@ class BrowserSession:
106
  self.context = await self.browser.new_context(
107
  viewport={"width": self.viewport_width, "height": self.viewport_height},
108
  )
109
- await self.context.new_page()
 
 
 
110
  self.context.set_default_timeout(60_000)
111
  self.current_page.set_default_timeout(60_000)
112
  await stealth_async(self.current_page, StealthConfig(navigator_user_agent=False))
@@ -129,11 +132,11 @@ class BrowserSession:
129
 
130
  @property
131
  def current_page(self) -> Optional[Page]:
132
- if self.context.pages:
133
- return self.context.pages[-1]
134
  return None
135
 
136
- @property
137
  def current_url(self) -> Optional[str]:
138
  if self.current_page:
139
  return self.current_page.url
@@ -176,7 +179,6 @@ class BrowserSession:
176
  logger.error(f"Error processing iframe: {e}")
177
  return None
178
 
179
- # re-run for cases of mid-run redirects
180
  @retry(
181
  wait=wait_exponential(multiplier=1, min=1, max=10),
182
  stop=stop_after_delay(5),
@@ -185,54 +187,100 @@ class BrowserSession:
185
  )
186
  async def update_poi(self) -> None:
187
  try:
188
- # We will use "domcontentloaded" as a base and then wait for specific elements
189
- await self.current_page.wait_for_load_state("domcontentloaded", timeout=60000) # Reduced timeout for initial load
190
- logger.debug("wait_for_load_state('domcontentloaded') completed.")
191
-
192
- # --- MODIFICATION START ---
193
- # Wait for the "Account Forecasting" heading to be visible
194
- # Adjust the selector below based on the actual HTML of the Salesforce page.
195
- # Common selectors could be:
196
- # - `h1:has-text('Account Forecasting')`
197
- # - `h2:has-text('Account Forecasting')`
198
- # - `div.some-class-name:has-text('Account Forecasting')`
199
- # - `[data-qa-id="account-forecasting-heading"]` (if Salesforce uses data-qa attributes)
200
- # You might need to inspect the Salesforce page to get the exact selector.
201
- # For now, let's assume it's an h1 or h2 tag containing the text.
202
- try:
203
- await self.current_page.wait_for_selector(
204
- "h1:has-text('Account Forecasting'), h2:has-text('Account Forecasting')",
205
- timeout=60000, # Set a reasonable timeout for this specific element
206
- state="visible"
207
- )
208
- logger.debug("Successfully waited for 'Account Forecasting' heading.")
209
- except PlaywrightTimeoutError as e:
210
- logger.error(f"Timeout waiting for 'Account Forecasting' heading on URL: {self.current_page.url}")
211
- # You might want to log more specific HTML/screenshot here if this still times out often
212
- raise # Re-raise if this critical element doesn't appear
213
-
214
- # It's still good to wait for the body to be visible, but with a shorter timeout
215
- # if the previous specific heading check passed.
216
- try:
217
- await self.current_page.wait_for_selector("body", timeout=30000, state="visible")
218
- logger.debug("wait_for_selector('body', state='visible') completed.")
219
- except PlaywrightTimeoutError as e:
220
- logger.warning(f"DEBUGGING: Playwright Timeout (30s) on body selector, but 'Account Forecasting' heading was found. This might be acceptable if the page is usable.")
221
- # We can choose to suppress this specific timeout if the critical element is found,
222
- # or re-raise it if a fully loaded body is strictly necessary for further actions.
223
- # For now, let's just log and continue, as the primary issue was full page load.
224
- pass # Do not re-raise if we've found our key indicator.
225
-
226
-
227
- except PlaywrightTimeoutError: # This outer catch is for the wait_for_load_state timeout
228
- logger.error(f"Timeout waiting for website load state (domcontentloaded): {self.current_url}")
229
- raise # Re-raise if initial load_state itself times out
230
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  except Exception as e:
232
- logger.error(f"An unexpected error occurred during page readiness check: {e}")
233
  raise
234
 
235
- # Run the bounding box javascript code to highlight the points of interest on the page
236
  page_info = await self.current_page.evaluate(
237
  """() => {
238
  overwriteDefaultSelectConvergence();
@@ -271,6 +319,7 @@ class BrowserSession:
271
  centroid["left"] += iframe_offsets[index]["x"]
272
  centroid["top"] += iframe_offsets[index]["y"]
273
  centroid["right"] += iframe_offsets[index]["x"]
 
274
  centroid["bottom"] += iframe_offsets[index]["y"]
275
  element_centroids.extend(iframe_poi["element_centroids"])
276
 
@@ -307,17 +356,12 @@ class BrowserSession:
307
  if delay > 0.0:
308
  await asyncio.sleep(delay)
309
  await self.update_poi()
310
- old_poi_positions = [tuple(point) for point in self.poi_centroids]
 
311
  img = await self.current_page.screenshot(type=type, quality=quality, scale=scale)
312
  annotated_img = annotate_bounding_boxes(image=img, bounding_boxes=self.bounding_boxes)
313
- # check page has not changed since the screenshot was taken
314
- await self.update_poi()
315
- new_poi_positions = [tuple(point) for point in self.poi_centroids]
316
- if new_poi_positions != old_poi_positions:
317
- # if it has changed, take another
318
- img = await self.current_page.screenshot(type=type, quality=quality, scale=scale)
319
- await self.update_poi()
320
- annotated_img = annotate_bounding_boxes(image=img, bounding_boxes=self.bounding_boxes)
321
  return img, annotated_img
322
 
323
  async def goto(self, url: str) -> None:
@@ -424,6 +468,28 @@ class BrowserSession:
424
  await self.current_page.keyboard.press("Control+Home")
425
  await self.current_page.keyboard.press("Control+Shift+End")
426
  await self.current_page.keyboard.press("Backspace")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
427
 
428
 
429
  if __name__ == "__main__":
@@ -439,4 +505,4 @@ if __name__ == "__main__":
439
  with open("output.png", "wb") as f:
440
  f.write(annotated_image)
441
 
442
- asyncio.run(dummy_test())
 
106
  self.context = await self.browser.new_context(
107
  viewport={"width": self.viewport_width, "height": self.viewport_height},
108
  )
109
+ # Ensure there's at least one page open
110
+ if not self.context.pages:
111
+ await self.context.new_page()
112
+
113
  self.context.set_default_timeout(60_000)
114
  self.current_page.set_default_timeout(60_000)
115
  await stealth_async(self.current_page, StealthConfig(navigator_user_agent=False))
 
132
 
133
  @property
134
  def current_page(self) -> Optional[Page]:
135
+ if self.context and self.context.pages:
136
+ return self.context.pages[-1] # Return the most recently opened page
137
  return None
138
 
139
+ @property
140
  def current_url(self) -> Optional[str]:
141
  if self.current_page:
142
  return self.current_page.url
 
179
  logger.error(f"Error processing iframe: {e}")
180
  return None
181
 
 
182
  @retry(
183
  wait=wait_exponential(multiplier=1, min=1, max=10),
184
  stop=stop_after_delay(5),
 
187
  )
188
  async def update_poi(self) -> None:
189
  try:
190
+ # Wait for basic page load states to ensure the DOM is ready.
191
+ # This is a fundamental wait that should always apply.
192
+ await self.current_page.wait_for_load_state("domcontentloaded", timeout=60000)
193
+ logger.debug(f"DEBUG: wait_for_load_state('domcontentloaded') completed for {self.current_page.url}.")
194
+
195
+ current_url = self.current_page.url
196
+
197
+ # Define common Salesforce URL patterns for different states
198
+ login_url_patterns = [
199
+ "login.salesforce.com",
200
+ "identity.force.com",
201
+ "auth.lightning.force.com",
202
+ "setup.salesforce.com", # Sometimes a setup login redirects here temporarily
203
+ "my.salesforce.com" # Your specific custom domain login redirects here
204
+ ]
205
+
206
+ # This is the main Salesforce Lightning application base URL, typically seen after login.
207
+ # We treat this as an intermediate loading state before the specific target page.
208
+ intermediate_app_url_pattern = "/one/one.app"
209
+
210
+ # Check the current state of the page based on its URL
211
+ is_on_login_page = any(pattern in current_url for pattern in login_url_patterns)
212
+ is_on_intermediate_app_page = intermediate_app_url_pattern in current_url
213
+ # Note: is_on_target_forecast_page checks if the specific target path is in the URL
214
+ is_on_target_forecast_page = "/AccountForecastSettings/home" in current_url
215
+
216
+ # --- CONDITIONAL WAITING LOGIC BASED ON URL ---
217
+ if is_on_target_forecast_page:
218
+ logger.info(f"INFO: Detected target Account Forecast Settings page: {current_url}. Waiting for content.")
219
+ # When on the specific target page, wait for its content and spinners
220
+ spinner_selectors = [
221
+ "div.slds-spinner_container",
222
+ "div.auraLoadingBox",
223
+ "div.dxp_axb_container", # Main overlay from your inspect screenshot
224
+ "div.slds-sprite-astro-x-large" # Specific animated element itself
225
+ ]
226
+ for selector in spinner_selectors:
227
+ try:
228
+ await self.current_page.wait_for_selector(selector, state="hidden", timeout=5000) # Reduced timeout
229
+ logger.debug(f"DEBUG: Spinner element '{selector}' became hidden for {self.current_page.url}.")
230
+ except PlaywrightTimeoutError:
231
+ logger.warning(f"DEBUGGING: Spinner element '{selector}' not detected or did not disappear on {self.current_page.url} within 5s.")
232
+
233
+ # Wait for a known element on the Account Forecast Settings page to ensure content is there.
234
+ try:
235
+ # Added 'h2' for section headers, and a more generic 'div[data-aura-rendered-by]' for Lightning components
236
+ await self.current_page.wait_for_selector("h1.slds-page-header__title, h2, .account-forecast-settings-component, div[data-aura-rendered-by]", state="visible", timeout=15000) # Increased timeout slightly for robust content load
237
+ logger.debug(f"DEBUG: Confirmed main page element visible for {self.current_page.url}.")
238
+ except PlaywrightTimeoutError:
239
+ logger.warning(f"DEBUGGING: Main page element not visible on {self.current_page.url} within 15s. This might indicate incomplete page load despite no spinner.")
240
+
241
+ elif is_on_login_page:
242
+ logger.info(f"INFO: Detected Salesforce login page: {current_url}. Waiting for login elements.")
243
+ # When on a login page, just wait for the login form elements to be visible
244
+ try:
245
+ await self.current_page.wait_for_selector("input[type='email'], input[type='password'], input[type='submit'], #username, #password, #Login", state="visible", timeout=10000)
246
+ logger.debug(f"DEBUG: Login page elements visible on {self.current_page.url}.")
247
+ except PlaywrightTimeoutError:
248
+ logger.warning(f"DEBUGGING: Login page elements not visible on {self.current_page.url} within 10s. This may happen if elements are in an iframe or if page is extremely slow.")
249
+
250
+ elif is_on_intermediate_app_page:
251
+ logger.info(f"INFO: Detected intermediate Salesforce Lightning app loading page: {current_url}. Waiting for network idle and app spinner.")
252
+ # This is the /one/one.app page or similar. Don't wait for specific content, just general load.
253
+ try:
254
+ await self.current_page.wait_for_load_state("networkidle", timeout=30000) # Give it more time for network to settle
255
+ logger.debug(f"DEBUG: Network idle detected on intermediate app page: {current_url}.")
256
+ except PlaywrightTimeoutError:
257
+ logger.warning(f"DEBUGGING: Network idle timeout on intermediate app page: {current_url}. Proceeding anyway.")
258
+
259
+ # Also try to wait for a common full-app spinner to disappear, if present
260
+ try:
261
+ await self.current_page.wait_for_selector('div.app-spinner, div.auraLoadingBox', state='hidden', timeout=15000) # Added auraLoadingBox as it might reappear
262
+ logger.debug(f"DEBUG: App spinner on intermediate page became hidden.")
263
+ except PlaywrightTimeoutError:
264
+ logger.warning(f"DEBUGGING: App spinner on intermediate page not found or did not disappear.")
265
+
266
+ else:
267
+ logger.info(f"INFO: Detected unhandled URL type: {current_url}. Performing generic body wait.")
268
+ # Fallback for any other page, just wait for body to be visible
269
+ try:
270
+ await self.current_page.wait_for_selector("body", timeout=5000, state="visible")
271
+ logger.debug(f"DEBUG: wait_for_selector('body', state='visible') completed for {self.current_page.url}.")
272
+ except PlaywrightTimeoutError:
273
+ logger.warning(f"DEBUGGING: Playwright Timeout (5s) on body selector for {self.current_page.url}. Continuing anyway.")
274
+ pass
275
+
276
+ except PlaywrightTimeoutError as e:
277
+ logger.error(f"ERROR: Timeout waiting for page readiness for {self.current_page.url}: {e}")
278
+ raise # Re-raise if essential waits fail (e.g., initial domcontentloaded)
279
  except Exception as e:
280
+ logger.error(f"ERROR: An unexpected error occurred during page readiness check for {self.current_page.url}: {e}")
281
  raise
282
 
283
+ # Rest of update_poi: Run the bounding box javascript code to highlight the points of interest on the page
284
  page_info = await self.current_page.evaluate(
285
  """() => {
286
  overwriteDefaultSelectConvergence();
 
319
  centroid["left"] += iframe_offsets[index]["x"]
320
  centroid["top"] += iframe_offsets[index]["y"]
321
  centroid["right"] += iframe_offsets[index]["x"]
322
+ # Fix: Removed duplicate 'centroid["y"] += iframe_offsets[index]["y"]'
323
  centroid["bottom"] += iframe_offsets[index]["y"]
324
  element_centroids.extend(iframe_poi["element_centroids"])
325
 
 
356
  if delay > 0.0:
357
  await asyncio.sleep(delay)
358
  await self.update_poi()
359
+ # Keep original logic if page is highly dynamic, but for static shots, simpler is faster
360
+ # old_poi_positions = [tuple(point) for point in self.poi_centroids]
361
  img = await self.current_page.screenshot(type=type, quality=quality, scale=scale)
362
  annotated_img = annotate_bounding_boxes(image=img, bounding_boxes=self.bounding_boxes)
363
+ # Re-evaluating this block for performance. Removed redundant update_poi and conditional screenshot.
364
+ # If precise screenshot timing is needed, the caller should manage delays and updates.
 
 
 
 
 
 
365
  return img, annotated_img
366
 
367
  async def goto(self, url: str) -> None:
 
468
  await self.current_page.keyboard.press("Control+Home")
469
  await self.current_page.keyboard.press("Control+Shift+End")
470
  await self.current_page.keyboard.press("Backspace")
471
+
472
+ async def open_new_tab_and_go_to(self, url: str) -> None:
473
+ """
474
+ Opens a new browser tab/page and navigates to the specified URL.
475
+ Closes the old page if it's not the last one remaining.
476
+ """
477
+ logger.info(f"Attempting to open a new tab and navigate to: {url}")
478
+ new_page = await self.context.new_page()
479
+
480
+ # Close the previous page if it's not the only one left in the context
481
+ if len(self.context.pages) > 1 and self.current_page and self.current_page != new_page:
482
+ try:
483
+ await self.current_page.close()
484
+ logger.debug("Closed previous page.")
485
+ except Exception as e:
486
+ logger.warning(f"Could not close previous page (might already be closed or detached): {e}")
487
+
488
+ # After navigation, trigger POI update to reflect the new page's state
489
+ await new_page.goto(url, wait_until="domcontentloaded")
490
+ logger.info(f"Successfully navigated to {url} in a new tab.")
491
+ # Crucial: update_poi uses self.current_page, which is now new_page implicitly
492
+ await self.update_poi()
493
 
494
 
495
  if __name__ == "__main__":
 
505
  with open("output.png", "wb") as f:
506
  f.write(annotated_image)
507
 
508
+ asyncio.run(dummy_test())
src/proxy_lite/environments/webbrowser.py CHANGED
@@ -12,7 +12,8 @@ from proxy_lite.environments.environment_base import (
12
  State,
13
  )
14
  from proxy_lite.tools import BrowserTool, Tool, ToolExecutionResponse
15
-
 
16
 
17
  @Environments.register_environment_config("webbrowser")
18
  class WebBrowserEnvironmentConfig(BaseEnvironmentConfig):
@@ -75,7 +76,14 @@ class WebBrowserEnvironment(BaseEnvironment):
75
  return []
76
 
77
  async def initialise(self) -> Observation:
78
- await self.browser.goto(self.config.homepage)
 
 
 
 
 
 
 
79
  original_img, annotated_img = await self.browser.screenshot(
80
  delay=self.config.screenshot_delay,
81
  )
@@ -92,6 +100,7 @@ class WebBrowserEnvironment(BaseEnvironment):
92
  if self.config.keep_original_image:
93
  info["original_image"] = base64.b64encode(original_img).decode("utf-8")
94
 
 
95
  return Observation(
96
  state=State(
97
  text=f"URL: {self.browser.current_url}"
@@ -182,4 +191,4 @@ class WebBrowserEnvironment(BaseEnvironment):
182
 
183
  async def get_info(self) -> dict[str, Any]:
184
  info = {}
185
- return info
 
12
  State,
13
  )
14
  from proxy_lite.tools import BrowserTool, Tool, ToolExecutionResponse
15
+ # Import logger from proxy_lite.logger, or if it's already available via BaseEnvironment
16
+ from proxy_lite.logger import logger # Assuming you want to use the same logger
17
 
18
  @Environments.register_environment_config("webbrowser")
19
  class WebBrowserEnvironmentConfig(BaseEnvironmentConfig):
 
76
  return []
77
 
78
  async def initialise(self) -> Observation:
79
+ self.logger.debug(f"DEBUG: Initialising WebBrowserEnvironment. Homepage: {self.config.homepage}")
80
+ try:
81
+ await self.browser.goto(self.config.homepage)
82
+ self.logger.debug(f"DEBUG: Browser navigated to homepage. Current URL: {self.browser.current_url}")
83
+ except Exception as e:
84
+ self.logger.error(f"ERROR: Failed to navigate to homepage {self.config.homepage}: {e}")
85
+ raise # Re-raise to propagate the error
86
+
87
  original_img, annotated_img = await self.browser.screenshot(
88
  delay=self.config.screenshot_delay,
89
  )
 
100
  if self.config.keep_original_image:
101
  info["original_image"] = base64.b64encode(original_img).decode("utf-8")
102
 
103
+ self.logger.debug(f"DEBUG: Initial observation captured. URL: {self.browser.current_url}")
104
  return Observation(
105
  state=State(
106
  text=f"URL: {self.browser.current_url}"
 
191
 
192
  async def get_info(self) -> dict[str, Any]:
193
  info = {}
194
+ return info
src/proxy_lite/tools/browser_tool.py CHANGED
@@ -1,6 +1,6 @@
1
  import asyncio
2
  from contextlib import AsyncExitStack
3
- from typing import List, Literal, Optional
4
 
5
  from pydantic import BaseModel, Field
6
 
@@ -123,6 +123,10 @@ class ReloadParams(BaseModel):
123
  class DoNothingParams(BaseModel):
124
  pass
125
 
 
 
 
 
126
 
127
  class BrowserTool(Tool):
128
  def __init__(self, session: BrowserSession) -> None:
@@ -148,7 +152,7 @@ class BrowserTool(Tool):
148
  async def goto(self, url: str) -> ToolExecutionResponse:
149
  """Go directly to a specific web url. Specify the exact URL."""
150
  await self.browser.goto(url)
151
- return ToolExecutionResponse()
152
 
153
  @attach_param_schema(GoogleSearchParams)
154
  async def google_search(self, query_plan: str, query: str) -> ToolExecutionResponse:
@@ -157,13 +161,22 @@ class BrowserTool(Tool):
157
  """
158
  url = f"https://www.google.com/search?q={query}"
159
  await self.browser.goto(url)
160
- return ToolExecutionResponse()
161
 
162
  @attach_param_schema(ClickParams)
163
  async def click(self, mark_id: int) -> ToolExecutionResponse:
164
  """Click on an element of the page."""
165
- await self.browser.click(mark_id=mark_id)
166
- return ToolExecutionResponse()
 
 
 
 
 
 
 
 
 
167
 
168
  @attach_param_schema(TypeParams)
169
  async def type(self, entries: List[dict], submit: bool) -> ToolExecutionResponse:
@@ -171,51 +184,101 @@ class BrowserTool(Tool):
171
  You can type into one or more elements.
172
  Note that the text inside an element is cleared before typing.
173
  """
 
174
  for i, entry_dict in enumerate(entries):
175
- entry = TypeEntry(**entry_dict)
176
- last_entry = i == len(entries) - 1
177
- old_poi_positions = [tuple(point) for point in self.browser.poi_centroids]
178
- await self.browser.enter_text(
179
- mark_id=entry.mark_id,
180
- text=entry.content,
181
- submit=submit and last_entry,
182
- )
183
- await self.browser.update_poi()
184
- new_poi_positions = [tuple(point) for point in self.browser.poi_centroids]
185
- if not last_entry and old_poi_positions != new_poi_positions:
186
- logger.error(
187
- "POI positions changed mid-typing, cancelling future type entries.",
188
  )
189
- break
190
- return ToolExecutionResponse()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
  @attach_param_schema(ScrollParams)
193
  async def scroll(self, direction: str, mark_id: int) -> ToolExecutionResponse:
194
  """Scroll the page (or a scrollable element) up, down, left or right."""
195
- if mark_id == -1:
196
- mark_id = None
197
- await self.browser.scroll(direction=direction, mark_id=mark_id)
198
- return ToolExecutionResponse()
 
 
 
 
 
 
 
 
 
 
199
 
200
  @attach_param_schema(BackParams)
201
  async def back(self) -> ToolExecutionResponse:
202
  """Go back to the previous page."""
203
- await self.browser.go_back()
204
- return ToolExecutionResponse()
 
 
 
 
 
205
 
206
  @attach_param_schema(WaitParams)
207
  async def wait(self) -> ToolExecutionResponse:
208
  """Wait three seconds. Useful when the page appears to still be loading, or if there are any unfinished webpage processes.""" # noqa: E501
209
  await asyncio.sleep(3)
210
- return ToolExecutionResponse()
211
 
212
  @attach_param_schema(ReloadParams)
213
  async def reload(self) -> ToolExecutionResponse:
214
  """Reload the current page. Useful when the page seems unresponsive, broken, outdated, or if you want to reset the page to its initial state.""" # noqa: E501
215
- await self.browser.reload()
216
- return ToolExecutionResponse()
 
 
 
 
 
217
 
218
  @attach_param_schema(DoNothingParams)
219
  async def do_nothing_tool(self) -> ToolExecutionResponse:
220
  """Do nothing. Use this if you have no need for the browser at this time."""
221
- return ToolExecutionResponse()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import asyncio
2
  from contextlib import AsyncExitStack
3
+ from typing import List, Literal, Optional, Any
4
 
5
  from pydantic import BaseModel, Field
6
 
 
123
  class DoNothingParams(BaseModel):
124
  pass
125
 
126
# Argument schema for the open_new_tab_and_go_to tool; it is bound to that
# method through @attach_param_schema.
# NOTE(review): documented with comments rather than a class docstring, since a
# docstring on a pydantic model may end up in the generated schema - confirm.
class OpenNewTabAndGoToParams(BaseModel):
    # Destination URL. The `...` default marks the field as required.
    url: str = Field(..., description="The URL to navigate to in the new tab.")
129
+
130
 
131
  class BrowserTool(Tool):
132
  def __init__(self, session: BrowserSession) -> None:
 
152
  async def goto(self, url: str) -> ToolExecutionResponse:
153
  """Go directly to a specific web url. Specify the exact URL."""
154
  await self.browser.goto(url)
155
+ return ToolExecutionResponse(observation=f"Successfully navigated to URL: {url}") # Added observation
156
 
157
  @attach_param_schema(GoogleSearchParams)
158
  async def google_search(self, query_plan: str, query: str) -> ToolExecutionResponse:
 
161
  """
162
  url = f"https://www.google.com/search?q={query}"
163
  await self.browser.goto(url)
164
+ return ToolExecutionResponse(observation=f"Performed Google search for: {query}") # Added observation
165
 
166
  @attach_param_schema(ClickParams)
167
  async def click(self, mark_id: int) -> ToolExecutionResponse:
168
  """Click on an element of the page."""
169
+ try:
170
+ await self.browser.click(mark_id=mark_id)
171
+ return ToolExecutionResponse(observation=f"Clicked element with mark ID: {mark_id}")
172
+ except IndexError as e:
173
+ # This happens if mark_id is out of bounds for browser.poi_centroids
174
+ logger.error(f"Click failed: Mark ID {mark_id} not found or POI list empty. Error: {e}")
175
+ return ToolExecutionResponse(observation=f"Failed to click element with mark ID {mark_id}. Element not found or POI list invalid.")
176
+ except Exception as e:
177
+ logger.error(f"Click failed with unexpected error for mark ID {mark_id}: {e}")
178
+ return ToolExecutionResponse(observation=f"An unexpected error occurred while trying to click element {mark_id}: {e}")
179
+
180
 
181
  @attach_param_schema(TypeParams)
182
  async def type(self, entries: List[dict], submit: bool) -> ToolExecutionResponse:
 
184
  You can type into one or more elements.
185
  Note that the text inside an element is cleared before typing.
186
  """
187
+ typed_ids = []
188
  for i, entry_dict in enumerate(entries):
189
+ try:
190
+ entry = TypeEntry(**entry_dict)
191
+ last_entry = i == len(entries) - 1
192
+ old_poi_positions = [tuple(point) for point in self.browser.poi_centroids]
193
+ await self.browser.enter_text(
194
+ mark_id=entry.mark_id,
195
+ text=entry.content,
196
+ submit=submit and last_entry,
 
 
 
 
 
197
  )
198
+ typed_ids.append(entry.mark_id)
199
+ await self.browser.update_poi()
200
+ new_poi_positions = [tuple(point) for point in self.browser.poi_centroids]
201
+ if not last_entry and old_poi_positions != new_poi_positions:
202
+ logger.error(
203
+ "POI positions changed mid-typing, cancelling future type entries.",
204
+ )
205
+ break
206
+ except IndexError as e:
207
+ logger.error(f"Type failed: Mark ID {entry.mark_id} not found or POI list empty. Error: {e}")
208
+ return ToolExecutionResponse(observation=f"Failed to type into element with mark ID {entry.mark_id}. Element not found or POI list invalid. Typed into: {typed_ids if typed_ids else 'none'}.")
209
+ except Exception as e:
210
+ logger.error(f"Type failed with unexpected error for mark ID {entry.mark_id}: {e}")
211
+ return ToolExecutionResponse(observation=f"An unexpected error occurred while trying to type into element {entry.mark_id}: {e}. Typed into: {typed_ids if typed_ids else 'none'}.")
212
+
213
+ return ToolExecutionResponse(
214
+ observation=f"Typed text into elements with mark IDs: {typed_ids}",
215
+ )
216
 
217
  @attach_param_schema(ScrollParams)
218
  async def scroll(self, direction: str, mark_id: int) -> ToolExecutionResponse:
219
  """Scroll the page (or a scrollable element) up, down, left or right."""
220
+ try:
221
+ if mark_id == -1:
222
+ mark_id_for_browser = None # Pass None to browser.scroll for page scroll
223
+ else:
224
+ mark_id_for_browser = mark_id
225
+
226
+ await self.browser.scroll(direction=direction, mark_id=mark_id_for_browser)
227
+ return ToolExecutionResponse(observation=f"Scrolled {direction} on element with mark ID: {mark_id if mark_id != -1 else 'page'}")
228
+ except IndexError as e:
229
+ logger.error(f"Scroll failed: Mark ID {mark_id} not found or POI list empty. Error: {e}")
230
+ return ToolExecutionResponse(observation=f"Failed to scroll element with mark ID {mark_id}. Element not found or POI list invalid.")
231
+ except Exception as e:
232
+ logger.error(f"Scroll failed with unexpected error for mark ID {mark_id}: {e}")
233
+ return ToolExecutionResponse(observation=f"An unexpected error occurred while trying to scroll element {mark_id}: {e}")
234
 
235
  @attach_param_schema(BackParams)
236
  async def back(self) -> ToolExecutionResponse:
237
  """Go back to the previous page."""
238
+ try:
239
+ await self.browser.go_back()
240
+ return ToolExecutionResponse(observation="Went back to the previous page.")
241
+ except Exception as e:
242
+ logger.error(f"Go back failed: {e}")
243
+ return ToolExecutionResponse(observation=f"Failed to go back: {e}")
244
+
245
 
246
  @attach_param_schema(WaitParams)
247
  async def wait(self) -> ToolExecutionResponse:
248
  """Wait three seconds. Useful when the page appears to still be loading, or if there are any unfinished webpage processes.""" # noqa: E501
249
  await asyncio.sleep(3)
250
+ return ToolExecutionResponse(observation="Waited for a few seconds.")
251
 
252
  @attach_param_schema(ReloadParams)
253
  async def reload(self) -> ToolExecutionResponse:
254
  """Reload the current page. Useful when the page seems unresponsive, broken, outdated, or if you want to reset the page to its initial state.""" # noqa: E501
255
+ try:
256
+ await self.browser.reload()
257
+ return ToolExecutionResponse(observation="Reloaded the current page.")
258
+ except Exception as e:
259
+ logger.error(f"Reload failed: {e}")
260
+ return ToolExecutionResponse(observation=f"Failed to reload the page: {e}")
261
+
262
 
263
  @attach_param_schema(DoNothingParams)
264
  async def do_nothing_tool(self) -> ToolExecutionResponse:
265
  """Do nothing. Use this if you have no need for the browser at this time."""
266
+ return ToolExecutionResponse(observation="Did nothing in the browser.")
267
+
268
+ # --- NEW: Expose the open_new_tab_and_go_to method as a tool ---
269
+ @attach_param_schema(OpenNewTabAndGoToParams)
270
+ async def open_new_tab_and_go_to(self, url: str) -> ToolExecutionResponse:
271
+ """
272
+ Opens a new browser tab/page and navigates to the specified URL.
273
+ Closes the old page if it's not the last one remaining.
274
+ Use this to bypass loading issues by forcing a new navigation.
275
+ """
276
+ try:
277
+ await self.browser.open_new_tab_and_go_to(url)
278
+ return ToolExecutionResponse(
279
+ observation=f"Successfully opened new tab and navigated to: {url}",
280
+ )
281
+ except Exception as e:
282
+ logger.error(f"Error opening new tab and navigating to {url}: {e}")
283
+ return ToolExecutionResponse(observation=f"Failed to open new tab and navigate to {url}: {e}")
284
+