Trisha Tomy committed on
Commit
928d95c
·
1 Parent(s): 7af9344

trying fixes for loading

Browse files
Files changed (1) hide show
  1. src/proxy_lite/browser/browser.py +43 -117
src/proxy_lite/browser/browser.py CHANGED
@@ -15,7 +15,7 @@ from tenacity import before_sleep_log, retry, stop_after_delay, wait_exponential
15
  from proxy_lite.browser.bounding_boxes import POI, BoundingBox, Point, annotate_bounding_boxes
16
  from proxy_lite.logger import logger
17
 
18
- import base64 # Make sure this import is present!
19
 
20
  SELF_CONTAINED_TAGS = [
21
  # many of these are non-interactive but keeping them anyway
@@ -107,10 +107,8 @@ class BrowserSession:
107
  viewport={"width": self.viewport_width, "height": self.viewport_height},
108
  )
109
  await self.context.new_page()
110
- # Set default timeouts for context and page
111
- self.context.set_default_timeout(180_000) # Increased based on previous discussions
112
- self.current_page.set_default_timeout(180_000) # Increased based on previous discussions
113
-
114
  await stealth_async(self.current_page, StealthConfig(navigator_user_agent=False))
115
  await self.context.add_init_script(
116
  path=Path(__file__).with_name("add_custom_select.js"),
@@ -187,31 +185,31 @@ class BrowserSession:
187
  )
188
  async def update_poi(self) -> None:
189
  try:
190
- # Step 1: Wait for DOMContentLoaded. This ensures the basic HTML structure is parsed.
191
- logger.debug("Attempting wait_for_load_state('domcontentloaded')...")
192
- await self.current_page.wait_for_load_state("domcontentloaded", timeout=180000)
193
- logger.debug("wait_for_load_state('domcontentloaded') completed.")
194
-
195
- # Step 2: Wait for the specific text "Account Forecasting" to be visible on the page.
196
- # This is a strong indicator that the core content for the task has loaded.
197
- target_text = "Account Forecasting"
198
- logger.debug(f"Attempting to wait for text: '{target_text}' to be visible...")
199
- await self.current_page.wait_for_selector(f"text={target_text}", timeout=180000, state="visible")
200
- logger.debug(f"Text '{target_text}' became visible.")
201
-
202
- # Optional: You can still add a wait for network idle *after* the text is visible
203
- # if the page still isn't interactive immediately, but prioritize the text.
204
- # try:
205
- # await self.current_page.wait_for_load_state("networkidle", timeout=60000) # Shorter timeout here
206
- # logger.debug("wait_for_load_state('networkidle') completed after text appeared.")
207
- # except PlaywrightTimeoutError:
208
- # logger.warning("Network idle state not reached after text appeared, but proceeding.")
209
 
210
  except PlaywrightTimeoutError as e:
211
  # --- START TEMPORARY DEBUGGING CODE ---
212
  # This block captures state specifically when a Playwright timeout occurs
213
  current_url = self.current_page.url if self.current_page else "N/A"
214
- logger.error(f"DEBUGGING: Playwright Timeout (180s) during page readiness check for text '{target_text}' at URL: {current_url}")
215
 
216
  html_content = None
217
  try:
@@ -224,89 +222,17 @@ class BrowserSession:
224
  screenshot_b64 = "N/A"
225
  try:
226
  if self.current_page:
 
 
227
  screenshot_bytes = await self.current_page.screenshot(type="jpeg", quality=50)
228
  screenshot_b64 = base64.b64encode(screenshot_bytes).decode("utf-8")
 
229
  logger.error(f"DEBUGGING: Base64 Screenshot (truncated) when timeout occurred:\ndata:image/jpeg;base64,{screenshot_b64[:100]}... (full string is much longer)")
230
- except Exception as ss_e:
231
- logger.error(f"DEBUGGING: Could not take screenshot for debug: {ss_e}")
232
-
233
- # It's crucial to re-raise the exception so the upstream code knows the operation failed.
234
- raise e
235
- # --- END TEMPORARY DEBUGGING CODE ---
236
-
237
- except Exception as e:
238
- # This catches any other unexpected errors during the page readiness checks
239
- logger.error(f"An unexpected error occurred during page readiness check: {e}")
240
- raise # Re-raise to propagate
241
-
242
- # --- Code below this point will only execute if the page readiness checks pass ---
243
- # Ensure this block is at the SAME INDENTATION LEVEL as the 'try' and 'except' above.
244
-
245
- # Run the bounding box javascript code to highlight the points of interest on the page
246
- # This part assumes the page is now ready for interaction and content extraction.
247
- page_info = await self.current_page.evaluate(
248
- """() => {
249
- overwriteDefaultSelectConvergence();
250
- return findPOIsConvergence();
251
- }""",
252
- )
253
- # Get the points of interest on the page
254
- self.poi_elements = page_info["element_descriptions"]
255
- element_centroids = page_info["element_centroids"]
256
- try: # This is a new try block for iframe processing
257
- # Select all iframes on the page
258
- iframes = await self.current_page.query_selector_all("iframe")
259
-
260
- max_iframes = 10 # Limit the number of iframes to process for performance
261
-
262
- # Define an asynchronous function to process and filter each iframe
263
- tasks = [asyncio.create_task(self.process_iframe(iframe)) for iframe in iframes[:max_iframes]]
264
-
265
- # Gather results from iframe processing tasks concurrently
266
- results = await asyncio.gather(*tasks)
267
-
268
- # Filter out any None results from iframe processing errors or non-visible iframes
269
- filtered_results = [result for result in results if result is not None]
270
-
271
- iframes_pois = []
272
- iframe_offsets = []
273
-
274
- for poi, offset in filtered_results:
275
- iframes_pois.append(poi)
276
- iframe_offsets.append(offset)
277
-
278
- # Combine the points of interest from the iframes with the main page and adjust the centroids
279
- for index, iframe_poi in enumerate(iframes_pois):
280
- self.poi_elements.extend(iframe_poi["element_descriptions"])
281
- for centroid in iframe_poi["element_centroids"]:
282
- # Adjust iframe POI coordinates based on iframe's position on the main page
283
- centroid["x"] += iframe_offsets[index]["x"]
284
- centroid["y"] += iframe_offsets[index]["y"]
285
- centroid["left"] += iframe_offsets[index]["x"]
286
- centroid["top"] += iframe_offsets[index]["y"]
287
- centroid["right"] += iframe_offsets[index]["x"]
288
- centroid["bottom"] += iframe_offsets[index]["y"]
289
- element_centroids.extend(iframe_poi["element_centroids"])
290
-
291
- except Exception as e:
292
- logger.error(f"Error in finding iframes: {e}")
293
- # Do not re-raise here unless iframe parsing is critical for the main task
294
- # Iframes not found is often not a fatal error for core functionality.
295
-
296
- # Get the centroids of the points of interest
297
- self.poi_centroids = [Point(x=xy["x"], y=xy["y"]) for xy in element_centroids]
298
- # Create BoundingBox objects for annotation
299
- self.bounding_boxes = [BoundingBox(**xy, label=str(i)) for i, xy in enumerate(element_centroids)]
300
- # Create POI objects which combine info, centroid, and bounding box
301
- self.pois = [
302
- POI(info=info, element_centroid=centroid, bounding_box=bbox)
303
- for info, centroid, bbox in zip(
304
- self.poi_elements,
305
- self.poi_centroids,
306
- self.bounding_boxes,
307
- strict=False, # Use strict=False if lengths might genuinely differ slightly
308
- )
309
- ]
310
 
311
  @property
312
  def poi_text(self) -> str:
@@ -444,17 +370,17 @@ class BrowserSession:
444
  await self.current_page.keyboard.press("Backspace")
445
 
446
 
447
- if __name__ == "__main__":
448
 
449
- async def dummy_test():
450
- async with BrowserSession(headless=False) as s:
451
- page = await s.context.new_page()
452
- await page.goto("http://google.co.uk")
453
- await asyncio.sleep(5)
454
- await page.screenshot(path="example.png")
455
- await s.update_poi()
456
- _, annotated_image = await s.screenshot()
457
- with open("output.png", "wb") as f:
458
- f.write(annotated_image)
459
 
460
- asyncio.run(dummy_test())
 
15
  from proxy_lite.browser.bounding_boxes import POI, BoundingBox, Point, annotate_bounding_boxes
16
  from proxy_lite.logger import logger
17
 
18
+ import base64
19
 
20
  SELF_CONTAINED_TAGS = [
21
  # many of these are non-interactive but keeping them anyway
 
107
  viewport={"width": self.viewport_width, "height": self.viewport_height},
108
  )
109
  await self.context.new_page()
110
+ self.context.set_default_timeout(60_000)
111
+ self.current_page.set_default_timeout(60_000)
 
 
112
  await stealth_async(self.current_page, StealthConfig(navigator_user_agent=False))
113
  await self.context.add_init_script(
114
  path=Path(__file__).with_name("add_custom_select.js"),
 
185
  )
186
  async def update_poi(self) -> None:
187
  try:
188
+ # Step 1: Wait for network to be idle. This indicates that initial requests have settled.
189
+ logger.debug("Attempting wait_for_load_state('networkidle')...")
190
+ await self.current_page.wait_for_load_state("networkidle", timeout=180000) # Increased timeout
191
+ logger.debug("wait_for_load_state('networkidle') completed.")
192
+
193
+ # Step 2: Wait for the 'loading' class to disappear from the body.
194
+ # This is a common and effective way to detect when SPAs like Salesforce are visually ready.
195
+ logger.debug("Attempting wait_for_selector('body:not(.loading)')...")
196
+ # No explicit state= is passed here, but Playwright's wait_for_selector defaults to state="visible",
197
+ # so this still waits for a visible <body> without the 'loading' class; pass state="attached" to relax it.
198
+ await self.current_page.wait_for_selector("body:not(.loading)", timeout=180000)
199
+ logger.debug("wait_for_selector('body:not(.loading)') completed.")
200
+
201
+ # Optional Step 3 (Highly Recommended): If the above still times out,
202
+ # uncomment and replace with a reliable selector for an interactive element
203
+ # that only appears after the Salesforce UI is fully loaded and ready for user input.
204
+ # Example: await self.current_page.wait_for_selector("#some_salesforce_specific_id", timeout=180000, state="visible")
205
+ # Example: await self.current_page.wait_for_selector("text=App Launcher", timeout=180000, state="visible")
206
+ # For now, we'll rely on the 'body:not(.loading)' as the primary indicator.
207
 
208
  except PlaywrightTimeoutError as e:
209
  # --- START TEMPORARY DEBUGGING CODE ---
210
  # This block captures state specifically when a Playwright timeout occurs
211
  current_url = self.current_page.url if self.current_page else "N/A"
212
+ logger.error(f"DEBUGGING: Playwright Timeout (180s) during page readiness check at URL: {current_url}")
213
 
214
  html_content = None
215
  try:
 
222
  screenshot_b64 = "N/A"
223
  try:
224
  if self.current_page:
225
+ # Capture screenshot at lower quality (e.g., 50) to keep log size manageable.
226
+ # Higher quality might make logs too large for some platforms.
227
  screenshot_bytes = await self.current_page.screenshot(type="jpeg", quality=50)
228
  screenshot_b64 = base64.b64encode(screenshot_bytes).decode("utf-8")
229
+ # Log only a very short snippet of base64 string to confirm it's there
230
  logger.error(f"DEBUGGING: Base64 Screenshot (truncated) when timeout occurred:\ndata:image/jpeg;base64,{screenshot_b64[:100]}... (full string is much longer)")
231
+
232
+ # If you want to view the full screenshot locally during development, you can save it:
233
+ # with open("debug_timeout_full_screenshot.jpeg", "wb") as f:
234
+ # f.write(screenshot_bytes)
235
+ # logger.error("DEBUGGING: Full screenshot saved to debug_timeout
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
  @property
238
  def poi_text(self) -> str:
 
370
  await self.current_page.keyboard.press("Backspace")
371
 
372
 
373
+ if __name__ == "__main__":
374
 
375
+ async def dummy_test():
376
+ async with BrowserSession(headless=False) as s:
377
+ page = await s.context.new_page()
378
+ await page.goto("http://google.co.uk")
379
+ await asyncio.sleep(5)
380
+ await page.screenshot(path="example.png")
381
+ await s.update_poi()
382
+ _, annotated_image = await s.screenshot()
383
+ with open("output.png", "wb") as f:
384
+ f.write(annotated_image)
385
 
386
+ asyncio.run(dummy_test())