Trisha Tomy committed on
Commit
a4a1211
·
1 Parent(s): 928d95c

trying fixes for loading

Browse files
Files changed (1) hide show
  1. src/proxy_lite/browser/browser.py +101 -45
src/proxy_lite/browser/browser.py CHANGED
@@ -179,60 +179,116 @@ class BrowserSession:
179
  # re-run for cases of mid-run redirects
180
  @retry(
181
  wait=wait_exponential(multiplier=1, min=1, max=10),
182
- stop=stop_after_delay(5), # This retry is for the entire update_poi method, in case of intermittent issues
183
  reraise=True,
184
  before_sleep=before_sleep_log(logger, logging.ERROR),
185
  )
186
  async def update_poi(self) -> None:
187
  try:
188
- # Step 1: Wait for network to be idle. This indicates that initial requests have settled.
189
- logger.debug("Attempting wait_for_load_state('networkidle')...")
190
- await self.current_page.wait_for_load_state("networkidle", timeout=180000) # Increased timeout
191
  logger.debug("wait_for_load_state('networkidle') completed.")
192
 
193
- # Step 2: Wait for the 'loading' class to disappear from the body.
194
- # This is a common and effective way to detect when SPAs like Salesforce are visually ready.
195
- logger.debug("Attempting wait_for_selector('body:not(.loading)')...")
196
- # Removed state="visible" as it's often too strict for 'body' in SPAs,
197
- # and 'not(.loading)' implies it should become visible eventually.
198
- await self.current_page.wait_for_selector("body:not(.loading)", timeout=180000)
199
- logger.debug("wait_for_selector('body:not(.loading)') completed.")
200
-
201
- # Optional Step 3 (Highly Recommended): If the above still times out,
202
- # uncomment and replace with a reliable selector for an interactive element
203
- # that only appears after the Salesforce UI is fully loaded and ready for user input.
204
- # Example: await self.current_page.wait_for_selector("#some_salesforce_specific_id", timeout=180000, state="visible")
205
- # Example: await self.current_page.wait_for_selector("text=App Launcher", timeout=180000, state="visible")
206
- # For now, we'll rely on the 'body:not(.loading)' as the primary indicator.
207
-
208
- except PlaywrightTimeoutError as e:
209
- # --- START TEMPORARY DEBUGGING CODE ---
210
- # This block captures state specifically when a Playwright timeout occurs
211
- current_url = self.current_page.url if self.current_page else "N/A"
212
- logger.error(f"DEBUGGING: Playwright Timeout (180s) during page readiness check at URL: {current_url}")
213
-
214
- html_content = None
215
  try:
216
- if self.current_page:
217
- html_content = await self.current_page.content()
218
- logger.error(f"DEBUGGING: HTML Content (first 1000 chars) when timeout occurred:\n{html_content[:1000]}...")
219
- except Exception as html_e:
220
- logger.error(f"DEBUGGING: Could not get HTML content for debug: {html_e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
 
222
- screenshot_b64 = "N/A"
223
- try:
224
- if self.current_page:
225
- # Capture screenshot at lower quality (e.g., 50) to keep log size manageable.
226
- # Higher quality might make logs too large for some platforms.
227
- screenshot_bytes = await self.current_page.screenshot(type="jpeg", quality=50)
228
- screenshot_b64 = base64.b64encode(screenshot_bytes).decode("utf-8")
229
- # Log only a very short snippet of base64 string to confirm it's there
230
- logger.error(f"DEBUGGING: Base64 Screenshot (truncated) when timeout occurred:\ndata:image/jpeg;base64,{screenshot_b64[:100]}... (full string is much longer)")
231
-
232
- # If you want to view the full screenshot locally during development, you can save it:
233
- # with open("debug_timeout_full_screenshot.jpeg", "wb") as f:
234
- # f.write(screenshot_bytes)
235
- # logger.error("DEBUGGING: Full screenshot saved to debug_timeout_full_screenshot.jpeg")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
  @property
238
  def poi_text(self) -> str:
 
179
  # re-run for cases of mid-run redirects
180
  @retry(
181
  wait=wait_exponential(multiplier=1, min=1, max=10),
182
+ stop=stop_after_delay(5),
183
  reraise=True,
184
  before_sleep=before_sleep_log(logger, logging.ERROR),
185
  )
186
  async def update_poi(self) -> None:
187
  try:
188
+ # Wait for network activity to go idle (up to 180 s) before inspecting the page
189
+ await self.current_page.wait_for_load_state("networkidle", timeout=180000)
 
190
  logger.debug("wait_for_load_state('networkidle') completed.")
191
 
192
+ # This is the line that was previously timing out, now with increased timeout.
193
+ # Adding explicit try/except around it for specific debugging.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  try:
195
+ await self.current_page.wait_for_selector("body", timeout=180000, state="visible")
196
+ logger.debug("wait_for_selector('body', state='visible') completed.")
197
+ except PlaywrightTimeoutError as e:
198
+ # --- START TEMPORARY DEBUGGING CODE ---
199
+ current_url = self.current_page.url if self.current_page else "N/A"
200
+ logger.error(f"DEBUGGING: Playwright Timeout (180s) on body selector at URL: {current_url}")
201
+
202
+ html_content = None
203
+ try:
204
+ if self.current_page:
205
+ html_content = await self.current_page.content()
206
+ # Log only a snippet of HTML to avoid excessively large logs
207
+ logger.error(f"DEBUGGING: HTML Content (first 1000 chars) when timeout occurred:\n{html_content[:1000]}...")
208
+ except Exception as html_e:
209
+ logger.error(f"DEBUGGING: Could not get HTML content: {html_e}")
210
+
211
+ screenshot_b64 = "N/A"
212
+ try:
213
+ if self.current_page:
214
+ # Capture screenshot at lower quality to keep log size manageable
215
+ screenshot_bytes = await self.current_page.screenshot(type="jpeg", quality=50)
216
+ screenshot_b64 = base64.b64encode(screenshot_bytes).decode("utf-8")
217
+ # Log only a very short snippet of base64 string
218
+ logger.error(f"DEBUGGING: Base64 Screenshot (truncated) when timeout occurred:\ndata:image/jpeg;base64,{screenshot_b64[:100]}... (full string is much longer)")
219
+ except Exception as ss_e:
220
+ logger.error(f"DEBUGGING: Could not take screenshot: {ss_e}")
221
+
222
+ # Re-raise the original exception to ensure the task still fails,
223
+ # but now with crucial debugging information in the logs.
224
+ raise e
225
+ # --- END TEMPORARY DEBUGGING CODE ---
226
+
227
+ except PlaywrightTimeoutError: # Catches a wait_for_load_state timeout and also the body-selector timeout re-raised by the inner handler
228
+ logger.error(f"Timeout waiting for website load state (networkidle): {self.current_url}")
229
+ raise # Re-raise whichever timeout occurred so the failure still propagates
230
 
231
+ except Exception as e:
232
+ logger.error(f"An unexpected error occurred during page readiness check: {e}")
233
+ raise
234
+
235
+ # Run the bounding box javascript code to highlight the points of interest on the page
236
+ page_info = await self.current_page.evaluate(
237
+ """() => {
238
+ overwriteDefaultSelectConvergence();
239
+ return findPOIsConvergence();
240
+ }""",
241
+ )
242
+ # Get the points of interest on the page
243
+ self.poi_elements = page_info["element_descriptions"]
244
+ element_centroids = page_info["element_centroids"]
245
+ try:
246
+ # Select all iframes on the page
247
+ iframes = await self.current_page.query_selector_all("iframe")
248
+
249
+ max_iframes = 10
250
+
251
+ # Define an asynchronous function to process and filter each iframe
252
+ tasks = [asyncio.create_task(self.process_iframe(iframe)) for iframe in iframes[:max_iframes]]
253
+
254
+ results = await asyncio.gather(*tasks)
255
+
256
+ filtered_results = [result for result in results if result is not None]
257
+
258
+ iframes_pois = []
259
+ iframe_offsets = []
260
+
261
+ for poi, offset in filtered_results:
262
+ iframes_pois.append(poi)
263
+ iframe_offsets.append(offset)
264
+
265
+ # Combine the points of interest from the iframes with the main page and adjust the centroids
266
+ for index, iframe_poi in enumerate(iframes_pois):
267
+ self.poi_elements.extend(iframe_poi["element_descriptions"])
268
+ for centroid in iframe_poi["element_centroids"]:
269
+ centroid["x"] += iframe_offsets[index]["x"]
270
+ centroid["y"] += iframe_offsets[index]["y"]
271
+ centroid["left"] += iframe_offsets[index]["x"]
272
+ centroid["top"] += iframe_offsets[index]["y"]
273
+ centroid["right"] += iframe_offsets[index]["x"]
274
+ centroid["bottom"] += iframe_offsets[index]["y"]
275
+ element_centroids.extend(iframe_poi["element_centroids"])
276
+
277
+ except Exception as e:
278
+ logger.error(f"Error in finding iframes: {e}")
279
+
280
+ # Get the centroids of the points of interest
281
+ self.poi_centroids = [Point(x=xy["x"], y=xy["y"]) for xy in element_centroids]
282
+ self.bounding_boxes = [BoundingBox(**xy, label=str(i)) for i, xy in enumerate(element_centroids)]
283
+ self.pois = [
284
+ POI(info=info, element_centroid=centroid, bounding_box=bbox)
285
+ for info, centroid, bbox in zip(
286
+ self.poi_elements,
287
+ self.poi_centroids,
288
+ self.bounding_boxes,
289
+ strict=False,
290
+ )
291
+ ]
292
 
293
  @property
294
  def poi_text(self) -> str: