Spaces:
Running
Running
Trisha Tomy
committed on
Commit
·
7af9344
1
Parent(s):
e9a2867
trying fixes for loading
Browse files
src/proxy_lite/browser/browser.py
CHANGED
@@ -15,7 +15,7 @@ from tenacity import before_sleep_log, retry, stop_after_delay, wait_exponential
|
|
15 |
from proxy_lite.browser.bounding_boxes import POI, BoundingBox, Point, annotate_bounding_boxes
|
16 |
from proxy_lite.logger import logger
|
17 |
|
18 |
-
import base64 #
|
19 |
|
20 |
SELF_CONTAINED_TAGS = [
|
21 |
# many of these are non-interactive but keeping them anyway
|
@@ -144,7 +144,7 @@ class BrowserSession:
|
|
144 |
# re-run for cases of mid-run redirects
|
145 |
@retry(
|
146 |
wait=wait_exponential(multiplier=1, min=1, max=10),
|
147 |
-
stop=stop_after_delay(5),
|
148 |
reraise=True,
|
149 |
before_sleep=before_sleep_log(logger, logging.ERROR),
|
150 |
)
|
@@ -187,29 +187,31 @@ class BrowserSession:
|
|
187 |
)
|
188 |
async def update_poi(self) -> None:
|
189 |
try:
|
190 |
-
# Step 1: Wait for
|
191 |
-
logger.debug("Attempting wait_for_load_state('
|
192 |
-
await self.current_page.wait_for_load_state("
|
193 |
-
logger.debug("wait_for_load_state('
|
194 |
-
|
195 |
-
# Step 2: Wait for the
|
196 |
-
# This is a
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
#
|
203 |
-
#
|
204 |
-
#
|
205 |
-
#
|
206 |
-
#
|
|
|
|
|
207 |
|
208 |
except PlaywrightTimeoutError as e:
|
209 |
# --- START TEMPORARY DEBUGGING CODE ---
|
210 |
# This block captures state specifically when a Playwright timeout occurs
|
211 |
current_url = self.current_page.url if self.current_page else "N/A"
|
212 |
-
logger.error(f"DEBUGGING: Playwright Timeout (180s) during page readiness check at URL: {current_url}")
|
213 |
|
214 |
html_content = None
|
215 |
try:
|
@@ -222,18 +224,9 @@ class BrowserSession:
|
|
222 |
screenshot_b64 = "N/A"
|
223 |
try:
|
224 |
if self.current_page:
|
225 |
-
# Capture screenshot at lower quality (e.g., 50) to keep log size manageable.
|
226 |
-
# Higher quality might make logs too large for some platforms.
|
227 |
screenshot_bytes = await self.current_page.screenshot(type="jpeg", quality=50)
|
228 |
screenshot_b64 = base64.b64encode(screenshot_bytes).decode("utf-8")
|
229 |
-
# Log only a very short snippet of base64 string to confirm it's there
|
230 |
logger.error(f"DEBUGGING: Base64 Screenshot (truncated) when timeout occurred:\ndata:image/jpeg;base64,{screenshot_b64[:100]}... (full string is much longer)")
|
231 |
-
|
232 |
-
# If you want to view the full screenshot locally during development, you can save it:
|
233 |
-
# with open("debug_timeout_full_screenshot.jpeg", "wb") as f:
|
234 |
-
# f.write(screenshot_bytes)
|
235 |
-
# logger.error("DEBUGGING: Full screenshot saved to debug_timeout_full_screenshot.jpeg for local inspection.")
|
236 |
-
|
237 |
except Exception as ss_e:
|
238 |
logger.error(f"DEBUGGING: Could not take screenshot for debug: {ss_e}")
|
239 |
|
@@ -451,17 +444,17 @@ class BrowserSession:
|
|
451 |
await self.current_page.keyboard.press("Backspace")
|
452 |
|
453 |
|
454 |
-
if __name__ == "__main__":
|
455 |
|
456 |
-
|
457 |
-
|
458 |
-
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
465 |
-
|
466 |
|
467 |
-
|
|
|
15 |
from proxy_lite.browser.bounding_boxes import POI, BoundingBox, Point, annotate_bounding_boxes
|
16 |
from proxy_lite.logger import logger
|
17 |
|
18 |
+
import base64 # Make sure this import is present!
|
19 |
|
20 |
SELF_CONTAINED_TAGS = [
|
21 |
# many of these are non-interactive but keeping them anyway
|
|
|
144 |
# re-run for cases of mid-run redirects
|
145 |
@retry(
|
146 |
wait=wait_exponential(multiplier=1, min=1, max=10),
|
147 |
+
stop=stop_after_delay(5),
|
148 |
reraise=True,
|
149 |
before_sleep=before_sleep_log(logger, logging.ERROR),
|
150 |
)
|
|
|
187 |
)
|
188 |
async def update_poi(self) -> None:
|
189 |
try:
|
190 |
+
# Step 1: Wait for DOMContentLoaded. This ensures the basic HTML structure is parsed.
|
191 |
+
logger.debug("Attempting wait_for_load_state('domcontentloaded')...")
|
192 |
+
await self.current_page.wait_for_load_state("domcontentloaded", timeout=180000)
|
193 |
+
logger.debug("wait_for_load_state('domcontentloaded') completed.")
|
194 |
+
|
195 |
+
# Step 2: Wait for the specific text "Account Forecasting" to be visible on the page.
|
196 |
+
# This is a strong indicator that the core content for the task has loaded.
|
197 |
+
target_text = "Account Forecasting"
|
198 |
+
logger.debug(f"Attempting to wait for text: '{target_text}' to be visible...")
|
199 |
+
await self.current_page.wait_for_selector(f"text={target_text}", timeout=180000, state="visible")
|
200 |
+
logger.debug(f"Text '{target_text}' became visible.")
|
201 |
+
|
202 |
+
# Optional: You can still add a wait for network idle *after* the text is visible
|
203 |
+
# if the page still isn't interactive immediately, but prioritize the text.
|
204 |
+
# try:
|
205 |
+
# await self.current_page.wait_for_load_state("networkidle", timeout=60000) # Shorter timeout here
|
206 |
+
# logger.debug("wait_for_load_state('networkidle') completed after text appeared.")
|
207 |
+
# except PlaywrightTimeoutError:
|
208 |
+
# logger.warning("Network idle state not reached after text appeared, but proceeding.")
|
209 |
|
210 |
except PlaywrightTimeoutError as e:
|
211 |
# --- START TEMPORARY DEBUGGING CODE ---
|
212 |
# This block captures state specifically when a Playwright timeout occurs
|
213 |
current_url = self.current_page.url if self.current_page else "N/A"
|
214 |
+
logger.error(f"DEBUGGING: Playwright Timeout (180s) during page readiness check for text '{target_text}' at URL: {current_url}")
|
215 |
|
216 |
html_content = None
|
217 |
try:
|
|
|
224 |
screenshot_b64 = "N/A"
|
225 |
try:
|
226 |
if self.current_page:
|
|
|
|
|
227 |
screenshot_bytes = await self.current_page.screenshot(type="jpeg", quality=50)
|
228 |
screenshot_b64 = base64.b64encode(screenshot_bytes).decode("utf-8")
|
|
|
229 |
logger.error(f"DEBUGGING: Base64 Screenshot (truncated) when timeout occurred:\ndata:image/jpeg;base64,{screenshot_b64[:100]}... (full string is much longer)")
|
|
|
|
|
|
|
|
|
|
|
|
|
230 |
except Exception as ss_e:
|
231 |
logger.error(f"DEBUGGING: Could not take screenshot for debug: {ss_e}")
|
232 |
|
|
|
444 |
await self.current_page.keyboard.press("Backspace")
|
445 |
|
446 |
|
447 |
+
if __name__ == "__main__":
|
448 |
|
449 |
+
async def dummy_test():
|
450 |
+
async with BrowserSession(headless=False) as s:
|
451 |
+
page = await s.context.new_page()
|
452 |
+
await page.goto("http://google.co.uk")
|
453 |
+
await asyncio.sleep(5)
|
454 |
+
await page.screenshot(path="example.png")
|
455 |
+
await s.update_poi()
|
456 |
+
_, annotated_image = await s.screenshot()
|
457 |
+
with open("output.png", "wb") as f:
|
458 |
+
f.write(annotated_image)
|
459 |
|
460 |
+
asyncio.run(dummy_test())
|