Trisha Tomy committed on
Commit
5aa95b3
·
1 Parent(s): dab2696

TEMP: Added debug logging for body selector timeout

Browse files
Files changed (2) hide show
  1. app.py +29 -48
  2. src/proxy_lite/browser/browser.py +49 -5
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gevent.monkey
2
- gevent.monkey.patch_all(asyncio=True)
3
- # app.py
4
- import asyncio
5
  from flask import Flask, request, jsonify
6
  from proxy_lite import Runner, RunnerConfig
7
  import os
@@ -19,23 +19,20 @@ async def initialize_runner():
19
  if _runner is None:
20
  logger.info("Initializing Proxy-lite Runner...")
21
 
22
- # Retrieve Hugging Face API token from environment variables
23
  hf_api_token = os.environ.get("HF_API_TOKEN")
24
  if not hf_api_token:
25
  logger.error("HF_API_TOKEN environment variable not set. Cannot initialize Runner.")
26
  raise ValueError("HF_API_TOKEN environment variable not set. Please set it as a Space secret.")
27
 
28
- # Define RunnerConfig
29
  config = RunnerConfig.from_dict({
30
  "environment": {
31
  "name": "webbrowser",
32
  "homepage": "https://dwd000006jia1mae.lightning.force.com/lightning/setup/AccountForecastSettings/home",
33
- "headless": True, # Keep headless for production environments
34
- "launch_args": ["--no-sandbox", "--disable-setuid-sandbox"] ,
35
- # "environment_timeout": 1800.0, # <-- THIS LINE WAS IN THE WRONG PLACE IN app.py
36
- "screenshot_delay": 3.0, # Increased delay for full page render before screenshot
37
- "include_html": True, # Include full HTML for richer observations
38
- "include_poi_text": True, # Keep including points of interest text
39
  },
40
  "solver": {
41
  "name": "simple",
@@ -49,50 +46,34 @@ async def initialize_runner():
49
  }
50
  }
51
  },
52
- # --- MOVE environment_timeout HERE, AS A TOP-LEVEL KEY ---
53
- "environment_timeout": 1800.0,
54
- # You can also set other top-level RunnerConfig fields here if desired,
55
- # e.g., "action_timeout": 1800.0, "max_steps": 150, "task_timeout": 18000.0,
56
- # Ensure these match your desired values and not the defaults from RunnerConfig.
57
- # Based on your logs, action_timeout and task_timeout seem to be default, so let's add them:
58
- "action_timeout": 1800.0, # As per your earlier runner.py __main__ block
59
- "task_timeout": 18000.0, # As per your earlier runner.py __main__ block
60
- "max_steps": 150, # As per your earlier runner.py __main__ block
61
- "logger_level": "DEBUG", # Set this to DEBUG for more detailed logging during troubleshooting
62
- # --- END OF MOVED KEY ---
63
  })
64
 
65
- # --- ADDED DEBUG LOGGING HERE ---
66
  logger.info(f"DEBUG: app.py - Initializing Runner with environment_timeout: {config.environment_timeout} seconds")
67
- logger.info(f"DEBUG: app.py - Full config used: {config.model_dump_json(indent=2)}") # For Pydantic v2
68
- # If you are using Pydantic v1, use: logger.info(f"DEBUG: app.py - Full config used: {config.json(indent=2)}")
69
- # --- END ADDED DEBUG LOGGING ---
70
 
71
  _runner = Runner(config=config)
72
  logger.info("Proxy-lite Runner initialized successfully.")
73
  return _runner
74
 
75
- # --- MODIFIED run_async_task FUNCTION ---
76
- def run_async_task(coro):
77
- """
78
- Helper to run async coroutines in a synchronous context (like Flask routes).
79
- Ensures an event loop exists and runs the coroutine.
80
- """
81
- try:
82
- # Try to get the running loop (for current thread/greenlet)
83
- loop = asyncio.get_running_loop()
84
- except RuntimeError:
85
- # If no loop is running, create a new one for this thread
86
- loop = asyncio.new_event_loop()
87
- asyncio.set_event_loop(loop)
88
-
89
- # Run the coroutine until it completes
90
- return loop.run_until_complete(coro)
91
  # --- END MODIFIED run_async_task FUNCTION ---
92
 
93
 
94
  @app.route('/run_proxy_task', methods=['POST'])
95
- def run_proxy_task_endpoint():
96
  data = request.json
97
  request_task_instruction = data.get('task')
98
 
@@ -109,9 +90,6 @@ def run_proxy_task_endpoint():
109
  logger.error("Salesforce credentials (SALESFORCE_USERNAME, SALESFORCE_PASSWORD) environment variables not set.")
110
  return jsonify({"error": "Salesforce credentials not configured. Please set SALESFORCE_USERNAME and SALESFORCE_PASSWORD as Space secrets."}), 500
111
 
112
- # Construct the full task for the proxy-lite agent,
113
- # combining login instructions with the dynamic task from the user,
114
- # and adding explicit verification steps for login success.
115
  agent_task = (
116
  f"Log in to Salesforce. The username is '{salesforce_username}' and the password is '{salesforce_password}'. "
117
  f"After attempting to log in, observe the page carefully. "
@@ -123,13 +101,16 @@ def run_proxy_task_endpoint():
123
  logger.info(f"Executing agent task: '{agent_task[:200]}...'")
124
 
125
  try:
126
- runner = run_async_task(initialize_runner())
127
- result = run_async_task(runner.run(agent_task))
 
128
 
129
  logger.info(f"Proxy-lite task completed. Output: {result[:200]}...")
130
  return jsonify({"output": result})
131
  except Exception as e:
132
  logger.exception(f"Error processing Salesforce task: {e}")
 
 
133
  return jsonify({"error": f"An error occurred: {str(e)}. Check logs for details."}), 500
134
 
135
  @app.route('/')
 
1
  import gevent.monkey
2
+ gevent.monkey.patch_all(asyncio=True) # Keep this at the very top
3
+
4
+ import asyncio # Keep this
5
  from flask import Flask, request, jsonify
6
  from proxy_lite import Runner, RunnerConfig
7
  import os
 
19
  if _runner is None:
20
  logger.info("Initializing Proxy-lite Runner...")
21
 
 
22
  hf_api_token = os.environ.get("HF_API_TOKEN")
23
  if not hf_api_token:
24
  logger.error("HF_API_TOKEN environment variable not set. Cannot initialize Runner.")
25
  raise ValueError("HF_API_TOKEN environment variable not set. Please set it as a Space secret.")
26
 
 
27
  config = RunnerConfig.from_dict({
28
  "environment": {
29
  "name": "webbrowser",
30
  "homepage": "https://dwd000006jia1mae.lightning.force.com/lightning/setup/AccountForecastSettings/home",
31
+ "headless": True,
32
+ "launch_args": ["--no-sandbox", "--disable-setuid-sandbox"],
33
+ "screenshot_delay": 3.0,
34
+ "include_html": True,
35
+ "include_poi_text": True,
 
36
  },
37
  "solver": {
38
  "name": "simple",
 
46
  }
47
  }
48
  },
49
+ "environment_timeout": 1800.0,
50
+ "action_timeout": 1800.0,
51
+ "task_timeout": 18000.0,
52
+ "max_steps": 150,
53
+ "logger_level": "DEBUG",
 
 
 
 
 
 
54
  })
55
 
 
56
  logger.info(f"DEBUG: app.py - Initializing Runner with environment_timeout: {config.environment_timeout} seconds")
57
+ logger.info(f"DEBUG: app.py - Full config used: {config.model_dump_json(indent=2)}")
 
 
58
 
59
  _runner = Runner(config=config)
60
  logger.info("Proxy-lite Runner initialized successfully.")
61
  return _runner
62
 
63
+ # --- MODIFIED run_async_task FUNCTION (SIMPLIFIED) ---
64
+ # This function is no longer needed in most cases with gevent.monkey.patch_all(asyncio=True)
65
+ # but if you must call async functions from sync context, you simply await them.
66
+ # However, you are already in an async function context within Flask routes when using Gunicorn/gevent.
67
+ # The Gunicorn worker itself implicitly runs an event loop.
68
+ # Let's remove the run_until_complete part.
69
+
70
+ # DELETED: def run_async_task(coro): ...
71
+
 
 
 
 
 
 
 
72
  # --- END MODIFIED run_async_task FUNCTION ---
73
 
74
 
75
  @app.route('/run_proxy_task', methods=['POST'])
76
+ async def run_proxy_task_endpoint(): # <--- MAKE THIS FUNCTION ASYNC
77
  data = request.json
78
  request_task_instruction = data.get('task')
79
 
 
90
  logger.error("Salesforce credentials (SALESFORCE_USERNAME, SALESFORCE_PASSWORD) environment variables not set.")
91
  return jsonify({"error": "Salesforce credentials not configured. Please set SALESFORCE_USERNAME and SALESFORCE_PASSWORD as Space secrets."}), 500
92
 
 
 
 
93
  agent_task = (
94
  f"Log in to Salesforce. The username is '{salesforce_username}' and the password is '{salesforce_password}'. "
95
  f"After attempting to log in, observe the page carefully. "
 
101
  logger.info(f"Executing agent task: '{agent_task[:200]}...'")
102
 
103
  try:
104
+ # Since run_proxy_task_endpoint is now async, you can directly await
105
+ runner = await initialize_runner()
106
+ result = await runner.run(agent_task) # <--- AWAIT DIRECTLY
107
 
108
  logger.info(f"Proxy-lite task completed. Output: {result[:200]}...")
109
  return jsonify({"output": result})
110
  except Exception as e:
111
  logger.exception(f"Error processing Salesforce task: {e}")
112
+ # The RuntimeWarning: coroutine 'initialize_runner' was never awaited will disappear
113
+ # because initialize_runner is now awaited.
114
  return jsonify({"error": f"An error occurred: {str(e)}. Check logs for details."}), 500
115
 
116
  @app.route('/')
src/proxy_lite/browser/browser.py CHANGED
@@ -15,6 +15,8 @@ from tenacity import before_sleep_log, retry, stop_after_delay, wait_exponential
15
  from proxy_lite.browser.bounding_boxes import POI, BoundingBox, Point, annotate_bounding_boxes
16
  from proxy_lite.logger import logger
17
 
 
 
18
  SELF_CONTAINED_TAGS = [
19
  # many of these are non-interactive but keeping them anyway
20
  "area",
@@ -183,10 +185,53 @@ class BrowserSession:
183
  )
184
  async def update_poi(self) -> None:
185
  try:
186
- await self.current_page.wait_for_load_state(timeout=60000)
187
- except PlaywrightTimeoutError:
188
- logger.error(f"Timeout waiting for website load state: {self.current_url}")
189
- await self.current_page.wait_for_selector("body", timeout=180000, state="visible")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  # Run the bounding box javascript code to highlight the points of interest on the page
191
  page_info = await self.current_page.evaluate(
192
  """() => {
@@ -204,7 +249,6 @@ class BrowserSession:
204
  max_iframes = 10
205
 
206
  # Define an asynchronous function to process and filter each iframe
207
-
208
  tasks = [asyncio.create_task(self.process_iframe(iframe)) for iframe in iframes[:max_iframes]]
209
 
210
  results = await asyncio.gather(*tasks)
 
15
  from proxy_lite.browser.bounding_boxes import POI, BoundingBox, Point, annotate_bounding_boxes
16
  from proxy_lite.logger import logger
17
 
18
+ import base64
19
+
20
  SELF_CONTAINED_TAGS = [
21
  # many of these are non-interactive but keeping them anyway
22
  "area",
 
185
  )
186
  async def update_poi(self) -> None:
187
  try:
188
+ # Added for robustness based on previous discussions
189
+ await self.current_page.wait_for_load_state("networkidle", timeout=180000)
190
+ logger.debug("wait_for_load_state('networkidle') completed.")
191
+
192
+ # This is the line that was previously timing out, now with increased timeout.
193
+ # Adding explicit try/except around it for specific debugging.
194
+ try:
195
+ await self.current_page.wait_for_selector("body", timeout=180000, state="visible")
196
+ logger.debug("wait_for_selector('body', state='visible') completed.")
197
+ except PlaywrightTimeoutError as e:
198
+ # --- START TEMPORARY DEBUGGING CODE ---
199
+ current_url = self.current_page.url if self.current_page else "N/A"
200
+ logger.error(f"DEBUGGING: Playwright Timeout (180s) on body selector at URL: {current_url}")
201
+
202
+ html_content = None
203
+ try:
204
+ if self.current_page:
205
+ html_content = await self.current_page.content()
206
+ # Log only a snippet of HTML to avoid excessively large logs
207
+ logger.error(f"DEBUGGING: HTML Content (first 1000 chars) when timeout occurred:\n{html_content[:1000]}...")
208
+ except Exception as html_e:
209
+ logger.error(f"DEBUGGING: Could not get HTML content: {html_e}")
210
+
211
+ screenshot_b64 = "N/A"
212
+ try:
213
+ if self.current_page:
214
+ # Capture screenshot at lower quality to keep log size manageable
215
+ screenshot_bytes = await self.current_page.screenshot(type="jpeg", quality=50)
216
+ screenshot_b64 = base64.b64encode(screenshot_bytes).decode("utf-8")
217
+ # Log only a very short snippet of base64 string
218
+ logger.error(f"DEBUGGING: Base64 Screenshot (truncated) when timeout occurred:\ndata:image/jpeg;base64,{screenshot_b64[:100]}... (full string is much longer)")
219
+ except Exception as ss_e:
220
+ logger.error(f"DEBUGGING: Could not take screenshot: {ss_e}")
221
+
222
+ # Re-raise the original exception to ensure the task still fails,
223
+ # but now with crucial debugging information in the logs.
224
+ raise e
225
+ # --- END TEMPORARY DEBUGGING CODE ---
226
+
227
+ except PlaywrightTimeoutError: # This outer catch is for the wait_for_load_state timeout
228
+ logger.error(f"Timeout waiting for website load state (networkidle): {self.current_url}")
229
+ raise # Re-raise if load_state itself times out
230
+
231
+ except Exception as e:
232
+ logger.error(f"An unexpected error occurred during page readiness check: {e}")
233
+ raise
234
+
235
  # Run the bounding box javascript code to highlight the points of interest on the page
236
  page_info = await self.current_page.evaluate(
237
  """() => {
 
249
  max_iframes = 10
250
 
251
  # Define an asynchronous function to process and filter each iframe
 
252
  tasks = [asyncio.create_task(self.process_iframe(iframe)) for iframe in iframes[:max_iframes]]
253
 
254
  results = await asyncio.gather(*tasks)