Spaces:
Running
Running
Trisha Tomy
committed on
Commit
·
5aa95b3
1
Parent(s):
dab2696
TEMP: Added debug logging for body selector timeout
Browse files
- app.py +29 -48
- src/proxy_lite/browser/browser.py +49 -5
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import gevent.monkey
|
2 |
-
gevent.monkey.patch_all(asyncio=True)
|
3 |
-
|
4 |
-
import asyncio
|
5 |
from flask import Flask, request, jsonify
|
6 |
from proxy_lite import Runner, RunnerConfig
|
7 |
import os
|
@@ -19,23 +19,20 @@ async def initialize_runner():
|
|
19 |
if _runner is None:
|
20 |
logger.info("Initializing Proxy-lite Runner...")
|
21 |
|
22 |
-
# Retrieve Hugging Face API token from environment variables
|
23 |
hf_api_token = os.environ.get("HF_API_TOKEN")
|
24 |
if not hf_api_token:
|
25 |
logger.error("HF_API_TOKEN environment variable not set. Cannot initialize Runner.")
|
26 |
raise ValueError("HF_API_TOKEN environment variable not set. Please set it as a Space secret.")
|
27 |
|
28 |
-
# Define RunnerConfig
|
29 |
config = RunnerConfig.from_dict({
|
30 |
"environment": {
|
31 |
"name": "webbrowser",
|
32 |
"homepage": "https://dwd000006jia1mae.lightning.force.com/lightning/setup/AccountForecastSettings/home",
|
33 |
-
"headless": True,
|
34 |
-
"launch_args": ["--no-sandbox", "--disable-setuid-sandbox"]
|
35 |
-
|
36 |
-
"
|
37 |
-
"
|
38 |
-
"include_poi_text": True, # Keep including points of interest text
|
39 |
},
|
40 |
"solver": {
|
41 |
"name": "simple",
|
@@ -49,50 +46,34 @@ async def initialize_runner():
|
|
49 |
}
|
50 |
}
|
51 |
},
|
52 |
-
|
53 |
-
"
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
# Based on your logs, action_timeout and task_timeout seem to be default, so let's add them:
|
58 |
-
"action_timeout": 1800.0, # As per your earlier runner.py __main__ block
|
59 |
-
"task_timeout": 18000.0, # As per your earlier runner.py __main__ block
|
60 |
-
"max_steps": 150, # As per your earlier runner.py __main__ block
|
61 |
-
"logger_level": "DEBUG", # Set this to DEBUG for more detailed logging during troubleshooting
|
62 |
-
# --- END OF MOVED KEY ---
|
63 |
})
|
64 |
|
65 |
-
# --- ADDED DEBUG LOGGING HERE ---
|
66 |
logger.info(f"DEBUG: app.py - Initializing Runner with environment_timeout: {config.environment_timeout} seconds")
|
67 |
-
logger.info(f"DEBUG: app.py - Full config used: {config.model_dump_json(indent=2)}")
|
68 |
-
# If you are using Pydantic v1, use: logger.info(f"DEBUG: app.py - Full config used: {config.json(indent=2)}")
|
69 |
-
# --- END ADDED DEBUG LOGGING ---
|
70 |
|
71 |
_runner = Runner(config=config)
|
72 |
logger.info("Proxy-lite Runner initialized successfully.")
|
73 |
return _runner
|
74 |
|
75 |
-
# --- MODIFIED run_async_task FUNCTION ---
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
except RuntimeError:
|
85 |
-
# If no loop is running, create a new one for this thread
|
86 |
-
loop = asyncio.new_event_loop()
|
87 |
-
asyncio.set_event_loop(loop)
|
88 |
-
|
89 |
-
# Run the coroutine until it completes
|
90 |
-
return loop.run_until_complete(coro)
|
91 |
# --- END MODIFIED run_async_task FUNCTION ---
|
92 |
|
93 |
|
94 |
@app.route('/run_proxy_task', methods=['POST'])
|
95 |
-
def run_proxy_task_endpoint():
|
96 |
data = request.json
|
97 |
request_task_instruction = data.get('task')
|
98 |
|
@@ -109,9 +90,6 @@ def run_proxy_task_endpoint():
|
|
109 |
logger.error("Salesforce credentials (SALESFORCE_USERNAME, SALESFORCE_PASSWORD) environment variables not set.")
|
110 |
return jsonify({"error": "Salesforce credentials not configured. Please set SALESFORCE_USERNAME and SALESFORCE_PASSWORD as Space secrets."}), 500
|
111 |
|
112 |
-
# Construct the full task for the proxy-lite agent,
|
113 |
-
# combining login instructions with the dynamic task from the user,
|
114 |
-
# and adding explicit verification steps for login success.
|
115 |
agent_task = (
|
116 |
f"Log in to Salesforce. The username is '{salesforce_username}' and the password is '{salesforce_password}'. "
|
117 |
f"After attempting to log in, observe the page carefully. "
|
@@ -123,13 +101,16 @@ def run_proxy_task_endpoint():
|
|
123 |
logger.info(f"Executing agent task: '{agent_task[:200]}...'")
|
124 |
|
125 |
try:
|
126 |
-
|
127 |
-
|
|
|
128 |
|
129 |
logger.info(f"Proxy-lite task completed. Output: {result[:200]}...")
|
130 |
return jsonify({"output": result})
|
131 |
except Exception as e:
|
132 |
logger.exception(f"Error processing Salesforce task: {e}")
|
|
|
|
|
133 |
return jsonify({"error": f"An error occurred: {str(e)}. Check logs for details."}), 500
|
134 |
|
135 |
@app.route('/')
|
|
|
1 |
import gevent.monkey
|
2 |
+
gevent.monkey.patch_all(asyncio=True) # Keep this at the very top
|
3 |
+
|
4 |
+
import asyncio # Keep this
|
5 |
from flask import Flask, request, jsonify
|
6 |
from proxy_lite import Runner, RunnerConfig
|
7 |
import os
|
|
|
19 |
if _runner is None:
|
20 |
logger.info("Initializing Proxy-lite Runner...")
|
21 |
|
|
|
22 |
hf_api_token = os.environ.get("HF_API_TOKEN")
|
23 |
if not hf_api_token:
|
24 |
logger.error("HF_API_TOKEN environment variable not set. Cannot initialize Runner.")
|
25 |
raise ValueError("HF_API_TOKEN environment variable not set. Please set it as a Space secret.")
|
26 |
|
|
|
27 |
config = RunnerConfig.from_dict({
|
28 |
"environment": {
|
29 |
"name": "webbrowser",
|
30 |
"homepage": "https://dwd000006jia1mae.lightning.force.com/lightning/setup/AccountForecastSettings/home",
|
31 |
+
"headless": True,
|
32 |
+
"launch_args": ["--no-sandbox", "--disable-setuid-sandbox"],
|
33 |
+
"screenshot_delay": 3.0,
|
34 |
+
"include_html": True,
|
35 |
+
"include_poi_text": True,
|
|
|
36 |
},
|
37 |
"solver": {
|
38 |
"name": "simple",
|
|
|
46 |
}
|
47 |
}
|
48 |
},
|
49 |
+
"environment_timeout": 1800.0,
|
50 |
+
"action_timeout": 1800.0,
|
51 |
+
"task_timeout": 18000.0,
|
52 |
+
"max_steps": 150,
|
53 |
+
"logger_level": "DEBUG",
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
})
|
55 |
|
|
|
56 |
logger.info(f"DEBUG: app.py - Initializing Runner with environment_timeout: {config.environment_timeout} seconds")
|
57 |
+
logger.info(f"DEBUG: app.py - Full config used: {config.model_dump_json(indent=2)}")
|
|
|
|
|
58 |
|
59 |
_runner = Runner(config=config)
|
60 |
logger.info("Proxy-lite Runner initialized successfully.")
|
61 |
return _runner
|
62 |
|
63 |
+
# --- MODIFIED run_async_task FUNCTION (SIMPLIFIED) ---
|
64 |
+
# This function is no longer needed in most cases with gevent.monkey.patch_all(asyncio=True)
|
65 |
+
# but if you must call async functions from sync context, you simply await them.
|
66 |
+
# However, you are already in an async function context within Flask routes when using Gunicorn/gevent.
|
67 |
+
# The Gunicorn worker itself implicitly runs an event loop.
|
68 |
+
# Let's remove the run_until_complete part.
|
69 |
+
|
70 |
+
# DELETED: def run_async_task(coro): ...
|
71 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
# --- END MODIFIED run_async_task FUNCTION ---
|
73 |
|
74 |
|
75 |
@app.route('/run_proxy_task', methods=['POST'])
|
76 |
+
async def run_proxy_task_endpoint(): # <--- MAKE THIS FUNCTION ASYNC
|
77 |
data = request.json
|
78 |
request_task_instruction = data.get('task')
|
79 |
|
|
|
90 |
logger.error("Salesforce credentials (SALESFORCE_USERNAME, SALESFORCE_PASSWORD) environment variables not set.")
|
91 |
return jsonify({"error": "Salesforce credentials not configured. Please set SALESFORCE_USERNAME and SALESFORCE_PASSWORD as Space secrets."}), 500
|
92 |
|
|
|
|
|
|
|
93 |
agent_task = (
|
94 |
f"Log in to Salesforce. The username is '{salesforce_username}' and the password is '{salesforce_password}'. "
|
95 |
f"After attempting to log in, observe the page carefully. "
|
|
|
101 |
logger.info(f"Executing agent task: '{agent_task[:200]}...'")
|
102 |
|
103 |
try:
|
104 |
+
# Since run_proxy_task_endpoint is now async, you can directly await
|
105 |
+
runner = await initialize_runner()
|
106 |
+
result = await runner.run(agent_task) # <--- AWAIT DIRECTLY
|
107 |
|
108 |
logger.info(f"Proxy-lite task completed. Output: {result[:200]}...")
|
109 |
return jsonify({"output": result})
|
110 |
except Exception as e:
|
111 |
logger.exception(f"Error processing Salesforce task: {e}")
|
112 |
+
# The RuntimeWarning: coroutine 'initialize_runner' was never awaited will disappear
|
113 |
+
# because initialize_runner is now awaited.
|
114 |
return jsonify({"error": f"An error occurred: {str(e)}. Check logs for details."}), 500
|
115 |
|
116 |
@app.route('/')
|
src/proxy_lite/browser/browser.py
CHANGED
@@ -15,6 +15,8 @@ from tenacity import before_sleep_log, retry, stop_after_delay, wait_exponential
|
|
15 |
from proxy_lite.browser.bounding_boxes import POI, BoundingBox, Point, annotate_bounding_boxes
|
16 |
from proxy_lite.logger import logger
|
17 |
|
|
|
|
|
18 |
SELF_CONTAINED_TAGS = [
|
19 |
# many of these are non-interactive but keeping them anyway
|
20 |
"area",
|
@@ -183,10 +185,53 @@ class BrowserSession:
|
|
183 |
)
|
184 |
async def update_poi(self) -> None:
|
185 |
try:
|
186 |
-
|
187 |
-
|
188 |
-
logger.
|
189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
# Run the bounding box javascript code to highlight the points of interest on the page
|
191 |
page_info = await self.current_page.evaluate(
|
192 |
"""() => {
|
@@ -204,7 +249,6 @@ class BrowserSession:
|
|
204 |
max_iframes = 10
|
205 |
|
206 |
# Define an asynchronous function to process and filter each iframe
|
207 |
-
|
208 |
tasks = [asyncio.create_task(self.process_iframe(iframe)) for iframe in iframes[:max_iframes]]
|
209 |
|
210 |
results = await asyncio.gather(*tasks)
|
|
|
15 |
from proxy_lite.browser.bounding_boxes import POI, BoundingBox, Point, annotate_bounding_boxes
|
16 |
from proxy_lite.logger import logger
|
17 |
|
18 |
+
import base64
|
19 |
+
|
20 |
SELF_CONTAINED_TAGS = [
|
21 |
# many of these are non-interactive but keeping them anyway
|
22 |
"area",
|
|
|
185 |
)
|
186 |
async def update_poi(self) -> None:
|
187 |
try:
|
188 |
+
# Added for robustness based on previous discussions
|
189 |
+
await self.current_page.wait_for_load_state("networkidle", timeout=180000)
|
190 |
+
logger.debug("wait_for_load_state('networkidle') completed.")
|
191 |
+
|
192 |
+
# This is the line that was previously timing out, now with increased timeout.
|
193 |
+
# Adding explicit try/except around it for specific debugging.
|
194 |
+
try:
|
195 |
+
await self.current_page.wait_for_selector("body", timeout=180000, state="visible")
|
196 |
+
logger.debug("wait_for_selector('body', state='visible') completed.")
|
197 |
+
except PlaywrightTimeoutError as e:
|
198 |
+
# --- START TEMPORARY DEBUGGING CODE ---
|
199 |
+
current_url = self.current_page.url if self.current_page else "N/A"
|
200 |
+
logger.error(f"DEBUGGING: Playwright Timeout (180s) on body selector at URL: {current_url}")
|
201 |
+
|
202 |
+
html_content = None
|
203 |
+
try:
|
204 |
+
if self.current_page:
|
205 |
+
html_content = await self.current_page.content()
|
206 |
+
# Log only a snippet of HTML to avoid excessively large logs
|
207 |
+
logger.error(f"DEBUGGING: HTML Content (first 1000 chars) when timeout occurred:\n{html_content[:1000]}...")
|
208 |
+
except Exception as html_e:
|
209 |
+
logger.error(f"DEBUGGING: Could not get HTML content: {html_e}")
|
210 |
+
|
211 |
+
screenshot_b64 = "N/A"
|
212 |
+
try:
|
213 |
+
if self.current_page:
|
214 |
+
# Capture screenshot at lower quality to keep log size manageable
|
215 |
+
screenshot_bytes = await self.current_page.screenshot(type="jpeg", quality=50)
|
216 |
+
screenshot_b64 = base64.b64encode(screenshot_bytes).decode("utf-8")
|
217 |
+
# Log only a very short snippet of base64 string
|
218 |
+
logger.error(f"DEBUGGING: Base64 Screenshot (truncated) when timeout occurred:\ndata:image/jpeg;base64,{screenshot_b64[:100]}... (full string is much longer)")
|
219 |
+
except Exception as ss_e:
|
220 |
+
logger.error(f"DEBUGGING: Could not take screenshot: {ss_e}")
|
221 |
+
|
222 |
+
# Re-raise the original exception to ensure the task still fails,
|
223 |
+
# but now with crucial debugging information in the logs.
|
224 |
+
raise e
|
225 |
+
# --- END TEMPORARY DEBUGGING CODE ---
|
226 |
+
|
227 |
+
except PlaywrightTimeoutError: # This outer catch is for the wait_for_load_state timeout
|
228 |
+
logger.error(f"Timeout waiting for website load state (networkidle): {self.current_url}")
|
229 |
+
raise # Re-raise if load_state itself times out
|
230 |
+
|
231 |
+
except Exception as e:
|
232 |
+
logger.error(f"An unexpected error occurred during page readiness check: {e}")
|
233 |
+
raise
|
234 |
+
|
235 |
# Run the bounding box javascript code to highlight the points of interest on the page
|
236 |
page_info = await self.current_page.evaluate(
|
237 |
"""() => {
|
|
|
249 |
max_iframes = 10
|
250 |
|
251 |
# Define an asynchronous function to process and filter each iframe
|
|
|
252 |
tasks = [asyncio.create_task(self.process_iframe(iframe)) for iframe in iframes[:max_iframes]]
|
253 |
|
254 |
results = await asyncio.gather(*tasks)
|