Trisha Tomy committed on
Commit
6170d37
·
1 Parent(s): 8b9bbfa

Consolidated final Dockerfile and app.py configurations

Browse files
Files changed (2) hide show
  1. Dockerfile +31 -24
  2. app.py +34 -12
Dockerfile CHANGED
@@ -1,15 +1,13 @@
1
- # Dockerfile
2
- # Use a Python base image that is compatible with Playwright's system dependencies
3
  FROM python:3.11-slim-buster
4
 
5
  # Set the working directory inside the container
6
  WORKDIR /app
7
 
8
- # Install system dependencies required by Playwright's Chromium browser
9
- # These are common dependencies for running headless Chrome/Chromium.
10
- # This list might need minor adjustments based on specific runtime errors.
11
  RUN apt-get update && apt-get install -y \
12
- # Core libraries for graphics/rendering
13
  fonts-liberation \
14
  libappindicator3-1 \
15
  libasound2 \
@@ -34,50 +32,59 @@ RUN apt-get update && apt-get install -y \
34
  libxrender1 \
35
  libxss1 \
36
  libxtst6 \
37
- # Specific to Chromium/GPU (even if not using GPU, these are for display stack)
38
  libgbm-dev \
39
  libasound2-dev \
40
  # xvfb provides a virtual display server, often necessary for headless browsers
41
  xvfb \
42
- # Ensure Chromium is installed on the system (Playwright uses its own, but sometimes useful)
43
- chromium \
44
  git \
 
 
45
  # Clean up apt caches to reduce image size
46
  && rm -rf /var/lib/apt/lists/*
47
 
48
- # Copy Python dependencies (now only common ones, proxy-lite will be handled separately)
49
  COPY requirements.txt .
50
 
51
- # --- NEW/MODIFIED INSTALLATION BLOCK ---
52
- # Upgrade pip, setuptools, and wheel for a robust build environment
 
 
 
 
 
 
 
53
  RUN pip install --no-cache-dir --upgrade pip setuptools wheel
54
 
55
- # Install proxy-lite in editable mode directly from its Git repository
56
- # This explicitly tells pip to clone and "install" it by linking directly,
57
- # which can sometimes bypass wheel build issues for complex projects.
58
  RUN pip install --no-cache-dir -e git+https://github.com/convergence-ai/proxy-lite.git#egg=proxy-lite
59
 
60
- # Install the rest of the dependencies from requirements.txt
 
 
61
  RUN pip install --no-cache-dir -r requirements.txt
62
- # --- END NEW/MODIFIED INSTALLATION BLOCK ---
63
 
64
- # Copy your application code into the container
65
  COPY . .
66
 
67
  # Install Playwright browser binaries within the container
68
- # This downloads Chromium into the container's Playwright-managed location.
69
  RUN playwright install chromium
70
 
71
  # Set environment variables for Playwright
72
  # PLAYWRIGHT_BROWSERS_PATH: Tells Playwright where to find the installed browsers.
73
- # DISPLAY, XDG_RUNTIME_DIR: Often needed for headless browser environments.
74
- ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright/
75
  ENV DISPLAY=:99
76
  ENV XDG_RUNTIME_DIR=/tmp
77
 
78
- # Expose the port your Flask app will listen on. Hugging Face Spaces expects 7860.
79
  EXPOSE 7860
80
 
81
- # Define the command to run your Flask application using Gunicorn for production
82
- # Hugging Face Spaces will execute this command to start your web service.
 
83
  CMD exec gunicorn --bind 0.0.0.0:7860 --workers 2 --worker-class gevent app:app --timeout 300
 
1
+ # Use an official Python runtime as a parent image
 
2
  FROM python:3.11-slim-buster
3
 
4
  # Set the working directory inside the container
5
  WORKDIR /app
6
 
7
+ # Install system dependencies required by Playwright, Git, and other libs
8
+ # This list is comprehensive for headless Chromium on Debian-based systems.
 
9
  RUN apt-get update && apt-get install -y \
10
+ # Core libraries for graphics/rendering (for browser)
11
  fonts-liberation \
12
  libappindicator3-1 \
13
  libasound2 \
 
32
  libxrender1 \
33
  libxss1 \
34
  libxtst6 \
35
+ # Specific to Chromium/GPU support (even if not using GPU, these are for display stack)
36
  libgbm-dev \
37
  libasound2-dev \
38
  # xvfb provides a virtual display server, often necessary for headless browsers
39
  xvfb \
40
+ # Install Git, required by pip to clone proxy-lite from GitHub
 
41
  git \
42
+ # Install Chromium browser, though Playwright often manages its own, this ensures system deps are met
43
+ chromium \
44
  # Clean up apt caches to reduce image size
45
  && rm -rf /var/lib/apt/lists/*
46
 
47
+ # Copy Python dependencies (only core Flask/Gunicorn deps, proxy-lite installed separately)
48
  COPY requirements.txt .
49
 
50
+ # --- START: Critical steps for proxy-lite and permissions ---
51
+
52
+ # Create the directory that proxy-lite's recorder insists on writing to
53
+ # and grant full permissions. This is a workaround for the PermissionError.
54
+ RUN mkdir -p /app/src/proxy-lite/local_trajectories \
55
+ && chmod -R 777 /app/src/proxy-lite/local_trajectories
56
+
57
+ # Upgrade pip, setuptools, and wheel for a robust Python build environment.
58
+ # This addresses potential build issues with older versions.
59
  RUN pip install --no-cache-dir --upgrade pip setuptools wheel
60
 
61
+ # Install proxy-lite in "editable" mode directly from its GitHub repository.
62
+ # This explicitly tells pip to clone and link it, essential for its structure.
 
63
  RUN pip install --no-cache-dir -e git+https://github.com/convergence-ai/proxy-lite.git#egg=proxy-lite
64
 
65
+ # --- END: Critical steps for proxy-lite and permissions ---
66
+
67
+ # Install the rest of the common Python dependencies from requirements.txt
68
  RUN pip install --no-cache-dir -r requirements.txt
 
69
 
70
+ # Copy your Flask application code into the container
71
  COPY . .
72
 
73
  # Install Playwright browser binaries within the container
74
+ # This downloads the actual browser executables that Playwright controls.
75
  RUN playwright install chromium
76
 
77
  # Set environment variables for Playwright
78
  # PLAYWRIGHT_BROWSERS_PATH: Tells Playwright where to find the installed browsers.
79
+ # DISPLAY, XDG_RUNTIME_DIR: Often needed for headless browser environments (xvfb).
80
+ ENV PLAYWRIGHT_BROWSERS_PATH=/root/.cache/ms-playwright/
81
  ENV DISPLAY=:99
82
  ENV XDG_RUNTIME_DIR=/tmp
83
 
84
+ # Expose the port your Flask app will listen on. Hugging Face Spaces requires 7860.
85
  EXPOSE 7860
86
 
87
+ # Define the command to run your Flask application using Gunicorn for production.
88
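+ # --worker-class gevent runs each worker on greenlets, so a worker can keep serving while a request waits on I/O.
+ # NOTE(review): gevent does not itself integrate with asyncio; confirm this worker class suits proxy-lite's asyncio code.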
89
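+ # --timeout 300 sets Gunicorn's worker timeout to 300 seconds (5 minutes). With async workers such as gevent
+ # this is a worker-liveness limit rather than a strict per-request deadline.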
90
  CMD exec gunicorn --bind 0.0.0.0:7860 --workers 2 --worker-class gevent app:app --timeout 300
app.py CHANGED
@@ -1,36 +1,50 @@
 
 
 
1
  import gevent.monkey
2
  gevent.monkey.patch_all(asyncio=True)
3
- # app.py
4
  import asyncio
5
  from flask import Flask, request, jsonify
6
  from proxy_lite import Runner, RunnerConfig
7
- from proxy_lite.config import RecorderConfig
8
  import os
9
  import logging
10
 
 
11
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
12
  logger = logging.getLogger(__name__)
13
 
14
  app = Flask(__name__)
15
 
 
 
16
  _runner = None
17
 
18
  async def initialize_runner():
 
19
  global _runner
20
  if _runner is None:
21
  logger.info("Initializing Proxy-lite Runner...")
 
22
  hf_api_token = os.environ.get("HF_API_TOKEN")
23
  if not hf_api_token:
24
  logger.error("HF_API_TOKEN environment variable not set. Cannot initialize Runner.")
25
  raise ValueError("HF_API_TOKEN environment variable not set. Please set it as a Space secret.")
26
 
 
 
 
27
  recorder_config_instance = RecorderConfig(root_path="/tmp/proxy_lite_runs")
 
28
 
 
 
29
  config = RunnerConfig(
30
  environment={
31
  "name": "webbrowser",
32
  "homepage": "https://www.google.com",
33
- "headless": True,
34
  },
35
  solver={
36
  "name": "simple",
@@ -46,31 +60,33 @@ async def initialize_runner():
46
  },
47
  recorder=recorder_config_instance # Pass the explicitly created instance
48
  )
 
 
 
49
  _runner = Runner(config=config)
50
  logger.info("Proxy-lite Runner initialized successfully.")
51
  return _runner
52
 
53
- # --- MODIFIED run_async_task FUNCTION ---
54
  def run_async_task(coro):
55
  """
56
  Helper to run async coroutines in a synchronous context (like Flask routes).
57
- Ensures an event loop exists and runs the coroutine.
 
58
  """
59
  try:
60
  # Try to get the running loop (for current thread/greenlet)
61
  loop = asyncio.get_running_loop()
62
  except RuntimeError:
63
- # If no loop is running, create a new one for this thread
64
  loop = asyncio.new_event_loop()
65
  asyncio.set_event_loop(loop)
66
-
67
  # Run the coroutine until it completes
68
  return loop.run_until_complete(coro)
69
- # --- END MODIFIED run_async_task FUNCTION ---
70
-
71
 
72
  @app.route('/run_proxy_task', methods=['POST'])
73
  def run_proxy_task_endpoint():
 
74
  data = request.json
75
  task = data.get('task')
76
  if not task:
@@ -79,23 +95,29 @@ def run_proxy_task_endpoint():
79
 
80
  logger.info(f"Received task for proxy-lite: '{task}'")
81
  try:
 
82
  runner = run_async_task(initialize_runner())
 
83
  result = run_async_task(runner.run(task))
84
 
85
- logger.info(f"Proxy-lite task completed. Output: {result[:200]}...")
 
86
  return jsonify({"output": result})
87
  except Exception as e:
88
- logger.exception(f"Error processing task '{task}':")
89
  return jsonify({"error": f"An error occurred: {str(e)}. Check logs for details."}), 500
90
 
91
  @app.route('/')
92
  def root():
 
93
  logger.info("Root endpoint accessed.")
94
  return "Proxy-lite API is running. Send POST requests to /run_proxy_task with a 'task' in JSON body."
95
 
96
  if __name__ == '__main__':
 
97
  if not os.environ.get("HF_API_TOKEN"):
98
  logger.error("HF_API_TOKEN environment variable is not set. Please set it for local testing.")
99
  exit(1)
100
  logger.info("Starting Flask development server on 0.0.0.0:7860...")
101
- app.run(host='0.0.0.0', port=7860, debug=True)
 
 
1
+ # app.py
2
+ # IMPORTANT: These two lines MUST be at the very top, before any other imports.
3
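+ # They monkey-patch blocking standard-library modules so they cooperate with gevent workers.
+ # NOTE(review): gevent documents no `asyncio` option for patch_all; confirm that `asyncio=True` has any effect.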
4
  import gevent.monkey
5
  gevent.monkey.patch_all(asyncio=True)
6
+
7
  import asyncio
8
  from flask import Flask, request, jsonify
9
  from proxy_lite import Runner, RunnerConfig
10
+ from proxy_lite.config import RecorderConfig # Needed for explicit instantiation
11
  import os
12
  import logging
13
 
14
+ # Configure logging for better visibility in Hugging Face Space logs
15
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
16
  logger = logging.getLogger(__name__)
17
 
18
  app = Flask(__name__)
19
 
20
+ # Global runner instance shared by every greenlet in a worker process. It is initialized once per process
21
+ # upon the first request to ensure correct event loop binding.
22
  _runner = None
23
 
24
  async def initialize_runner():
25
+ """Initializes the Proxy-lite Runner asynchronously."""
26
  global _runner
27
  if _runner is None:
28
  logger.info("Initializing Proxy-lite Runner...")
29
+ # Retrieve Hugging Face API Token from environment variables (set as Space Secret)
30
  hf_api_token = os.environ.get("HF_API_TOKEN")
31
  if not hf_api_token:
32
  logger.error("HF_API_TOKEN environment variable not set. Cannot initialize Runner.")
33
  raise ValueError("HF_API_TOKEN environment variable not set. Please set it as a Space secret.")
34
 
35
+ # --- EXPLICITLY CREATE RECORDER_CONFIG ---
36
+ # This addresses the PermissionError by ensuring the root_path is explicitly set
37
+ # and passed as a Pydantic object, rather than relying on nested dict parsing.
38
  recorder_config_instance = RecorderConfig(root_path="/tmp/proxy_lite_runs")
39
+ # --- END EXPLICIT RECORDER_CONFIG ---
40
 
41
+ # --- EXPLICITLY CREATE RUNNER_CONFIG ---
42
+ # Pass all configuration parameters as keyword arguments to the Pydantic model.
43
  config = RunnerConfig(
44
  environment={
45
  "name": "webbrowser",
46
  "homepage": "https://www.google.com",
47
+ "headless": True, # Keep headless for server environment
48
  },
49
  solver={
50
  "name": "simple",
 
60
  },
61
  recorder=recorder_config_instance # Pass the explicitly created instance
62
  )
63
+ # --- END EXPLICIT RUNNER_CONFIG ---
64
+
65
+ # Instantiate the Runner, passing the config as a keyword argument (Pydantic style)
66
  _runner = Runner(config=config)
67
  logger.info("Proxy-lite Runner initialized successfully.")
68
  return _runner
69
 
 
70
  def run_async_task(coro):
71
  """
72
  Helper to run async coroutines in a synchronous context (like Flask routes).
73
+ Ensures an event loop exists for the current thread/greenlet and runs the coroutine.
74
+ This addresses the "no current event loop" errors.
75
  """
76
  try:
77
  # Try to get the running loop (for current thread/greenlet)
78
  loop = asyncio.get_running_loop()
79
  except RuntimeError:
80
+ # If no loop is running, create a new one for this thread/greenlet
81
  loop = asyncio.new_event_loop()
82
  asyncio.set_event_loop(loop)
83
+
84
  # Run the coroutine until it completes
85
  return loop.run_until_complete(coro)
 
 
86
 
87
  @app.route('/run_proxy_task', methods=['POST'])
88
  def run_proxy_task_endpoint():
89
+ """API endpoint to receive a task and execute it with Proxy-lite."""
90
  data = request.json
91
  task = data.get('task')
92
  if not task:
 
95
 
96
  logger.info(f"Received task for proxy-lite: '{task}'")
97
  try:
98
+ # Ensure runner is initialized (and event loop handled) before use
99
  runner = run_async_task(initialize_runner())
100
+ # Run the task using the proxy-lite runner
101
  result = run_async_task(runner.run(task))
102
 
103
+ logger.info(f"Proxy-lite task completed. Output: {result[:200]}...") # Log truncated output
104
+ # Return the result as a JSON response
105
  return jsonify({"output": result})
106
  except Exception as e:
107
+ logger.exception(f"Error processing task '{task}':") # Logs full traceback for debugging
108
  return jsonify({"error": f"An error occurred: {str(e)}. Check logs for details."}), 500
109
 
110
  @app.route('/')
111
  def root():
112
+ """Basic root endpoint for health check and user info."""
113
  logger.info("Root endpoint accessed.")
114
  return "Proxy-lite API is running. Send POST requests to /run_proxy_task with a 'task' in JSON body."
115
 
116
  if __name__ == '__main__':
117
+ # During local development, ensure HF_API_TOKEN is set manually
118
  if not os.environ.get("HF_API_TOKEN"):
119
  logger.error("HF_API_TOKEN environment variable is not set. Please set it for local testing.")
120
  exit(1)
121
  logger.info("Starting Flask development server on 0.0.0.0:7860...")
122
+ # Hugging Face Spaces expects binding to 0.0.0.0 and listening on port 7860
123
+ app.run(host='0.0.0.0', port=7860, debug=True) # debug=True for local testing, disable for production