torinriley committed on
Commit
787305e
·
1 Parent(s): 29a6685
Files changed (1) hide show
  1. Dockerfile +6 -1
Dockerfile CHANGED
@@ -14,7 +14,7 @@ COPY requirements.txt .
14
 
15
  # Install Python dependencies
16
  RUN pip install --no-cache-dir -r requirements.txt
17
- RUN pip install --no-cache-dir wandb
18
 
19
  # Create output directory with proper permissions
20
  RUN mkdir -p /app/out && chmod 777 /app/out
@@ -22,5 +22,10 @@ RUN mkdir -p /app/out && chmod 777 /app/out
22
  # Copy the project files
23
  COPY . .
24
 
 
 
 
 
 
25
  # Command to run training
26
  CMD ["python", "train.py", "--wandb_log=True"]
 
14
 
15
  # Install Python dependencies
16
  RUN pip install --no-cache-dir -r requirements.txt
17
+ RUN pip install --no-cache-dir wandb tiktoken datasets
18
 
19
  # Create output directory with proper permissions
20
  RUN mkdir -p /app/out && chmod 777 /app/out
 
22
  # Copy the project files
23
  COPY . .
24
 
25
+ # Prepare the OpenWebText dataset
26
+ RUN mkdir -p /app/data/openwebtext && \
27
+ cd /app/data/openwebtext && \
28
+ python prepare.py
29
+
30
  # Command to run training
31
  CMD ["python", "train.py", "--wandb_log=True"]