File size: 732 Bytes
0c79708
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
787305e
0c79708
29a6685
 
 
0c79708
 
 
787305e
8468281
787305e
0c79708
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Base image: the PyTorch 2.0.1 runtime image built with CUDA 11.7 and cuDNN 8,
# chosen for GPU support. The tag is pinned to an exact version.
# NOTE(review): consider additionally pinning by digest for fully reproducible builds.
FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime

# Set the working directory. Every relative path in the instructions that
# follow (COPY destinations, RUN commands, the final CMD) resolves against /app.
WORKDIR /app

# Install system dependencies.
# --no-install-recommends stops apt from pulling in git's optional extras.
# ca-certificates is one of those recommended packages, so it is listed
# explicitly: git needs it to verify HTTPS remotes.
# The apt package lists are removed in the same layer so they never end up in
# the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    git \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest first, so the install layer below stays
# cached until requirements.txt itself changes.
COPY requirements.txt .

# Install all Python dependencies in a single pip invocation so the resolver
# sees requirements.txt and the extra packages together; a separate second
# install could silently change versions chosen by the first.
# --no-cache-dir keeps pip's download cache out of the image layer.
# NOTE(review): datasets, tiktoken and wandb are unpinned — pin them (or move
# them into requirements.txt) for reproducible builds.
RUN pip install --no-cache-dir -r requirements.txt \
    datasets \
    tiktoken \
    wandb

# Create the output directory and make it world-writable in one step:
# `install -d` creates /app/out if it is missing and applies mode 777 to it.
# NOTE(review): 777 is very permissive — presumably so that a non-root user can
# write results here; confirm and tighten if possible.
RUN install -d -m 777 /app/out

# Copy the entire build context into the working directory (/app).
# This comes after the dependency installation so that editing source files
# does not invalidate the cached dependency layers.
# NOTE(review): confirm a .dockerignore excludes .git, local outputs and any
# credential files, since everything in the context is copied into the image.
COPY . .

# Prepare the OpenWebText dataset by running prepare.py from its own directory.
# WORKDIR is used instead of `cd` inside RUN so the directory change is
# explicit; it is restored to /app afterwards so the CMD still resolves
# train.py from the project root.
# NOTE(review): whatever prepare.py writes is baked into this image layer, and
# because this step follows `COPY . .` it re-runs whenever any project file
# changes — confirm that preparing the data at build time (rather than at
# runtime into a mounted volume) is intended.
WORKDIR /app/data/openwebtext
RUN python prepare.py
WORKDIR /app

# Default command: run train.py (resolved relative to the working directory)
# with wandb logging switched on. Exec (JSON-array) form is used, so python is
# started directly rather than through a shell and receives stop signals itself.
# NOTE(review): --wandb_log=True presumably requires Weights & Biases
# credentials at runtime — supply them via `docker run -e`, never bake them
# into the image.
# NOTE(review): no USER instruction is visible here, so the container
# presumably runs as the base image's default user — confirm this is acceptable.
CMD ["python", "train.py", "--wandb_log=True"]