Upload train_gpt2.slurm
Browse files
This is the training resource allocation, limited to one A100 GPU for 26 GPU hours.
- train_gpt2.slurm +14 -0
@@ -0,0 +1,14 @@
#!/bin/bash
# Slurm batch script for the gpt2_train job: runs train.py with
# configs/config.yaml on one node with a single A100 GPU.
# Submit with: sbatch train_gpt2.slurm
#
# The interpreter line above must stay the first line of the file; sbatch
# refuses scripts that do not start with "#!". All #SBATCH directives must
# appear before the first executable command.
#SBATCH --job-name=gpt2_train
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=32
#SBATCH --time=26:00:00               # Request 26 hours of wall time
#SBATCH --mem=128GB                   # Request 128GB per node
#SBATCH --partition=gpu               # Request the GPU partition/queue
#SBATCH --gres=gpu:a100:1             # Request one A100 GPU to use

#SBATCH --output=gpt2_train.%j.log    # Redirect stdout/err to file (%j = job ID)

# Run the training script
python train.py --config configs/config.yaml