# NOTE: the lines that originally preceded the shebang here ("Spaces:",
# "Runtime error" x2, "File size: 3,543 Bytes", commit "8a42f8f", and a
# 1..145 line-number gutter) were artifacts of the page this script was
# extracted from, not part of the script itself.
#!/bin/bash
# Print $1 as black-on-green text, surrounded by blank lines, so it stands
# out in long test logs.
# Fix: pass the message through a '%s' conversion instead of interpolating
# it into the printf FORMAT string — the original mangled any banner text
# containing '%' (e.g. loss-scale values) or backslash sequences.
print_banner() {
  printf '\n\n\n\e[30m\e[42m%s\e[0m\n\n\n\n' "$1"
}
# Positional arguments:
#   $1 - test mode: "single_gpu" or "distributed" (selects BASE_CMD)
#   $2 - dataset directory handed to main_amp.py
#   $3 - if non-empty, suppress --use_baseline for the compare.py runs
print_banner "Distributed status: $1"

# Fix: quote the expansion (the original `echo $2` word-split and globbed
# the path) and use printf, which is safe for arbitrary data.
printf '%s\n' "$2"
DATADIR="$2"

# NOTE(review): USE_BASELINE is computed from $3 here but the compare.py
# loop below hardcodes --use_baseline, leaving this variable unused —
# the loop should expand ${USE_BASELINE} instead.
if [ -n "$3" ]
then
  USE_BASELINE=""
else
  USE_BASELINE="--use_baseline"
fi

if [ "$1" == "single_gpu" ]
then
  BASE_CMD="python main_amp.py -a resnet50 --b 128 --workers 4 --deterministic --prints-to-process 5"
fi

if [ "$1" == "distributed" ]
then
  BASE_CMD="python -m torch.distributed.launch --nproc_per_node=2 main_amp.py -a resnet50 --b 128 --workers 4 --deterministic --prints-to-process 5"
fi
# Extra flags for the FusedAdam runs: O2 with fp16 batchnorm and the fused
# optimizer enabled.
ADAM_ARGS="--opt-level O2 --keep-batchnorm-fp32 False --fused-adam"

# Sweep dimensions for the test matrix below.  An empty string means
# "use the opt-level's default for this option".
keep_batchnorms=("" "--keep-batchnorm-fp32 True" "--keep-batchnorm-fp32 False")
loss_scales=("" "--loss-scale 1.0" "--loss-scale 128.0" "--loss-scale dynamic")
opt_levels=("O0" "O1" "O2" "O3")
# Remove comparison dumps left behind by previous runs.
# Fix: -f (and a single invocation) keeps the script from printing errors —
# and, were it below `set -e`, from aborting — when no such files exist yet;
# the original bare `rm True*` failed noisily on a clean tree.
rm -f True* False*

# From here on, abort on the first failing command.
set -e

print_banner "Installing Apex with --cuda_ext and --cpp_ext"
pushd ../../..
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
popd
# Full sweep (opt level x loss scale x batchnorm flag) against the
# cpp/cuda-extension install; --has-ext tells main_amp.py which install to
# expect.  ${BASE_CMD}, ${loss_scale}, and ${keep_batchnorm} are
# intentionally left unquoted: they hold multi-word command fragments that
# must undergo word splitting.
for opt_level in "${opt_levels[@]}"
do
  for loss_scale in "${loss_scales[@]}"
  do
    for keep_batchnorm in "${keep_batchnorms[@]}"
    do
      # The keep-batchnorm override is skipped for O1 (combination not run).
      if [ "$opt_level" == "O1" ] && [ -n "${keep_batchnorm}" ]
      then
        print_banner "Skipping ${opt_level} ${loss_scale} ${keep_batchnorm}"
        continue
      fi
      print_banner "${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} --has-ext $DATADIR"
      set -x
      # Fix: quote "$DATADIR" so a dataset path containing spaces or glob
      # characters is passed as a single argument.
      ${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} --has-ext "$DATADIR"
      set +x
    done
  done
done
# Handle FusedAdam separately due to limited support.
# FusedAdam will not be tested for bitwise accuracy against the Python implementation.
# The L0 tests already do so. These tests are here to ensure that it actually runs,
# and get an idea of performance.
# FusedAdam smoke runs across all loss scales (see comment above: these only
# check that FusedAdam runs, not bitwise accuracy).  ${BASE_CMD},
# ${ADAM_ARGS}, and ${loss_scale} stay unquoted on purpose — they are
# multi-word command fragments.
for loss_scale in "${loss_scales[@]}"
do
  print_banner "${BASE_CMD} ${ADAM_ARGS} ${loss_scale} --has-ext $DATADIR"
  set -x
  # Fix: quote "$DATADIR" so paths with spaces survive as one argument.
  ${BASE_CMD} ${ADAM_ARGS} ${loss_scale} --has-ext "$DATADIR"
  set +x
done
# Rebuild Apex as a pure-Python install (no --cpp_ext/--cuda_ext) so the
# sweep below exercises the non-extension code path; its results are later
# compared for bitwise accuracy against the --has-ext runs.
print_banner "Reinstalling apex without extensions"
pushd ../../..
pip install -v --no-cache-dir .
popd
# Same sweep as above, now without --has-ext, against the pure-Python
# install.  ${BASE_CMD}, ${loss_scale}, and ${keep_batchnorm} are
# intentionally unquoted multi-word command fragments.
for opt_level in "${opt_levels[@]}"
do
  for loss_scale in "${loss_scales[@]}"
  do
    for keep_batchnorm in "${keep_batchnorms[@]}"
    do
      # The keep-batchnorm override is skipped for O1 (combination not run).
      if [ "$opt_level" == "O1" ] && [ -n "${keep_batchnorm}" ]
      then
        print_banner "Skipping ${opt_level} ${loss_scale} ${keep_batchnorm}"
        continue
      fi
      print_banner "${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} $DATADIR"
      set -x
      # Fix: quote "$DATADIR" so paths with spaces survive as one argument.
      ${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} "$DATADIR"
      set +x
    done
  done
done
# Compare the dumps produced by the --has-ext and extension-free runs for
# each configuration that was actually executed.
print_banner "Checking for bitwise accuracy between Python-only and cpp/cuda extension installs"
for opt_level in "${opt_levels[@]}"
do
  for loss_scale in "${loss_scales[@]}"
  do
    for keep_batchnorm in "${keep_batchnorms[@]}"
    do
      echo ""
      # Mirror the skip logic of the training sweeps: no dumps exist for
      # O1 + explicit keep-batchnorm, so there is nothing to compare.
      if [ "$opt_level" == "O1" ] && [ -n "${keep_batchnorm}" ]
      then
        echo "Skipping ${opt_level} ${loss_scale} ${keep_batchnorm}"
        continue
      fi
      echo "${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} [--has-ext] $DATADIR"
      set -x
      # Fix: honor the $3 switch computed at the top of the script.  The
      # original hardcoded --use_baseline here, which made the USE_BASELINE
      # variable dead code and ignored the caller's choice.
      python compare.py --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} ${USE_BASELINE}
      set +x
    done
  done
done
# Restore the cpp/cuda extension install (the state established at the top
# of this script before the pure-Python reinstall above).
print_banner "Reinstalling Apex with --cuda_ext and --cpp_ext"
pushd ../../..
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
popd
# (trailing "|" extraction artifact removed)