# Environment setup and start-up diagnostics.

# Point the Hugging Face cache at /data/.huggingface.
export HF_HOME="/data/.huggingface"
echo "PWD: $(pwd)"

# Write the token from the environment to .hf_token in the current directory.
# The subshell keeps the restrictive umask local, so a newly created token
# file is readable by its owner only.
# NOTE(review): assumes whatever reads .hf_token runs as the same user - confirm.
(
  umask 077
  printf '%s\n' "${HF_TOKEN:-}" > .hf_token
)
echo "LS: $(ls -als)"

# Print GPU status every 10 minutes for as long as the script is alive.
while true; do
  nvidia-smi
  sleep 600
done &

# One-off report of what torch sees on this machine.
python3 -c 'import torch
print(f"is availeble = {torch.cuda.is_available()}")
print(f"device count = {torch.cuda.device_count()}")
print(f"current device = {torch.cuda.current_device()}")'
#######################################
# Start a component in the background and block until it reports ready.
# The command's combined stdout/stderr is teed into "<name>.log" in the
# current directory, and that log is polled for the ready text.
# Globals:
#   RUN_AND_WAIT_POLL_SECONDS (read, optional) - polling interval, default 1
# Arguments:
#   $1 - component name, e.g. "serve.controller" (also the log file prefix)
#   $2 - the command line to execute, as a single string
#   $3 - literal text whose appearance in the log means the component is ready
# Outputs:
#   progress messages on stdout, plus the component's own output via tee
# Returns:
#   0 once the ready text is seen; 1 if the command ended without printing it
# Note:
#   $! still refers to the started pipeline after this function returns, so
#   the caller can capture it to wait on the component later.
#######################################
RUN_AND_WAIT() {
  local name=$1
  local cmd=$2
  local ready_text=$3
  local log_file="$name.log"
  local poll_seconds=${RUN_AND_WAIT_POLL_SECONDS:-1}
  local pipeline_pid

  echo "Starting $name"

  # Create the log up front so the first poll never hits a missing file.
  : > "$log_file"

  # nohup needs a real executable, and eval is a shell builtin, so the command
  # string is handed to a child bash instead.
  nohup bash -c "$cmd" 2>&1 | tee "$log_file" &
  pipeline_pid=$!

  # -F matches the ready text literally; -- protects a text starting with '-'.
  until grep -qF -- "$ready_text" "$log_file"; do
    if ! kill -0 "$pipeline_pid" 2>/dev/null; then
      # The pipeline is gone: look once more for output flushed at exit, then
      # give up rather than polling forever.
      if grep -qF -- "$ready_text" "$log_file"; then
        break
      fi
      echo "$name exited before becoming ready" >&2
      return 1
    fi
    sleep "$poll_seconds"
    echo "wait $name to be running"
  done

  echo "$name is running"
}
# Launch the controller and wait until its log shows the Uvicorn banner.
COMPONENT="serve.controller"
# Quoted so the whole command line is stored in COMMAND; unquoted, the shell
# would treat "COMMAND=python3" as an environment prefix for a command named -m.
COMMAND="python3 -m $COMPONENT --host 0.0.0.0 --port 10000"
READY_LOG="Uvicorn running on"
# Each argument is quoted so the multi-word command and ready text arrive as
# single parameters.
RUN_AND_WAIT "$COMPONENT" "$COMMAND" "$READY_LOG"
# RUN_AND_WAIT runs in this shell, so $! is the background pipeline it started.
# The final `wait` at the end of the script refers to P_CON.
P_CON=$!
# COMPONENT="serve.controller" | |
# echo "Starting $COMPONENT" | |
# nohup python3 -m $COMPONENT --host 0.0.0.0 --port 10000 2>&1 | tee "$COMPONENT.log" & | |
# while [ `grep -c "Uvicorn running on" "$COMPONENT.log"` -eq '0' ];do | |
# sleep 1s; | |
# echo "wait $COMPONENT to be running" | |
# done | |
# echo "$COMPONENT is running" | |
# sleep 30 | |
# echo "Starting prism-dinosiglip+13b" | |
# python3 -m interactive_demo --port 40000 --model_id prism-dinosiglip+13b & | |
# P4=$! | |
# echo "Starting prism-dinosiglip+7b" | |
# python3 -m interactive_demo --port 40000 --model_id prism-dinosiglip+7b & | |
# Launch the prism-dinosiglip+7b model worker and wait until it reports loaded.
COMPONENT="interactive_demo"
# Quoted so the whole command line is stored in COMMAND. The model id is
# "prism-dinosiglip+7b", matching the ready text below; the previous value
# lacked the trailing "b".
COMMAND="python3 -m $COMPONENT --port 40000 --model_id prism-dinosiglip+7b"
READY_LOG="loaded prismatic prism-dinosiglip+7b"
# Each argument is quoted so the multi-word command and ready text arrive as
# single parameters.
RUN_AND_WAIT "$COMPONENT" "$COMMAND" "$READY_LOG"
# RUN_AND_WAIT runs in this shell, so $! is the background pipeline it started.
# The final `wait` at the end of the script refers to P_P7B.
P_P7B=$!
# echo "Starting $COMPONENT" | |
# nohup | tee "$COMPONENT.log" & | |
# while [ `grep -c "loaded prismatic prism-dinosiglip+7b" "$COMPONENT.log"` -eq '0' ];do | |
# sleep 1s; | |
# echo "wait $COMPONENT to be running" | |
# done | |
# echo "$COMPONENT is running" | |
# python3 -m interactive_demo --port 40002 --model_id prism-dinosiglip-controlled+7b & | |
# P6=$! | |
# python3 -m interactive_demo --port 40003 --model_id llava-v1.5-7b & | |
# P7=$! | |
# Pause before starting the web UI.
# NOTE(review): presumably this lets the worker register with the controller - confirm.
sleep 30
echo "Starting serve.gradio_web_server"
python3 -m serve.gradio_web_server --controller http://127.0.0.1:10000 --model-list-mode reload & # --share
P_WEB=$!

# After another pause, show what ended up under the Hugging Face cache directory.
sleep 30
ls -als "$HF_HOME"
tree --du -h "$HF_HOME"

# Block on the background services. P_CON and P_P7B are only passed when they
# are set, so an unset PID variable is skipped instead of becoming an empty
# argument.
wait ${P_CON:+"$P_CON"} "$P_WEB" ${P_P7B:+"$P_P7B"} # $P5 $P6 $P7