#!/usr/bin/env bash
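
# Scaling benchmark for meta-llama/Llama-2-13b-chat-hf served by vLLM
# (mlenergy/vllm:v0.3.0-openai): run() sweeps request rates at tensor
# parallelism degrees 4, 2, and 1 (selected via --gpu-ids), and the whole
# sweep is repeated at several GPU power limits ($PL). Requires $HF_TOKEN
# for the gated Llama-2 model; results land under results/2024-02-19-scaling.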
run() {
# TP 4 GPUs
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 5.00 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1 2 3
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 4.50 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1 2 3
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 4.00 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1 2 3
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 3.50 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1 2 3
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 3.00 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1 2 3
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 2.50 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1 2 3
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 2.00 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1 2 3
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 1.50 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1 2 3
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 1.00 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1 2 3
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 0.75 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1 2 3
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 0.50 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1 2 3
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 0.25 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1 2 3
# TP 2 GPUs
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 2.50 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 2.25 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 2.00 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 1.75 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 1.50 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 1.25 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 1.00 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 0.75 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 0.50 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 0.25 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1
# 1 GPU
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 1.25 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 1.125 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 1.00 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 0.875 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 0.75 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 0.625 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 0.50 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 0.375 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 0.25 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0
python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 0.125 --power-limit $PL --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0
}
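
# The request-rate sweeps in run() could also be expressed with a loop.
# The sketch below is illustrative only and is not invoked; the helper name
# run_sweep is hypothetical, but every flag matches the commands in run() above.
run_sweep() {
    local gpu_ids=$1
    shift
    local rate
    for rate in "$@"; do
        python scripts/benchmark_one.py \
            --backend vllm \
            --server-image mlenergy/vllm:v0.3.0-openai \
            --model meta-llama/Llama-2-13b-chat-hf \
            --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json \
            --request-rate "$rate" \
            --power-limit "$PL" \
            --result-root results/2024-02-19-scaling \
            --huggingface-token "$HF_TOKEN" \
            --gpu-ids $gpu_ids  # intentionally unquoted so "0 1 2 3" splits into separate arguments
    done
}
# Example (TP 4): run_sweep "0 1 2 3" 5.00 4.50 4.00 3.50 3.00 2.50 2.00 1.50 1.00 0.75 0.50 0.25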
# Warmup
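# One TP-4 run capped at 120 seconds (timeout delivers SIGINT), presumably to
# warm up the Docker image, model weights, and server before the measured
# sweeps below.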
timeout --signal SIGINT 120 python scripts/benchmark_one.py --backend vllm --server-image mlenergy/vllm:v0.3.0-openai --model meta-llama/Llama-2-13b-chat-hf --sharegpt-path ../../../sharegpt/ShareGPT_V3_filtered_500.json --request-rate 5.00 --power-limit 300 --result-root results/2024-02-19-scaling --huggingface-token $HF_TOKEN --gpu-ids 0 1 2 3
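
# Power limits 300 down to 200 are commented out below, presumably completed
# in an earlier invocation of this script.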
# PL=300
# run
#
# PL=275
# run
#
# PL=250
# run
#
# PL=225
# run
#
# PL=200
# run
PL=175
run
PL=150
run
PL=125
run
PL=100
run
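
# Equivalently, the power-limit sweep could be written as a loop, e.g.:
#   for PL in 175 150 125 100; do run; done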