Skip to content

Commit cfca53d

Browse files
committed
refactor the scripts and support a concurrency list
Signed-off-by: xxi <[email protected]>

modified: examples/disaggregated/slurm/disaggr_torch.slurm
modified: examples/disaggregated/slurm/run_benchmark.sh
modified: examples/disaggregated/slurm/start_server.sh
modified: examples/disaggregated/slurm/start_worker.sh
modified: examples/wide_ep/slurm_scripts/submit.sh
1 parent c39bcf2 commit cfca53d

File tree

5 files changed

+73
-63
lines changed

5 files changed

+73
-63
lines changed

examples/disaggregated/slurm/disaggr_torch.slurm

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,42 @@ container_image=${19}
3838
mounts=${20}
3939
workdir=${21}
4040
model_dir=${22}
41-
repo_dir=${23}
41+
trtllm_repo=${23}
42+
43+
echo "================= parameters ================="
44+
echo "num_ctx_servers: ${num_ctx_servers}"
45+
echo "ctx_tp_size: ${ctx_tp_size}"
46+
echo "ctx_batch_size: ${ctx_batch_size}"
47+
echo "ctx_max_num_tokens: ${ctx_max_num_tokens}"
48+
echo "ctx_enable_attention_dp: ${ctx_enable_attention_dp}"
49+
echo "num_gen_servers: ${num_gen_servers}"
50+
echo "gen_tp_size: ${gen_tp_size}"
51+
echo "gen_batch_size: ${gen_batch_size}"
52+
echo "gen_max_num_tokens: ${gen_max_num_tokens}"
53+
echo "gen_enable_attention_dp: ${gen_enable_attention_dp}"
54+
echo "gen_gpu_memory_fraction: ${gen_gpu_memory_fraction}"
55+
echo "eplb_num_slots: ${eplb_num_slots}"
56+
echo "mtp_size: ${mtp_size}"
57+
echo "concurrency: ${concurrency}"
58+
echo "isl: ${isl}"
59+
echo "osl: ${osl}"
60+
echo "multi_round: ${multi_round}"
61+
echo "streaming: ${streaming}"
62+
echo "container_image: ${container_image}"
63+
echo "mounts: ${mounts}"
64+
echo "workdir: ${workdir}"
65+
echo "model_dir: ${model_dir}"
66+
echo "trtllm_repo: ${trtllm_repo}"
67+
echo "==========================================="
68+
4269

4370
ctx_max_seq_len=$((isl + 1))
4471
gen_max_seq_len=$((isl + osl))
4572
ctx_gpu_frac=0.75
4673
cache_transceiver_max_num_tokens=8448
4774

4875
container_name=disaggr
49-
logdir=${workdir}/benchmark-${isl}-${osl}/
76+
logdir=${workdir}/benchmark-${isl}-${osl}
5077
mkdir -p ${logdir}
5178
full_logdir=${logdir}/ctx${num_ctx_servers}_gen${num_gen_servers}_dep${gen_tp_size}_batch${gen_batch_size}_eplb${eplb_num_slots}_mtp${mtp_size}
5279

@@ -66,16 +93,27 @@ fi
6693
mkdir -p ${full_logdir}
6794
echo "Log will be saved to: ${full_logdir}"
6895

96+
if [ -z "${TRT_LLM_GIT_COMMIT}" ]; then
97+
export TRT_LLM_GIT_COMMIT=$(git -C ${trtllm_repo} rev-parse --short HEAD 2>/dev/null || echo "unknown")
98+
echo "TRT_LLM_GIT_COMMIT: ${TRT_LLM_GIT_COMMIT}"
99+
fi
100+
69101
nsys_on=""
70102
# nsys_on=${full_logdir} # Uncomment this line to enable Nsys profiling
71-
72103
# start the container
73104
srun -l --container-image=${container_image} \
74105
--container-name=${container_name} \
75106
--container-mounts=${mounts} \
76107
--mpi=pmix \
77108
echo "Container up."
78109

110+
if [ -n "${trtllm_repo}" ]; then
111+
srun --container-name=${container_name} \
112+
--container-mounts=${mounts} \
113+
--mpi=pmix --overlap -N $SLURM_NNODES --ntasks-per-node=1 \
114+
bash -c "cd ${trtllm_repo} && echo 'Running install operation...' && pip install -e . " 2>&1 | tee ${full_logdir}/install.log
115+
fi
116+
79117
# generate the yaml file
80118
srun -l --container-name=${container_name} \
81119
--container-mounts=${mounts} \
@@ -105,18 +143,19 @@ echo "YAML file generated."
105143
hostname_value=$(grep '^hostname:' ${full_logdir}/config.yaml | awk -F': ' '{print $2}')
106144
echo "server host name: $hostname_value"
107145

146+
108147
# start the workers
109148
srun -l --container-name=${container_name} \
110149
--container-mounts=${mounts} \
111-
--mpi=pmix --overlap \
112-
bash ${workdir}/start_worker.sh ${full_logdir}/config.yaml "${enable_pdl}" ${ctx_gpus} ${nsys_on} ${repo_dir} &> ${full_logdir}/output_workers.log &
150+
--mpi=pmix --overlap \
151+
bash ${workdir}/start_worker.sh ${full_logdir}/config.yaml "${enable_pdl}" ${ctx_gpus} ${nsys_on} &> ${full_logdir}/output_workers.log &
113152

114153
# start the server
115154
srun -l --container-name=${container_name} \
116155
--container-mounts=${mounts} \
117156
--mpi=pmix --overlap -N 1 -n 1 \
118157
-w ${hostname_value} \
119-
bash ${workdir}/start_server.sh ${full_logdir}/config.yaml ${repo_dir} &> ${full_logdir}/output_server.log &
158+
bash ${workdir}/start_server.sh ${full_logdir}/config.yaml &> ${full_logdir}/output_server.log &
120159

121160
# start benchmarking
122161
srun -l --container-name=${container_name} \

examples/disaggregated/slurm/run_benchmark.sh

Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ isl=$1
1616
osl=$2
1717
multi_round=$3
1818
model_name=$4
19-
concurrency=$5
19+
concurrency_list=$5
2020
streaming=$6
2121
log_path=$7
2222

@@ -89,32 +89,31 @@ do_get_logs(){
8989
}
9090

9191
# run the loadgen
92-
93-
export PATH=${HOME}/.local/bin:${PATH}
94-
mkdir -p ${log_path}/concurrency_${concurrency}
95-
cp ${log_path}/output_workers.log ${log_path}/concurrency_${concurrency}/workers_start.log
96-
max_count=$((${concurrency} * ${multi_round}))
97-
echo "Running loadgen with concurrency: ${concurrency}, max_count: ${max_count}"
98-
99-
python -m tensorrt_llm.serve.scripts.benchmark_serving \
100-
--model ${model_name} \
101-
--tokenizer ${model_name} \
102-
--dataset-name random \
103-
--dataset-path ${shared_gpt_path} \
104-
--random-input-len ${isl} \
105-
--random-output-len ${osl} \
106-
--random-prefix-len 0 \
107-
--num-prompts ${max_count} \
108-
--max-concurrency ${concurrency} \
109-
--host ${hostname} \
110-
--port ${port} \
111-
--ignore-eos \
112-
--no-test-input \
113-
$(if [ "${streaming}" = "false" ]; then echo "--non-streaming"; fi)
114-
115-
do_get_logs ${log_path}/output_workers.log ${log_path}/concurrency_${concurrency}
116-
# echo "" > ${log_path}/output_workers.log
117-
echo "done for ${concurrency} in folder ${log_path}/concurrency_${concurrency}"
92+
cp ${log_path}/output_workers.log ${log_path}/workers_start.log
93+
for concurrency in ${concurrency_list}; do
94+
mkdir -p ${log_path}/concurrency_${concurrency}
95+
max_count=$((${concurrency} * ${multi_round}))
96+
echo "Running loadgen with concurrency: ${concurrency}, max_count: ${max_count}"
97+
python -m tensorrt_llm.serve.scripts.benchmark_serving \
98+
--model ${model_name} \
99+
--tokenizer ${model_name} \
100+
--dataset-name random \
101+
--dataset-path ${shared_gpt_path} \
102+
--random-input-len ${isl} \
103+
--random-output-len ${osl} \
104+
--random-prefix-len 0 \
105+
--num-prompts ${max_count} \
106+
--max-concurrency ${concurrency} \
107+
--host ${hostname} \
108+
--port ${port} \
109+
--ignore-eos \
110+
--no-test-input \
111+
$(if [ "${streaming}" = "false" ]; then echo "--non-streaming"; fi)
112+
113+
do_get_logs ${log_path}/output_workers.log ${log_path}/concurrency_${concurrency}
114+
echo "" > ${log_path}/output_workers.log
115+
echo "done for ${concurrency} in folder ${log_path}/concurrency_${concurrency}"
116+
done
118117

119118
echo "Benchmark done, gracefully shutting down server and workers..."
120119
kill -9 $(ps aux | grep '[s]tart_server.sh' | awk '{print $2}') >/dev/null 2>&1 || true

examples/disaggregated/slurm/start_server.sh

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,6 @@ short_hostname=$(echo "$hostname" | awk -F'.' '{print $1}')
99
echo "short_hostname: ${short_hostname}"
1010

1111
config_file=$1
12-
repo_dir=$2
13-
14-
if [ ! -z "${repo_dir}" ]; then
15-
pushd ${repo_dir}
16-
sleep 120 # wait for the worker to finish to avoid file conflict
17-
if [ $SLURM_LOCALID == 0 ];then
18-
echo "Install dependencies on rank 0."
19-
pip install -e .
20-
else
21-
echo "Sleep 120 seconds on other ranks."
22-
sleep 120
23-
fi
24-
popd
25-
fi
26-
export PATH=${HOME}/.local/bin:${PATH}
2712

2813
# Check and replace hostname settings in config_file
2914
if [ -f "$config_file" ]; then

examples/disaggregated/slurm/start_worker.sh

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,9 @@ config_file=$1
44
enable_pdl=$2
55
ctx_gpus=$3
66
work_dir=$4
7-
repo_dir=$5
87
unset UCX_TLS
98
echo "config_file: ${config_file}, enable_pdl: ${enable_pdl}, ctx_gpus: ${ctx_gpus}, work_dir: ${work_dir}"
109

11-
if [ ! -z "${repo_dir}" ]; then
12-
pushd ${repo_dir}
13-
if [ $SLURM_LOCALID == 0 ];then
14-
echo "Install dependencies on rank 0."
15-
pip install -e .
16-
else
17-
echo "Sleep 120 seconds on other ranks."
18-
sleep 120
19-
fi
20-
popd
21-
fi
22-
export PATH=${HOME}/.local/bin:${PATH}
23-
2410
export TLLM_LOG_LEVEL=INFO
2511
export TRTLLM_MOE_ENABLE_ALLTOALL_WITHOUT_ALLGATHER=1
2612

examples/wide_ep/slurm_scripts/submit.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ for b in 1 64 1024; do
2929

3030
args=(
3131
${ctx_num} 4 4 4480 true # Context servers arguments
32-
1 16 1024 1024 "0.7" # Generation servers arguments
32+
1 16 1024 1024 true "0.7" # Generation servers arguments
3333
$eplb_num_slots $mtp_size # Other arguments
3434
$concurrency # Benchmarking arguments
3535
$isl
@@ -76,6 +76,7 @@ for b in 512; do
7676
$mounts
7777
$workdir
7878
$model_dir
79+
$repo_dir
7980
)
8081

8182
sbatch --nodes=${total_node_num} \

0 commit comments

Comments (0)