@@ -38,15 +38,42 @@ container_image=${19}
38
38
mounts=${20}
39
39
workdir=${21}
40
40
model_dir=${22}
41
- repo_dir=${23}
41
+ trtllm_repo=${23}
42
+
43
+ echo " ================= parameters ================="
44
+ echo " num_ctx_servers: ${num_ctx_servers} "
45
+ echo " ctx_tp_size: ${ctx_tp_size} "
46
+ echo " ctx_batch_size: ${ctx_batch_size} "
47
+ echo " ctx_max_num_tokens: ${ctx_max_num_tokens} "
48
+ echo " ctx_enable_attention_dp: ${ctx_enable_attention_dp} "
49
+ echo " num_gen_servers: ${num_gen_servers} "
50
+ echo " gen_tp_size: ${gen_tp_size} "
51
+ echo " gen_batch_size: ${gen_batch_size} "
52
+ echo " gen_max_num_tokens: ${gen_max_num_tokens} "
53
+ echo " gen_enable_attention_dp: ${gen_enable_attention_dp} "
54
+ echo " gen_gpu_memory_fraction: ${gen_gpu_memory_fraction} "
55
+ echo " eplb_num_slots: ${eplb_num_slots} "
56
+ echo " mtp_size: ${mtp_size} "
57
+ echo " concurrency: ${concurrency} "
58
+ echo " isl: ${isl} "
59
+ echo " osl: ${osl} "
60
+ echo " multi_round: ${multi_round} "
61
+ echo " streaming: ${streaming} "
62
+ echo " container_image: ${container_image} "
63
+ echo " mounts: ${mounts} "
64
+ echo " workdir: ${workdir} "
65
+ echo " model_dir: ${model_dir} "
66
+ echo " trtllm_repo: ${trtllm_repo} "
67
+ echo " ==========================================="
68
+
42
69
43
70
ctx_max_seq_len=$(( isl + 1 ))
44
71
gen_max_seq_len=$(( isl + osl))
45
72
ctx_gpu_frac=0.75
46
73
cache_transceiver_max_num_tokens=8448
47
74
48
75
container_name=disaggr
49
- logdir=${workdir} /benchmark-${isl} -${osl} /
76
+ logdir=${workdir} /benchmark-${isl} -${osl}
50
77
mkdir -p ${logdir}
51
78
full_logdir=${logdir} /ctx${num_ctx_servers} _gen${num_gen_servers} _dep${gen_tp_size} _batch${gen_batch_size} _eplb${eplb_num_slots} _mtp${mtp_size}
52
79
66
93
mkdir -p ${full_logdir}
67
94
echo " Log will be saved to: ${full_logdir} "
68
95
96
+ if [ -z " ${TRT_LLM_GIT_COMMIT} " ]; then
97
+ export TRT_LLM_GIT_COMMIT=$( git -C ${trtllm_repo} rev-parse --short HEAD 2> /dev/null || echo " unknown" )
98
+ echo " TRT_LLM_GIT_COMMIT: ${TRT_LLM_GIT_COMMIT} "
99
+ fi
100
+
69
101
nsys_on=" "
70
102
# nsys_on=${full_logdir} # Uncomment this line to enable Nsys profiling
71
-
72
103
# start the container
73
104
srun -l --container-image=${container_image} \
74
105
--container-name=${container_name} \
75
106
--container-mounts=${mounts} \
76
107
--mpi=pmix \
77
108
echo " Container up."
78
109
110
+ if [ -n " ${trtllm_repo} " ]; then
111
+ srun --container-name=${container_name} \
112
+ --container-mounts=${mounts} \
113
+ --mpi=pmix --overlap -N $SLURM_NNODES --ntasks-per-node=1 \
114
+ bash -c " cd ${trtllm_repo} && echo 'Running install operation...' && pip install -e . " 2>&1 | tee ${full_logdir} /install.log
115
+ fi
116
+
79
117
# generate the yaml file
80
118
srun -l --container-name=${container_name} \
81
119
--container-mounts=${mounts} \
@@ -105,18 +143,19 @@ echo "YAML file generated."
105
143
hostname_value=$( grep ' ^hostname:' ${full_logdir} /config.yaml | awk -F' : ' ' {print $2}' )
106
144
echo " server host name: $hostname_value "
107
145
146
+
108
147
# start the workers
109
148
srun -l --container-name=${container_name} \
110
149
--container-mounts=${mounts} \
111
- --mpi=pmix --overlap \
112
- bash ${workdir} /start_worker.sh ${full_logdir} /config.yaml " ${enable_pdl} " ${ctx_gpus} ${nsys_on} ${repo_dir } & > ${full_logdir} /output_workers.log &
150
+ --mpi=pmix --overlap \
151
+ bash ${workdir} /start_worker.sh ${full_logdir} /config.yaml " ${enable_pdl} " ${ctx_gpus} ${nsys_on} & > ${full_logdir} /output_workers.log &
113
152
114
153
# start the server
115
154
srun -l --container-name=${container_name} \
116
155
--container-mounts=${mounts} \
117
156
--mpi=pmix --overlap -N 1 -n 1 \
118
157
-w ${hostname_value} \
119
- bash ${workdir} /start_server.sh ${full_logdir} /config.yaml ${repo_dir} & > ${full_logdir} /output_server.log &
158
+ bash ${workdir} /start_server.sh ${full_logdir} /config.yaml & > ${full_logdir} /output_server.log &
120
159
121
160
# start benchmarking
122
161
srun -l --container-name=${container_name} \
0 commit comments