Commit 8569d4d
yinggeh authored and Tabrizian committed
test: Deprecate gpt_model_type "v1" static batching from triton_backend L0_backend_trtllm (#5229)
Signed-off-by: Yingge He <[email protected]>
1 parent 04fa6c0 commit 8569d4d
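
For context (not part of this diff): the TensorRT-LLM backend selects its batching scheme through the gpt_model_type parameter in the model's config.pbtxt. A sketch of the relevant stanza is below; surrounding fields are elided, and the value shown is one of the backend's non-deprecated options:

parameters: {
  key: "gpt_model_type"
  value: {
    # "V1" (static batching) is deprecated by this change; use
    # "inflight_batching" or "inflight_fused_batching" instead.
    string_value: "inflight_fused_batching"
  }
}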

2 files changed (+16, -68 lines)

triton_backend/ci/L0_backend_trtllm/custom_metrics_verification_tests.py

Lines changed: 12 additions & 28 deletions
@@ -82,7 +82,7 @@ def _parse_log_file(self, filename):
 
         return json.loads(json_string)
 
-    def _parse_triton_metrics(self, filename, is_v1):
+    def _parse_triton_metrics(self, filename):
         curl_counts = {}
         with open(filename) as metrics_file:
             for line in metrics_file:
@@ -91,12 +91,11 @@ def _parse_triton_metrics(self, filename, is_v1):
                 metric_output = re.sub(r"^.*?{", "{", line).split()
                 metric_key = metric_output[0]
                 metric_value = metric_output[1]
-                key = self._convert_metric_key_to_stats_key(
-                    metric_key, is_v1)
+                key = self._convert_metric_key_to_stats_key(metric_key)
                 curl_counts[key] = metric_value
         return curl_counts
 
-    def _convert_metric_key_to_stats_key(self, metric_output, is_v1):
+    def _convert_metric_key_to_stats_key(self, metric_output):
         # Converts:
         # '{model="tensorrt_llm",request_type="context",version="1"}'
         # to:
@@ -107,15 +106,12 @@ def _convert_metric_key_to_stats_key(self, metric_output, is_v1):
             if not i.startswith('model') and not i.startswith('version')
         ][0]
         self.assertIn(key, metric_to_stat_dict)
-        if (is_v1):
-            self.assertNotIn("inflight_batcher_specific_metric", key)
-        else:
-            self.assertNotIn("v1_specific_metric", key)
+        self.assertNotIn("v1_specific_metric", key)
         return metric_to_stat_dict[key]
 
-    def _base_test(self, stats_file, metrics_file, is_v1):
+    def _base_test(self, stats_file, metrics_file):
         stats = self._parse_log_file(stats_file)
-        metrics = self._parse_triton_metrics(metrics_file, is_v1)
+        metrics = self._parse_triton_metrics(metrics_file)
         self.assertEqual(len(stats.keys()), len(metrics.keys()))
         self.assertEqual(list(stats.keys()).sort(), list(metrics.keys()).sort())
         for metric_key in stats.keys():
@@ -140,45 +136,33 @@ def _base_test(self, stats_file, metrics_file, is_v1):
             timedelta(seconds=-1) <= difference, difference
             <= timedelta(seconds=1))
 
-    def test_1_gpu_v1(self):
-        self._base_test("1gpu_v1_no_streaming_server.log",
-                        "1gpu_v1_no_stream_metrics.out", True)
-
     def test_1_gpu_IFB_no_stream(self):
         self._base_test("1gpu_IFB_no_streaming_server.log",
-                        "1gpu_IFB_no_stream_metrics.out", False)
+                        "1gpu_IFB_no_stream_metrics.out")
 
     def test_1_gpu_IFB_stream(self):
         self._base_test("1gpu_IFB_streaming_server.log",
-                        "1gpu_IFB_stream_metrics.out", False)
+                        "1gpu_IFB_stream_metrics.out")
 
     if AVAILABLE_GPUS >= 2:
 
-        def test_2_gpu_v1(self):
-            self._base_test("2gpu_v1_no_streaming_server.log",
-                            "2gpu_v1_no_stream_metrics.out", True)
-
         def test_2_gpu_IFB_no_stream(self):
             self._base_test("2gpu_IFB_no_streaming_server.log",
-                            "2gpu_IFB_no_stream_metrics.out", False)
+                            "2gpu_IFB_no_stream_metrics.out")
 
         def test_2_gpu_IFB_stream(self):
             self._base_test("2gpu_IFB_streaming_server.log",
-                            "2gpu_IFB_stream_metrics.out", False)
+                            "2gpu_IFB_stream_metrics.out")
 
     if AVAILABLE_GPUS >= 4:
 
-        def test_4_gpu_v1(self):
-            self._base_test("4gpu_v1_no_streaming_server.log",
-                            "4gpu_v1_no_stream_metrics.out", True)
-
         def test_4_gpu_IFB_no_stream(self):
             self._base_test("4gpu_IFB_no_streaming_server.log",
-                            "4gpu_IFB_no_stream_metrics.out", False)
+                            "4gpu_IFB_no_stream_metrics.out")
 
         def test_4_gpu_IFB_stream(self):
             self._base_test("4gpu_IFB_streaming_server.log",
-                            "4gpu_IFB_stream_metrics.out", False)
+                            "4gpu_IFB_stream_metrics.out")
 
 
 if __name__ == "__main__":
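
For orientation, the surviving helper maps a Prometheus label string onto a key of the test's metric_to_stat_dict. The following is a minimal, self-contained sketch of that conversion; the dictionary entries and the brace/comma cleanup are illustrative assumptions, and only the label-filtering step is taken from the diff above.

# Sketch of the label-to-stats-key conversion exercised by the tests above.
# The real metric_to_stat_dict lives in custom_metrics_verification_tests.py;
# these two entries are hypothetical placeholders.
metric_to_stat_dict = {
    'request_type="context"': "Context Requests",
    'request_type="scheduled"': "Scheduled Requests",
}

def convert_metric_key_to_stats_key(metric_output):
    # Split '{model="...",request_type="context",version="1"}' into its
    # labels and keep the one that is neither model=... nor version=...
    labels = metric_output.strip("{}").split(",")
    key = [
        i for i in labels
        if not i.startswith("model") and not i.startswith("version")
    ][0]
    return metric_to_stat_dict[key]

print(convert_metric_key_to_stats_key(
    '{model="tensorrt_llm",request_type="context",version="1"}'))
# -> Context Requests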

triton_backend/ci/L0_backend_trtllm/test.sh

Lines changed: 4 additions & 40 deletions
@@ -228,49 +228,13 @@ for NUM_GPU in "${NUM_GPUS_TO_TEST[@]}"; do
 
     run_server "${SERVER_ARGS}"
     wait_for_server_ready ${SERVER_TIMEOUT} ${SERVER_PID[@]}
-    if [ "$WAIT_RET" != "0" ]; then
-        # Cleanup
-        kill $SERVER_PID > /dev/null 2>&1 || true
-        echo -e "\n***\n*** Failed to start $SERVER\n***"
-        cat $SERVER_LOG
-        exit 1
-    fi
-
-    set -e
-    python3 ${TOOLS_DIR}/inflight_batcher_llm/benchmark_core_model.py \
-        --max-input-len=500 \
-        dataset --dataset=${DATASET} \
-        --tokenizer-dir=${TOKENIZER_DIR}
-
-    if [ $? -ne 0 ]; then
-        cat $SERVER_LOG
-        echo -e "\n***\n*** Error executing v1 benchmark_core_model test with ${NUM_GPU}GPU(s): line ${LINENO}\n***"
-        kill_server
-        wait_for_server_terminated ${SERVER_TIMEOUT} ${SERVER_PID[@]}
-        RET=1
-    fi
-    set +e
-
-    set -e
-    python3 ${TOOLS_DIR}/inflight_batcher_llm/end_to_end_test.py \
-        --max-input-len=500 \
-        --dataset=${DATASET}
 
-    if [ $? -ne 0 ]; then
+    # Expect invalid GPT model type error to be gracefully handled
+    if [ `grep -c "Static batching type is deprecated" $SERVER_LOG` == "0" ]; then
+        echo -e "\n***\n*** GPT model type error not handled gracefully: line ${LINENO}\n***"
         cat $SERVER_LOG
-        echo -e "\n***\n*** Error executing v1 end-to-end test with ${NUM_GPU}GPU(s): line ${LINENO}\n***"
-        kill_server
-        wait_for_server_terminated ${SERVER_TIMEOUT} ${SERVER_PID[@]}
-        RET=1
+        exit 1
     fi
-    set +e
-
-    # Make sure the metrics is retrieved after the server has updated the metrics internally
-    sleep ${SLEEP_DURATION}
-    curl localhost:8002/metrics -o ${NUM_GPU}gpu_v1_no_stream_metrics.out
-
-    kill_server
-    wait_for_server_terminated ${SERVER_TIMEOUT} ${SERVER_PID[@]}
 
     # inflight batching ON
     # streaming OFF
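
With the v1 path gone, the script no longer benchmarks a static-batching server at all; it only asserts that the launch failed with the expected deprecation message in the server log. A rough Python equivalent of that grep-based check, for illustration only (the function name and argument handling are ours; the expected message is copied from the diff):

from pathlib import Path
import sys

# Message the server is expected to emit when gpt_model_type "V1" is used;
# copied from the grep pattern in test.sh above.
EXPECTED = "Static batching type is deprecated"

def deprecation_handled_gracefully(server_log: str) -> bool:
    # Mirrors `grep -c "Static batching type is deprecated" $SERVER_LOG`.
    return EXPECTED in Path(server_log).read_text()

if __name__ == "__main__":
    log = sys.argv[1] if len(sys.argv) > 1 else "server.log"
    if not deprecation_handled_gracefully(log):
        print("*** GPT model type error not handled gracefully ***")
        sys.exit(1)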
