Commit 08891ff

fix: Update trtllm tests to use new scripts instead of dynamo serve (#1979)
1 parent 49b7a0d commit 08891ff

File tree

2 files changed: +269 -189 lines
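Only one of the two changed files appears in this excerpt: tests/serve/test_dynamo_serve.py, which loses its `dynamo serve`-driven deployment graphs (including all the TensorRT-LLM ones). For orientation, the sketch below illustrates roughly how one of the removed (`DeploymentGraph`, `Payload`) pairs was exercised: start the frontend graph with `dynamo serve <module> -f <config>` from the example directory, then post the chat payload and check for the expected substring. The launch command details, port, prompt, and timeouts are assumptions for illustration, not code taken from this commit; the real tests manage the server process through the repo's `ManagedProcess` helper.

# Illustrative sketch only (not from this commit): old-style exercise of a
# removed "trtllm_agg"-like DeploymentGraph/Payload pair.
# Port, prompt, timeouts, and the exact CLI flags are assumptions.
import subprocess
import time

import requests  # assumed available in the test environment


def run_removed_style_check():
    # Old style: the frontend graph was started via `dynamo serve` with a YAML config.
    server = subprocess.Popen(
        ["dynamo", "serve", "graphs.agg:Frontend", "-f", "configs/agg.yaml"],
        cwd="/workspace/examples/tensorrt_llm",
    )
    try:
        text_prompt = "Tell me a short story about artificial intelligence."  # hypothetical; the real text_prompt is defined elsewhere in the test module
        payload = {
            "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
            "messages": [{"role": "user", "content": text_prompt}],
            "max_tokens": 150,
            "temperature": 0.1,
        }
        # Poll until the endpoint answers, then check for the expected substring ("AI").
        deadline = time.time() + 600
        while time.time() < deadline:
            try:
                r = requests.post(
                    "http://localhost:8000/v1/chat/completions",
                    json=payload,
                    timeout=30,
                )
                if r.status_code == 200:
                    content = r.json()["choices"][0]["message"]["content"]
                    assert "AI" in content
                    return
            except requests.RequestException:
                pass
            time.sleep(5)
        raise TimeoutError("frontend never became ready")
    finally:
        server.terminate()
        server.wait()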

tests/serve/test_dynamo_serve.py

Lines changed: 0 additions & 189 deletions
@@ -25,7 +25,6 @@
     DeploymentGraph,
     Payload,
     chat_completions_response_handler,
-    completions_response_handler,
 )
 from tests.utils.managed_process import ManagedProcess

@@ -56,106 +55,7 @@
     expected_response=["bus"],
 )
 
-text_payload = Payload(
-    payload_chat={
-        "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
-        "messages": [
-            {
-                "role": "user",
-                "content": text_prompt,  # Shorter prompt
-            }
-        ],
-        "max_tokens": 150,  # Reduced from 500
-        "temperature": 0.1,
-        # "seed": 0,
-    },
-    payload_completions={
-        "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
-        "prompt": text_prompt,
-        "max_tokens": 150,
-        "temperature": 0.1,
-        # "seed": 0,
-    },
-    repeat_count=10,
-    expected_log=[],
-    expected_response=["AI"],
-)
-
 deployment_graphs = {
-    "agg": (
-        DeploymentGraph(
-            module="graphs.agg:Frontend",
-            config="configs/agg.yaml",
-            directory="/workspace/examples/llm",
-            endpoints=["v1/chat/completions"],
-            response_handlers=[
-                chat_completions_response_handler,
-            ],
-            marks=[pytest.mark.gpu_1, pytest.mark.vllm],
-        ),
-        text_payload,
-    ),
-    "sglang_agg": (
-        DeploymentGraph(
-            module="graphs.agg:Frontend",
-            config="configs/agg.yaml",
-            directory="/workspace/examples/sglang",
-            endpoints=["v1/chat/completions", "v1/completions"],
-            response_handlers=[
-                chat_completions_response_handler,
-                completions_response_handler,
-            ],
-            marks=[pytest.mark.gpu_1, pytest.mark.sglang],
-        ),
-        text_payload,
-    ),
-    "disagg": (
-        DeploymentGraph(
-            module="graphs.disagg:Frontend",
-            config="configs/disagg.yaml",
-            directory="/workspace/examples/llm",
-            endpoints=["v1/chat/completions"],
-            response_handlers=[
-                chat_completions_response_handler,
-            ],
-            marks=[pytest.mark.gpu_2, pytest.mark.vllm],
-        ),
-        text_payload,
-    ),
-    "agg_router": (
-        DeploymentGraph(
-            module="graphs.agg_router:Frontend",
-            config="configs/agg_router.yaml",
-            directory="/workspace/examples/llm",
-            endpoints=["v1/chat/completions"],
-            response_handlers=[
-                chat_completions_response_handler,
-            ],
-            marks=[pytest.mark.gpu_1, pytest.mark.vllm],
-            # FIXME: This is a hack to allow deployments to start before sending any requests.
-            # When using KV-router, if all the endpoints are not registered, the service
-            # enters a non-recoverable state.
-            delayed_start=120,
-        ),
-        text_payload,
-    ),
-    "disagg_router": (
-        DeploymentGraph(
-            module="graphs.disagg_router:Frontend",
-            config="configs/disagg_router.yaml",
-            directory="/workspace/examples/llm",
-            endpoints=["v1/chat/completions"],
-            response_handlers=[
-                chat_completions_response_handler,
-            ],
-            marks=[pytest.mark.gpu_2, pytest.mark.vllm],
-            # FIXME: This is a hack to allow deployments to start before sending any requests.
-            # When using KV-router, if all the endpoints are not registered, the service
-            # enters a non-recoverable state.
-            delayed_start=120,
-        ),
-        text_payload,
-    ),
     "multimodal_agg": (
         DeploymentGraph(
             module="graphs.agg:Frontend",
@@ -169,84 +69,6 @@
         ),
         multimodal_payload,
     ),
-    "vllm_v1_agg": (
-        DeploymentGraph(
-            module="graphs.agg:Frontend",
-            config="configs/agg.yaml",
-            directory="/workspace/examples/vllm_v1",
-            endpoints=["v1/chat/completions", "v1/completions"],
-            response_handlers=[
-                chat_completions_response_handler,
-                completions_response_handler,
-            ],
-            marks=[pytest.mark.gpu_1, pytest.mark.vllm],
-        ),
-        text_payload,
-    ),
-    "trtllm_agg": (
-        DeploymentGraph(
-            module="graphs.agg:Frontend",
-            config="configs/agg.yaml",
-            directory="/workspace/examples/tensorrt_llm",
-            endpoints=["v1/chat/completions", "v1/completions"],
-            response_handlers=[
-                chat_completions_response_handler,
-                completions_response_handler,
-            ],
-            marks=[pytest.mark.gpu_1, pytest.mark.tensorrtllm],
-        ),
-        text_payload,
-    ),
-    "trtllm_agg_router": (
-        DeploymentGraph(
-            module="graphs.agg:Frontend",
-            config="configs/agg_router.yaml",
-            directory="/workspace/examples/tensorrt_llm",
-            endpoints=["v1/chat/completions", "v1/completions"],
-            response_handlers=[
-                chat_completions_response_handler,
-                completions_response_handler,
-            ],
-            marks=[pytest.mark.gpu_1, pytest.mark.tensorrtllm],
-            # FIXME: This is a hack to allow deployments to start before sending any requests.
-            # When using KV-router, if all the endpoints are not registered, the service
-            # enters a non-recoverable state.
-            delayed_start=120,
-        ),
-        text_payload,
-    ),
-    "trtllm_disagg": (
-        DeploymentGraph(
-            module="graphs.disagg:Frontend",
-            config="configs/disagg.yaml",
-            directory="/workspace/examples/tensorrt_llm",
-            endpoints=["v1/chat/completions", "v1/completions"],
-            response_handlers=[
-                chat_completions_response_handler,
-                completions_response_handler,
-            ],
-            marks=[pytest.mark.gpu_2, pytest.mark.tensorrtllm],
-        ),
-        text_payload,
-    ),
-    "trtllm_disagg_router": (
-        DeploymentGraph(
-            module="graphs.disagg:Frontend",
-            config="configs/disagg_router.yaml",
-            directory="/workspace/examples/tensorrt_llm",
-            endpoints=["v1/chat/completions", "v1/completions"],
-            response_handlers=[
-                chat_completions_response_handler,
-                completions_response_handler,
-            ],
-            marks=[pytest.mark.gpu_2, pytest.mark.tensorrtllm],
-            # FIXME: This is a hack to allow deployments to start before sending any requests.
-            # When using KV-router, if all the endpoints are not registered, the service
-            # enters a non-recoverable state.
-            delayed_start=120,
-        ),
-        text_payload,
-    ),
 }
 
 
@@ -394,17 +216,6 @@ def wait_for_ready(self, payload, logger=logging.getLogger()):
 @pytest.fixture(
     params=[
         pytest.param("multimodal_agg", marks=[pytest.mark.vllm, pytest.mark.gpu_2]),
-        pytest.param("trtllm_agg", marks=[pytest.mark.tensorrtllm, pytest.mark.gpu_1]),
-        pytest.param(
-            "trtllm_agg_router", marks=[pytest.mark.tensorrtllm, pytest.mark.gpu_1]
-        ),
-        pytest.param(
-            "trtllm_disagg", marks=[pytest.mark.tensorrtllm, pytest.mark.gpu_2]
-        ),
-        pytest.param(
-            "trtllm_disagg_router", marks=[pytest.mark.tensorrtllm, pytest.mark.gpu_2]
-        ),
-        # pytest.param("sglang", marks=[pytest.mark.sglang, pytest.mark.gpu_2]),
     ]
 )
 def deployment_graph_test(request):