Commit f9f0129

[serve.llm] release tests for 1p1d (#53190)
Signed-off-by: Linkun Chen <[email protected]>
1 parent 0827d7a commit f9f0129

7 files changed: 42 additions and 29 deletions

python/ray/llm/_internal/serve/deployments/llm/vllm/vllm_engine.py

Lines changed: 6 additions & 6 deletions
@@ -213,12 +213,12 @@ def __init__(
 
         # We need to overwrite the engine_id to make it unique across replicas.
         # "engine_id" is added in vllm 0.9.0, so do existance check.
-        if "engine_id" in kv_transfer_config.model_fields:
-            engine_id = getattr(kv_transfer_config, "engine_id", uuid.uuid4())
-            host = vllm.envs.NIXL_SIDE_CHANNEL_HOST
-            port = vllm.envs.NIXL_SIDE_CHANNEL_PORT
-            kv_transfer_config.engine_id = "-".join([engine_id, host, port])
-        else:
+        try:
+            engine_id = getattr(kv_transfer_config, "engine_id", str(uuid.uuid4()))
+            host = getattr(vllm.envs, "VLLM_NIXL_SIDE_CHANNEL_HOST", "localhost")
+            port = getattr(vllm.envs, "VLLM_NIXL_SIDE_CHANNEL_PORT", 5557)
+            kv_transfer_config.engine_id = "-".join([engine_id, host, str(port)])
+        except ValueError:
             # TODO(lk-chen): Raise error once vllm 0.9.0 is pinned to rayllm
             logger.warning(
                 "engine_id is not supported in vllm < 0.9.0, NIXL-backed kv transfer "

release/llm_tests/serve/configs/model_config/qwen_3_0dot6B_1replica.yaml

Lines changed: 0 additions & 19 deletions
This file was deleted.
(1p1d serve config; file name not captured in this view)

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 applications:
 - args:
-    prefill_config: ./configs/model_config/qwen_3_0dot6B_1replica.yaml
-    decode_config: ./configs/model_config/qwen_3_0dot6B_1replica.yaml
+    prefill_config: ./configs/model_config/llama_3dot1_8b_quantized_tp1.yaml
+    decode_config: ./configs/model_config/llama_3dot1_8b_quantized_tp1.yaml
   import_path: ray.llm._internal.serve.deployments.prefill_decode_disagg.prefill_decode_disagg:build_app
   name: llm-endpoint
   route_prefix: /
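
In this serve config, import_path points Ray Serve at the build_app application builder and the args block is handed to it. A rough local equivalent follows, purely as a sketch: it assumes build_app accepts the same mapping of config paths shown above and is not how the release test actually launches the app.

# Hypothetical local launch of the 1p1d app described by the YAML above.
from ray import serve
from ray.llm._internal.serve.deployments.prefill_decode_disagg.prefill_decode_disagg import (
    build_app,
)

app = build_app(
    {
        "prefill_config": "./configs/model_config/llama_3dot1_8b_quantized_tp1.yaml",
        "decode_config": "./configs/model_config/llama_3dot1_8b_quantized_tp1.yaml",
    }
)
serve.run(app, name="llm-endpoint", route_prefix="/")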

release/llm_tests/serve/probes/test_exact_correctness.py

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ def deterministic_querier(openai_async_client):
 }
 
 COUNTING_PATTERN_RESPONSES_BY_MODEL = {
-    "default": ["Five", "five", "Five.", "five."],
+    "default": ("Five", "five", "Five.", "five."),
 }
 

release/llm_tests/serve/run_llm_serve_test_and_bms.py

Lines changed: 1 addition & 1 deletion
@@ -100,7 +100,6 @@ def main(
     env_vars = get_hf_token_env_var() if not skip_hf_token else {}
     vllm_use_v1_env = "1" if vllm_use_v1 else "0"
     env_vars["VLLM_USE_V1"] = vllm_use_v1_env
-    llm_config = get_llm_config(serve_config_file)
 
     if run_vllm_profiler:

@@ -149,6 +148,7 @@ def main(
         raise RuntimeError(f"Tests failed! {exit_code=}")
 
     if run_serve_llm_profiler:
+        llm_config = get_llm_config(serve_config_file)
         # For now, the values are hardcoded.
         results = run_bm(
             api_url=api_url,

(new file; path not captured in this view)

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+#!/bin/bash
+# This script is used to build an extra layer on top of the base llm image
+# to install vllm at specific version that includes necessary changes for
+# PD-disaggregated serving.
+
+set -exo pipefail
+
+# https://github.com/vllm-project/vllm/pull/17751 (Nixl Integration. May 12)
+pip3 install --no-cache-dir \
+    "vllm@https://wheels.vllm.ai/d19110204c03e9b77ed957fc70c1262ff370f5e2/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"

release/release_tests.yaml

Lines changed: 22 additions & 0 deletions
@@ -4304,6 +4304,28 @@
     long_running: false
     script: pytest -vs test_llm_serve_integration.py
 
+- name: llm_serve_llama_3dot1_8B_quantized_tp1_1p1d
+  frequency: nightly
+  python: "3.11"
+  group: llm-serve
+  team: llm
+  working_dir: llm_tests/serve
+
+  cluster:
+    byod:
+      type: llm-cu124
+      # TODO(lk-chen): remove once we bump vllm to 0.9.0
+      post_build_script: byod_llm_pd_disagg_test.sh
+    cluster_compute: llm_auto_select_worker.yaml
+    # NOTE: Important for getting the correct secrets
+    cloud_id: cld_wy5a6nhazplvu32526ams61d98
+    project_id: prj_lhlrf1u5yv8qz9qg3xzw8fkiiq
+
+  run:
+    timeout: 3600
+    long_running: false
+    script: python run_llm_serve_test_and_bms.py --serve-config-file configs/serve_llama_3dot1_8b_quantized_tp1_1p1d.yaml --skip-hf-token true
+
 
 ##############
 # LLM Batch
