Skip to content

Commit 2ab1132

Browse files
wallashss, youkaichao, and njhill
authored and committed
[Misc] Deprecation Warning when setting --engine-use-ray (vllm-project#7424)
Signed-off-by: Wallas Santos <[email protected]> Co-authored-by: youkaichao <[email protected]> Co-authored-by: Nick Hill <[email protected]> Co-authored-by: youkaichao <[email protected]> Signed-off-by: Alvant <[email protected]>
1 parent 6955de2 commit 2ab1132

File tree

7 files changed

+56
-3
lines changed

7 files changed

+56
-3
lines changed

tests/async_engine/test_api_server.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import os
12
import subprocess
23
import sys
34
import time
@@ -35,11 +36,17 @@ def api_server(tokenizer_pool_size: int, engine_use_ray: bool,
3536
"127.0.0.1", "--tokenizer-pool-size",
3637
str(tokenizer_pool_size)
3738
]
39+
40+
# Copy the environment variables and append `VLLM_ALLOW_ENGINE_USE_RAY=1`
41+
# to prevent `--engine-use-ray` from raising an exception due to its deprecation
42+
env_vars = os.environ.copy()
43+
env_vars["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"
44+
3845
if engine_use_ray:
3946
commands.append("--engine-use-ray")
4047
if worker_use_ray:
4148
commands.append("--worker-use-ray")
42-
uvicorn_process = subprocess.Popen(commands)
49+
uvicorn_process = subprocess.Popen(commands, env=env_vars)
4350
yield
4451
uvicorn_process.terminate()
4552

tests/async_engine/test_async_llm_engine.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import asyncio
2+
import os
23
from dataclasses import dataclass
34

45
import pytest
@@ -106,11 +107,16 @@ async def test_new_requests_event():
106107
assert engine.engine.add_request_calls == 3
107108
assert engine.engine.step_calls == old_step_calls + 1
108109

110+
# Allow the deprecated engine_use_ray option so that it does not raise an exception
111+
os.environ["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"
112+
109113
engine = MockAsyncLLMEngine(worker_use_ray=True, engine_use_ray=True)
110114
assert engine.get_model_config() is not None
111115
assert engine.get_tokenizer() is not None
112116
assert engine.get_decoding_config() is not None
113117

118+
os.environ.pop("VLLM_ALLOW_ENGINE_USE_RAY")
119+
114120

115121
def test_asyncio_run():
116122
wait_for_gpu_memory_to_clear(

tests/async_engine/test_openapi_server_ray.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,11 @@ def server():
2323
str(chatml_jinja_path),
2424
]
2525

26-
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
26+
# Allow `--engine-use-ray`; otherwise launching the server throws
27+
# an error due to trying to use a deprecated feature
28+
env_dict = {"VLLM_ALLOW_ENGINE_USE_RAY": "1"}
29+
with RemoteOpenAIServer(MODEL_NAME, args,
30+
env_dict=env_dict) as remote_server:
2731
yield remote_server
2832

2933

tests/spec_decode/e2e/conftest.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import asyncio
2+
import os
23
from itertools import cycle
34
from typing import Dict, List, Optional, Sequence, Tuple, Union
45

@@ -56,6 +57,11 @@ def __init__(
5657
) -> None:
5758
if "disable_log_stats" not in kwargs:
5859
kwargs["disable_log_stats"] = True
60+
61+
# Needed so that engine_use_ray still works as a deprecated feature,
62+
# otherwise the following constructor will raise an exception
63+
os.environ["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"
64+
5965
engine_args = AsyncEngineArgs(
6066
model=model,
6167
tokenizer=tokenizer,

vllm/engine/arg_utils.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -923,7 +923,13 @@ def add_cli_args(parser: FlexibleArgumentParser,
923923
parser.add_argument('--engine-use-ray',
924924
action='store_true',
925925
help='Use Ray to start the LLM engine in a '
926-
'separate process as the server process.')
926+
'separate process as the server process.'
927+
'(DEPRECATED. This argument is deprecated '
928+
'and will be removed in a future update. '
929+
'Set `VLLM_ALLOW_ENGINE_USE_RAY=1` to force '
930+
'use it. See '
931+
'https://github.com/vllm-project/vllm/issues/7045.'
932+
')')
927933
parser.add_argument('--disable-log-requests',
928934
action='store_true',
929935
help='Disable logging requests.')

vllm/engine/async_llm_engine.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from vllm.sampling_params import SamplingParams
3030
from vllm.sequence import ExecuteModelRequest, SamplerOutput
3131
from vllm.usage.usage_lib import UsageContext
32+
from vllm.utils import print_warning_once
3233

3334
logger = init_logger(__name__)
3435
ENGINE_ITERATION_TIMEOUT_S = envs.VLLM_ENGINE_ITERATION_TIMEOUT_S
@@ -553,6 +554,20 @@ def __init__(self,
553554
self.log_requests = log_requests
554555
self.engine = self._init_engine(*args, **kwargs)
555556

557+
if self.engine_use_ray:
558+
print_warning_once(
559+
"DEPRECATED. `--engine-use-ray` is deprecated and will "
560+
"be removed in a future update. "
561+
"See https://github.com/vllm-project/vllm/issues/7045.")
562+
563+
if envs.VLLM_ALLOW_ENGINE_USE_RAY:
564+
print_warning_once(
565+
"VLLM_ALLOW_ENGINE_USE_RAY is set, force engine use Ray")
566+
else:
567+
raise ValueError("`--engine-use-ray` is deprecated. "
568+
"Set `VLLM_ALLOW_ENGINE_USE_RAY=1` to "
569+
"force use it")
570+
556571
self.background_loop: Optional[asyncio.Future] = None
557572
# We need to keep a reference to unshielded
558573
# task as well to prevent it from being garbage

vllm/envs.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
VERBOSE: bool = False
5656
VLLM_ALLOW_LONG_MAX_MODEL_LEN: bool = False
5757
VLLM_TEST_FORCE_FP8_MARLIN: bool = False
58+
VLLM_ALLOW_ENGINE_USE_RAY: bool = False
5859
VLLM_PLUGINS: Optional[List[str]] = None
5960

6061

@@ -364,6 +365,14 @@ def get_default_config_root():
364365
(os.environ.get("VLLM_TEST_FORCE_FP8_MARLIN", "0").strip().lower() in
365366
("1", "true")),
366367

368+
# If set, allow running the engine as a separate ray actor,
369+
# which is a deprecated feature soon to be removed.
370+
# See https://github.com/vllm-project/vllm/issues/7045
371+
"VLLM_ALLOW_ENGINE_USE_RAY":
372+
lambda:
373+
(os.environ.get("VLLM_ALLOW_ENGINE_USE_RAY", "0").strip().lower() in
374+
("1", "true")),
375+
367376
# a list of plugin names to load, separated by commas.
368377
# if this is not set, it means all plugins will be loaded
369378
# if this is set to an empty string, no plugins will be loaded

0 commit comments

Comments
 (0)