Skip to content

Commit 49b7e93

Browse files
authored
feat: add graceful shutdown in vllm_1 (#1562)
1 parent 44c5be7 commit 49b7e93

File tree

1 file changed

+19
-7
lines changed

1 file changed

+19
-7
lines changed

examples/vllm_v1/components/worker.py

Lines changed: 19 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@
3434
from vllm.v1.engine.core_client import CoreEngineProcManager
3535
from vllm.v1.executor.abstract import Executor
3636

37-
from dynamo.sdk import async_on_start, endpoint, service
37+
from dynamo.sdk import async_on_start, dynamo_context, endpoint, service
3838

3939
logger = logging.getLogger(__name__)
4040

@@ -44,8 +44,8 @@ def __init__(self):
4444
class_name = self.__class__.__name__
4545
self.engine_args = parse_vllm_args(class_name, "")
4646

47-
signal.signal(signal.SIGTERM, self.shutdown_vllm_engine)
48-
signal.signal(signal.SIGINT, self.shutdown_vllm_engine)
47+
signal.signal(signal.SIGTERM, self.graceful_shutdown)
48+
signal.signal(signal.SIGINT, self.graceful_shutdown)
4949

5050
self.set_side_channel_host_and_port()
5151

@@ -60,9 +60,21 @@ async def async_init(self):
6060

6161
logger.info("VllmWorker has been initialized")
6262

63-
def shutdown_vllm_engine(self, signum, frame):
64-
"""Shutdown the background loop"""
65-
logger.info(f"Received signal {signum}, shutting down")
63+
def graceful_shutdown(self, signum, frame):
64+
"""
65+
Gracefully shut down the worker by shutting down the dynamo runtime.
66+
This will
67+
1. disable the generate endpoint so no new requests are accepted.
68+
2. wait until all in-flight requests are completed.
69+
3. let the pending await on the endpoint service complete.
70+
4. rely on python's garbage collection to clean up the GPU.
71+
"""
72+
logger.info("Shutting down dynamo runtime...")
73+
dynamo_context["runtime"].shutdown()
74+
logger.info("Dynamo runtime shutdown complete.")
75+
76+
def shutdown_vllm_worker(self, signum, frame):
77+
"""Shutdown the worker immediately by killing the background loop"""
6678
loop = asyncio.get_event_loop()
6779
try:
6880
self.engine_client.close()
@@ -100,7 +112,7 @@ def set_side_channel_host_and_port(
100112
This sets the port number for the side channel.
101113
"""
102114
if hostname is None:
103-
hostname = socket.gethostname()
115+
hostname = "127.0.0.1"
104116
if port is None:
105117
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
106118
s.bind(("", 0)) # Bind to a free port provided by the host.

0 commit comments

Comments
 (0)