3434from vllm .v1 .engine .core_client import CoreEngineProcManager
3535from vllm .v1 .executor .abstract import Executor
3636
37- from dynamo .sdk import async_on_start , endpoint , service
37+ from dynamo .sdk import async_on_start , dynamo_context , endpoint , service
3838
3939logger = logging .getLogger (__name__ )
4040
@@ -44,8 +44,8 @@ def __init__(self):
4444 class_name = self .__class__ .__name__
4545 self .engine_args = parse_vllm_args (class_name , "" )
4646
47- signal .signal (signal .SIGTERM , self .shutdown_vllm_engine )
48- signal .signal (signal .SIGINT , self .shutdown_vllm_engine )
47+ signal .signal (signal .SIGTERM , self .graceful_shutdown )
48+ signal .signal (signal .SIGINT , self .graceful_shutdown )
4949
5050 self .set_side_channel_host_and_port ()
5151
@@ -60,9 +60,21 @@ async def async_init(self):
6060
6161 logger .info ("VllmWorker has been initialized" )
6262
63- def shutdown_vllm_engine (self , signum , frame ):
64- """Shutdown the background loop"""
65- logger .info (f"Received signal { signum } , shutting down" )
63+ def graceful_shutdown (self , signum , frame ):
64+ """
65+ Gracefully shutdown the worker by shutting down the dynamo runtime.
66+ This will
67+ 1. disable the generate endpoint so no new requests are accepted.
68+ 2. wait until all in-flight requests are completed.
69+ 3. finish the awaiting for the endpoint service.
70+ 4. rely on python's garbage collection to clean up the GPU.
71+ """
72+ logger .info ("Shutting down dynamo runtime..." )
73+ dynamo_context ["runtime" ].shutdown ()
74+ logger .info ("Dynamo runtime shutdown complete." )
75+
76+ def shutdown_vllm_worker (self , signum , frame ):
77+ """Shutdown the worker immediately by killing the background loop"""
6678 loop = asyncio .get_event_loop ()
6779 try :
6880 self .engine_client .close ()
@@ -100,7 +112,7 @@ def set_side_channel_host_and_port(
100112 This sets the port number for the side channel.
101113 """
102114 if hostname is None :
103- hostname = socket . gethostname ()
115+ hostname = "127.0.0.1"
104116 if port is None :
105117 with socket .socket (socket .AF_INET , socket .SOCK_STREAM ) as s :
106118 s .bind (("" , 0 )) # Bind to a free port provided by the host.
0 commit comments