From a10593fe956545357557a2fd370276e98015c8cb Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Tue, 11 Feb 2025 16:18:37 -0500 Subject: [PATCH] [Frontend] Pass pre-created socket to uvicorn I noticed that the `fd=...` argument had been re-added to our uvicorn configuration, but only for macOS, in PR #11696. A previous PR, #10012, includes an explanation of why we should not be using this argument, as it does not behave as intended. There is another way to do what we really want, which is to have uvicorn re-use a socket we have already created. This change implements that. For the pre-created socket, we previously set `SO_REUSEADDR`, but this change adds `SO_REUSEPORT` as well. While it's not strictly necessary in this PR, it will be needed when we start running multiple API server processes (issue #12705). I was already changing related code here, and the change also helps demonstrate the value of re-using the existing socket. A helpful explanation of `SO_REUSEPORT` can be found here: One side-effect of this change is that uvicorn no longer emits a message showing the host and port number in use when it starts the server, so this includes a new INFO log message on the vLLM side that does the same thing prior to starting the server. 
Signed-off-by: Russell Bryant --- vllm/entrypoints/api_server.py | 1 + vllm/entrypoints/launcher.py | 9 ++++++--- vllm/entrypoints/openai/api_server.py | 13 ++++++++++--- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/vllm/entrypoints/api_server.py b/vllm/entrypoints/api_server.py index 96818507d589..00793d4b9677 100644 --- a/vllm/entrypoints/api_server.py +++ b/vllm/entrypoints/api_server.py @@ -127,6 +127,7 @@ async def run_server(args: Namespace, shutdown_task = await serve_http( app, + sock=None, host=args.host, port=args.port, log_level=args.log_level, diff --git a/vllm/entrypoints/launcher.py b/vllm/entrypoints/launcher.py index 351a39525fa6..79946a498dad 100644 --- a/vllm/entrypoints/launcher.py +++ b/vllm/entrypoints/launcher.py @@ -2,8 +2,9 @@ import asyncio import signal +import socket from http import HTTPStatus -from typing import Any +from typing import Any, Optional import uvicorn from fastapi import FastAPI, Request, Response @@ -17,7 +18,8 @@ logger = init_logger(__name__) -async def serve_http(app: FastAPI, **uvicorn_kwargs: Any): +async def serve_http(app: FastAPI, sock: Optional[socket.socket], + **uvicorn_kwargs: Any): logger.info("Available routes are:") for route in app.routes: methods = getattr(route, "methods", None) @@ -34,7 +36,8 @@ async def serve_http(app: FastAPI, **uvicorn_kwargs: Any): loop = asyncio.get_running_loop() - server_task = loop.create_task(server.serve()) + server_task = loop.create_task( + server.serve(sockets=[sock] if sock else None)) def signal_handler() -> None: # prevents the uvicorn signal handler to exit early diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index b8f54d6c7804..893add8b4585 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -10,7 +10,6 @@ import re import signal import socket -import sys import tempfile import uuid from argparse import Namespace @@ -831,6 +830,7 @@ def create_server_socket(addr: 
Tuple[str, int]) -> socket.socket: sock = socket.socket(family=family, type=socket.SOCK_STREAM) sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1) sock.bind(addr) return sock @@ -878,8 +878,17 @@ def signal_handler(*_) -> None: model_config = await engine_client.get_model_config() await init_app_state(engine_client, model_config, app.state, args) + def _listen_addr(a: str) -> str: + if is_valid_ipv6_address(a): + return '[' + a + ']' + return a or "0.0.0.0" + + logger.info("Starting vLLM API server on http://%s:%d", + _listen_addr(sock_addr[0]), sock_addr[1]) + shutdown_task = await serve_http( app, + sock=sock, host=args.host, port=args.port, log_level=args.uvicorn_log_level, @@ -888,8 +897,6 @@ def signal_handler(*_) -> None: ssl_certfile=args.ssl_certfile, ssl_ca_certs=args.ssl_ca_certs, ssl_cert_reqs=args.ssl_cert_reqs, - # Workaround to work on macOS - fd=sock.fileno() if sys.platform.startswith("darwin") else None, **uvicorn_kwargs, )