diff --git a/vllm/entrypoints/api_server.py b/vllm/entrypoints/api_server.py index 96818507d589..00793d4b9677 100644 --- a/vllm/entrypoints/api_server.py +++ b/vllm/entrypoints/api_server.py @@ -127,6 +127,7 @@ async def run_server(args: Namespace, shutdown_task = await serve_http( app, + sock=None, host=args.host, port=args.port, log_level=args.log_level, diff --git a/vllm/entrypoints/launcher.py b/vllm/entrypoints/launcher.py index 351a39525fa6..79946a498dad 100644 --- a/vllm/entrypoints/launcher.py +++ b/vllm/entrypoints/launcher.py @@ -2,8 +2,9 @@ import asyncio import signal +import socket from http import HTTPStatus -from typing import Any +from typing import Any, Optional import uvicorn from fastapi import FastAPI, Request, Response @@ -17,7 +18,8 @@ logger = init_logger(__name__) -async def serve_http(app: FastAPI, **uvicorn_kwargs: Any): +async def serve_http(app: FastAPI, sock: Optional[socket.socket], + **uvicorn_kwargs: Any): logger.info("Available routes are:") for route in app.routes: methods = getattr(route, "methods", None) @@ -34,7 +36,8 @@ async def serve_http(app: FastAPI, **uvicorn_kwargs: Any): loop = asyncio.get_running_loop() - server_task = loop.create_task(server.serve()) + server_task = loop.create_task( + server.serve(sockets=[sock] if sock else None)) def signal_handler() -> None: # prevents the uvicorn signal handler to exit early diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index b8f54d6c7804..893add8b4585 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -10,7 +10,6 @@ import re import signal import socket -import sys import tempfile import uuid from argparse import Namespace @@ -831,6 +830,7 @@ def create_server_socket(addr: Tuple[str, int]) -> socket.socket: sock = socket.socket(family=family, type=socket.SOCK_STREAM) sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1) sock.bind(addr) return sock @@ -878,8 +878,17 @@ def signal_handler(*_) -> None: model_config = await engine_client.get_model_config() await init_app_state(engine_client, model_config, app.state, args) + def _listen_addr(a: str) -> str: + if is_valid_ipv6_address(a): + return '[' + a + ']' + return a or "0.0.0.0" + + logger.info("Starting vLLM API server on http://%s:%d", + _listen_addr(sock_addr[0]), sock_addr[1]) + shutdown_task = await serve_http( app, + sock=sock, host=args.host, port=args.port, log_level=args.uvicorn_log_level, @@ -888,8 +897,6 @@ def signal_handler(*_) -> None: ssl_certfile=args.ssl_certfile, ssl_ca_certs=args.ssl_ca_certs, ssl_cert_reqs=args.ssl_cert_reqs, - # Workaround to work on macOS - fd=sock.fileno() if sys.platform.startswith("darwin") else None, **uvicorn_kwargs, )