diff --git a/model-engine/model_engine_server/inference/vllm/Dockerfile.vllm b/model-engine/model_engine_server/inference/vllm/Dockerfile.vllm
index 98cf4935..87bfb531 100644
--- a/model-engine/model_engine_server/inference/vllm/Dockerfile.vllm
+++ b/model-engine/model_engine_server/inference/vllm/Dockerfile.vllm
@@ -5,7 +5,7 @@ ARG VLLM_BASE_IMAGE=${VLLM_BASE_REPO}:v${VLLM_VERSION}
 FROM ${VLLM_BASE_IMAGE} AS base
 
 RUN apt-get update \
-    && apt-get install -y wget gdb psmisc dumb-init \
+    && apt-get install -y wget gdb psmisc dumb-init iproute2 netcat \
     && apt-get autoremove -y \
     && rm -rf /var/lib/apt/lists/* \
     apt-get clean
diff --git a/model-engine/model_engine_server/inference/vllm/init_ray_batch_inf_v2.py b/model-engine/model_engine_server/inference/vllm/init_ray_batch_inf_v2.py
index 473df928..22a51d6a 100644
--- a/model-engine/model_engine_server/inference/vllm/init_ray_batch_inf_v2.py
+++ b/model-engine/model_engine_server/inference/vllm/init_ray_batch_inf_v2.py
@@ -229,7 +229,18 @@ def main(mode: str):
 
 
 if __name__ == "__main__":
+    import os
+
     parser = argparse.ArgumentParser()
     parser.add_argument("--mode", choices=["wait_for_head_node_to_exit"], required=True)
+    # Enable NCCL debug logging and configure multi-NIC networking
+    os.environ["NCCL_DEBUG"] = "INFO"
+    os.environ["NCCL_DEBUG_SUBSYS"] = "INIT,NET"
+    # os.environ["FI_PROVIDER"] = "efa"  # only if you're requesting EFA devices
+    # os.environ["AWS_OFI_NCCL"] = "1"
+    os.environ["NCCL_IB_DISABLE"] = "0"
+    # os.environ["NCCL_SOCKET_IFNAME"] = "eth0,eth1"  # include the real NICs (EFA is commonly on eth1)
+    os.environ["NCCL_CROSS_NIC"] = "1"  # allow cross-NIC if ranks land on different NICs
+    os.environ["NCCL_NET_GDR_LEVEL"] = "0"
     args = parser.parse_args()
     main(args.mode)
diff --git a/model-engine/model_engine_server/inference/vllm/requirements-batch.txt b/model-engine/model_engine_server/inference/vllm/requirements-batch.txt
index d339f593..d5ef1bdf 100644
--- a/model-engine/model_engine_server/inference/vllm/requirements-batch.txt
+++ b/model-engine/model_engine_server/inference/vllm/requirements-batch.txt
@@ -1,8 +1,11 @@
 pydantic>=2.8
 boto3==1.34.15
 smart-open==6.4.0
-ddtrace==2.11.0
-datadog==0.49.1
+# ddtrace==2.11.0
+ddtrace==2.21.11
+# datadog==0.49.1
+wrapt>=1.15,<2
+datadog==0.52.1
 dataclasses-json~=0.6.7
 sse-starlette==2.1.3
-ray[client]==2.37.0
\ No newline at end of file
+ray[client]==2.48.0
\ No newline at end of file