From d058de5f4b1f7d17f98057de398db743369ec7df Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 11 Mar 2025 10:55:28 +0000 Subject: [PATCH 1/6] Move Dockerfiles to `dockerfiles/` Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .buildkite/release-pipeline.yaml | 4 ++-- .buildkite/run-cpu-test-ppc64le.sh | 2 +- .buildkite/run-cpu-test.sh | 4 ++-- .buildkite/run-hpu-test.sh | 2 +- .buildkite/run-neuron-test.sh | 2 +- .buildkite/run-openvino-test.sh | 2 +- .buildkite/run-tpu-test.sh | 2 +- .buildkite/run-xpu-test.sh | 2 +- .github/mergify.yml | 2 +- .github/workflows/lint-and-deploy.yaml | 2 +- CMakeLists.txt | 2 +- Dockerfile => dockerfiles/Dockerfile | 0 Dockerfile.arm => dockerfiles/Dockerfile.arm | 0 Dockerfile.cpu => dockerfiles/Dockerfile.cpu | 0 Dockerfile.hpu => dockerfiles/Dockerfile.hpu | 0 Dockerfile.neuron => dockerfiles/Dockerfile.neuron | 0 .../Dockerfile.openvino | 0 .../Dockerfile.ppc64le | 0 Dockerfile.rocm => dockerfiles/Dockerfile.rocm | 0 .../Dockerfile.rocm_base | 0 Dockerfile.s390x => dockerfiles/Dockerfile.s390x | 0 Dockerfile.tpu => dockerfiles/Dockerfile.tpu | 0 Dockerfile.xpu => dockerfiles/Dockerfile.xpu | 0 docs/source/contributing/dockerfile/dockerfile.md | 6 +++--- docs/source/deployment/docker.md | 2 +- docs/source/deployment/nginx.md | 4 ++-- .../installation/ai_accelerator/hpu-gaudi.inc.md | 2 +- .../installation/ai_accelerator/neuron.inc.md | 2 +- .../installation/ai_accelerator/openvino.inc.md | 2 +- .../installation/ai_accelerator/tpu.inc.md | 4 ++-- .../getting_started/installation/cpu/index.md | 4 ++-- .../getting_started/installation/gpu/rocm.inc.md | 14 +++++++------- .../getting_started/installation/gpu/xpu.inc.md | 2 +- docs/source/getting_started/quickstart.md | 2 +- 34 files changed, 35 insertions(+), 35 deletions(-) rename Dockerfile => dockerfiles/Dockerfile (100%) rename Dockerfile.arm => dockerfiles/Dockerfile.arm (100%) rename Dockerfile.cpu => 
dockerfiles/Dockerfile.cpu (100%) rename Dockerfile.hpu => dockerfiles/Dockerfile.hpu (100%) rename Dockerfile.neuron => dockerfiles/Dockerfile.neuron (100%) rename Dockerfile.openvino => dockerfiles/Dockerfile.openvino (100%) rename Dockerfile.ppc64le => dockerfiles/Dockerfile.ppc64le (100%) rename Dockerfile.rocm => dockerfiles/Dockerfile.rocm (100%) rename Dockerfile.rocm_base => dockerfiles/Dockerfile.rocm_base (100%) rename Dockerfile.s390x => dockerfiles/Dockerfile.s390x (100%) rename Dockerfile.tpu => dockerfiles/Dockerfile.tpu (100%) rename Dockerfile.xpu => dockerfiles/Dockerfile.xpu (100%) diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index 37cdab9e01ec..4dbe28394432 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -57,7 +57,7 @@ steps: agents: queue: tpu_queue_postmerge commands: - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --tag vllm/vllm-tpu:nightly --tag vllm/vllm-tpu:$BUILDKITE_COMMIT --progress plain -f Dockerfile.tpu ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --tag vllm/vllm-tpu:nightly --tag vllm/vllm-tpu:$BUILDKITE_COMMIT --progress plain -f dockerfiles/Dockerfile.tpu ." - "docker push vllm/vllm-tpu:nightly" - "docker push vllm/vllm-tpu:$BUILDKITE_COMMIT" plugins: @@ -82,7 +82,7 @@ steps: queue: cpu_queue_postmerge commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --progress plain -f Dockerfile.cpu ." 
+ - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --progress plain -f dockerfiles/Dockerfile.cpu ." - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)" env: DOCKER_BUILDKIT: "1" diff --git a/.buildkite/run-cpu-test-ppc64le.sh b/.buildkite/run-cpu-test-ppc64le.sh index bc06838d804f..4df04cc13bac 100755 --- a/.buildkite/run-cpu-test-ppc64le.sh +++ b/.buildkite/run-cpu-test-ppc64le.sh @@ -10,5 +10,5 @@ trap remove_docker_container EXIT remove_docker_container # Try building the docker image -docker build -t cpu-test -f Dockerfile.ppc64le . +docker build -t cpu-test -f dockerfiles/Dockerfile.ppc64le . diff --git a/.buildkite/run-cpu-test.sh b/.buildkite/run-cpu-test.sh index f6dad818ddc0..d6db7f6c1dd9 100644 --- a/.buildkite/run-cpu-test.sh +++ b/.buildkite/run-cpu-test.sh @@ -9,8 +9,8 @@ CORE_RANGE=${CORE_RANGE:-48-95} NUMA_NODE=${NUMA_NODE:-1} # Try building the docker image -numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build -t cpu-test-"$BUILDKITE_BUILD_NUMBER" -f Dockerfile.cpu . -numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" -t cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 -f Dockerfile.cpu . +numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build -t cpu-test-"$BUILDKITE_BUILD_NUMBER" -f dockerfiles/Dockerfile.cpu . +numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" -t cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 -f dockerfiles/Dockerfile.cpu . 
# Setup cleanup remove_docker_container() { set -e; docker rm -f cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2-"$NUMA_NODE" || true; } diff --git a/.buildkite/run-hpu-test.sh b/.buildkite/run-hpu-test.sh index f83eb927aae4..a0ab50d3fd7e 100644 --- a/.buildkite/run-hpu-test.sh +++ b/.buildkite/run-hpu-test.sh @@ -5,7 +5,7 @@ set -ex # Try building the docker image -docker build -t hpu-test-env -f Dockerfile.hpu . +docker build -t hpu-test-env -f dockerfiles/Dockerfile.hpu . # Setup cleanup # certain versions of HPU software stack have a bug that can diff --git a/.buildkite/run-neuron-test.sh b/.buildkite/run-neuron-test.sh index 55c374fcc33d..89e0ce1af950 100644 --- a/.buildkite/run-neuron-test.sh +++ b/.buildkite/run-neuron-test.sh @@ -35,7 +35,7 @@ else date "+%s" > /tmp/neuron-docker-build-timestamp fi -docker build -t "${image_name}" -f Dockerfile.neuron . +docker build -t "${image_name}" -f dockerfiles/Dockerfile.neuron . # Setup cleanup remove_docker_container() { diff --git a/.buildkite/run-openvino-test.sh b/.buildkite/run-openvino-test.sh index a1103bed66ec..4429905687c0 100755 --- a/.buildkite/run-openvino-test.sh +++ b/.buildkite/run-openvino-test.sh @@ -5,7 +5,7 @@ set -ex # Try building the docker image -docker build -t openvino-test -f Dockerfile.openvino . +docker build -t openvino-test -f dockerfiles/Dockerfile.openvino . # Setup cleanup remove_docker_container() { docker rm -f openvino-test || true; } diff --git a/.buildkite/run-tpu-test.sh b/.buildkite/run-tpu-test.sh index 650af0fac4c6..f67086840954 100755 --- a/.buildkite/run-tpu-test.sh +++ b/.buildkite/run-tpu-test.sh @@ -3,7 +3,7 @@ set -e # Build the docker image. -docker build -f Dockerfile.tpu -t vllm-tpu . +docker build -f dockerfiles/Dockerfile.tpu -t vllm-tpu . # Set up cleanup. 
remove_docker_container() { docker rm -f tpu-test || true; } diff --git a/.buildkite/run-xpu-test.sh b/.buildkite/run-xpu-test.sh index d48639e5720c..1969c5681dd3 100644 --- a/.buildkite/run-xpu-test.sh +++ b/.buildkite/run-xpu-test.sh @@ -5,7 +5,7 @@ set -ex # Try building the docker image -docker build -t xpu-test -f Dockerfile.xpu . +docker build -t xpu-test -f dockerfiles/Dockerfile.xpu . # Setup cleanup remove_docker_container() { docker rm -f xpu-test || true; } diff --git a/.github/mergify.yml b/.github/mergify.yml index 54f56210b286..f5ff660c1421 100644 --- a/.github/mergify.yml +++ b/.github/mergify.yml @@ -19,7 +19,7 @@ pull_request_rules: - files~=\.buildkite/ - files~=^cmake/ - files=CMakeLists.txt - - files~=^Dockerfile + - files~=^dockerfiles/Dockerfile - files~=^requirements.*\.txt - files=setup.py actions: diff --git a/.github/workflows/lint-and-deploy.yaml b/.github/workflows/lint-and-deploy.yaml index b199d0867a64..2e56a1e597f5 100644 --- a/.github/workflows/lint-and-deploy.yaml +++ b/.github/workflows/lint-and-deploy.yaml @@ -50,7 +50,7 @@ jobs: uses: helm/kind-action@a1b0e391336a6ee6713a0583f8c6240d70863de3 # v1.12.0 - name: Build the Docker image vllm cpu - run: docker buildx build -f Dockerfile.cpu -t vllm-cpu-env . + run: docker buildx build -f dockerfiles/Dockerfile.cpu -t vllm-cpu-env . - name: Configuration of docker images, network and namespace for the kind cluster run: | diff --git a/CMakeLists.txt b/CMakeLists.txt index 5349b64aecb6..244f650569e2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,7 +44,7 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101") # # Note: the CUDA torch version is derived from pyproject.toml and various # requirements.txt files and should be kept consistent. 
The ROCm torch -# versions are derived from Dockerfile.rocm +# versions are derived from dockerfiles/Dockerfile.rocm # set(TORCH_SUPPORTED_VERSION_CUDA "2.5.1") set(TORCH_SUPPORTED_VERSION_ROCM "2.5.1") diff --git a/Dockerfile b/dockerfiles/Dockerfile similarity index 100% rename from Dockerfile rename to dockerfiles/Dockerfile diff --git a/Dockerfile.arm b/dockerfiles/Dockerfile.arm similarity index 100% rename from Dockerfile.arm rename to dockerfiles/Dockerfile.arm diff --git a/Dockerfile.cpu b/dockerfiles/Dockerfile.cpu similarity index 100% rename from Dockerfile.cpu rename to dockerfiles/Dockerfile.cpu diff --git a/Dockerfile.hpu b/dockerfiles/Dockerfile.hpu similarity index 100% rename from Dockerfile.hpu rename to dockerfiles/Dockerfile.hpu diff --git a/Dockerfile.neuron b/dockerfiles/Dockerfile.neuron similarity index 100% rename from Dockerfile.neuron rename to dockerfiles/Dockerfile.neuron diff --git a/Dockerfile.openvino b/dockerfiles/Dockerfile.openvino similarity index 100% rename from Dockerfile.openvino rename to dockerfiles/Dockerfile.openvino diff --git a/Dockerfile.ppc64le b/dockerfiles/Dockerfile.ppc64le similarity index 100% rename from Dockerfile.ppc64le rename to dockerfiles/Dockerfile.ppc64le diff --git a/Dockerfile.rocm b/dockerfiles/Dockerfile.rocm similarity index 100% rename from Dockerfile.rocm rename to dockerfiles/Dockerfile.rocm diff --git a/Dockerfile.rocm_base b/dockerfiles/Dockerfile.rocm_base similarity index 100% rename from Dockerfile.rocm_base rename to dockerfiles/Dockerfile.rocm_base diff --git a/Dockerfile.s390x b/dockerfiles/Dockerfile.s390x similarity index 100% rename from Dockerfile.s390x rename to dockerfiles/Dockerfile.s390x diff --git a/Dockerfile.tpu b/dockerfiles/Dockerfile.tpu similarity index 100% rename from Dockerfile.tpu rename to dockerfiles/Dockerfile.tpu diff --git a/Dockerfile.xpu b/dockerfiles/Dockerfile.xpu similarity index 100% rename from Dockerfile.xpu rename to dockerfiles/Dockerfile.xpu diff --git 
a/docs/source/contributing/dockerfile/dockerfile.md b/docs/source/contributing/dockerfile/dockerfile.md index 96674805df53..451bbe808695 100644 --- a/docs/source/contributing/dockerfile/dockerfile.md +++ b/docs/source/contributing/dockerfile/dockerfile.md @@ -1,6 +1,6 @@ # Dockerfile -We provide a <gh-file:Dockerfile> to construct the image for running an OpenAI compatible server with vLLM. +We provide a <gh-file:dockerfiles/Dockerfile> to construct the image for running an OpenAI compatible server with vLLM. More information about deploying with Docker can be found [here](#deployment-docker). Below is a visual representation of the multi-stage Dockerfile. The build graph contains the following nodes: @@ -28,7 +28,7 @@ The edges of the build graph represent: > Commands to regenerate the build graph (make sure to run it **from the \`root\` directory of the vLLM repository** where the dockerfile is present): > > ```bash - > dockerfilegraph -o png --legend --dpi 200 --max-label-length 50 --filename Dockerfile + > dockerfilegraph -o png --legend --dpi 200 --max-label-length 50 --filename dockerfiles/Dockerfile > ``` > > or in case you want to run it directly with the docker image: @@ -43,7 +43,7 @@ The edges of the build graph represent: > --output png \ > --dpi 200 \ > --max-label-length 50 \ - > --filename Dockerfile \ + > --filename dockerfiles/Dockerfile \ > --legend > ``` > diff --git a/docs/source/deployment/docker.md b/docs/source/deployment/docker.md index 9e52a2182cfb..7d30e45021bb 100644 --- a/docs/source/deployment/docker.md +++ b/docs/source/deployment/docker.md @@ -61,7 +61,7 @@ RUN uv pip install --system git+https://github.com/huggingface/transformers.git ## Building vLLM's Docker Image from Source -You can build and run vLLM from source via the provided <gh-file:Dockerfile>. To build vLLM: +You can build and run vLLM from source via the provided <gh-file:dockerfiles/Dockerfile>.
To build vLLM: ```console # optionally specifies: --build-arg max_jobs=8 --build-arg nvcc_threads=2 diff --git a/docs/source/deployment/nginx.md b/docs/source/deployment/nginx.md index 62816f514c00..44f6a71fd475 100644 --- a/docs/source/deployment/nginx.md +++ b/docs/source/deployment/nginx.md @@ -69,14 +69,14 @@ server { ```console cd $vllm_root -docker build -f Dockerfile . --tag vllm +docker build -f dockerfiles/Dockerfile . --tag vllm ``` If you are behind proxy, you can pass the proxy settings to the docker build command as shown below: ```console cd $vllm_root -docker build -f Dockerfile . --tag vllm --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy +docker build -f dockerfiles/Dockerfile . --tag vllm --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy ``` (nginxloadbalancer-nginx-docker-network)= diff --git a/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md b/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md index 7e52f6048909..b670e5d4d885 100644 --- a/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md +++ b/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md @@ -86,7 +86,7 @@ Currently, there are no pre-built Intel Gaudi images. ### Build image from source ```console -docker build -f Dockerfile.hpu -t vllm-hpu-env . +docker build -f dockerfiles/Dockerfile.hpu -t vllm-hpu-env . 
docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --rm vllm-hpu-env ``` diff --git a/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md b/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md index 4c668a8e6892..d278cfb8daee 100644 --- a/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md +++ b/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md @@ -132,7 +132,7 @@ Currently, there are no pre-built Neuron images. See <project:#deployment-docker-build-image-from-source> for instructions on building the Docker image. -Make sure to use <gh-file:Dockerfile.neuron> in place of the default Dockerfile. +Make sure to use <gh-file:dockerfiles/Dockerfile.neuron> in place of the default Dockerfile. ## Extra information diff --git a/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md b/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md index 5641c1563656..ef64767070a4 100644 --- a/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md +++ b/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md @@ -54,7 +54,7 @@ Currently, there are no pre-built OpenVINO images. ### Build image from source ```console -docker build -f Dockerfile.openvino -t vllm-openvino-env . +docker build -f dockerfiles/Dockerfile.openvino -t vllm-openvino-env . docker run -it --rm vllm-openvino-env ``` diff --git a/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md b/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md index 6c7bbf602499..bbb88e77a277 100644 --- a/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md +++ b/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md @@ -169,10 +169,10 @@ See <project:#deployment-docker-pre-built-image> for instructions on using the o ### Build image from source -You can use <gh-file:Dockerfile.tpu> to build a Docker image with TPU support. +You can use <gh-file:dockerfiles/Dockerfile.tpu> to build a Docker image with TPU support. ```console -docker build -f Dockerfile.tpu -t vllm-tpu . 
+docker build -f dockerfiles/Dockerfile.tpu -t vllm-tpu . ``` Run the Docker image with the following command: diff --git a/docs/source/getting_started/installation/cpu/index.md b/docs/source/getting_started/installation/cpu/index.md index 9c5977939cc5..697fecba69ac 100644 --- a/docs/source/getting_started/installation/cpu/index.md +++ b/docs/source/getting_started/installation/cpu/index.md @@ -134,7 +134,7 @@ Currently, there are no pre-build CPU images. ### Build image from source ```console -$ docker build -f Dockerfile.cpu -t vllm-cpu-env --shm-size=4g . +$ docker build -f dockerfiles/Dockerfile.cpu -t vllm-cpu-env --shm-size=4g . $ docker run -it \ --rm \ --network=host \ @@ -144,7 +144,7 @@ $ docker run -it \ ``` ::::{tip} -For ARM or Apple silicon, use `Dockerfile.arm` +For ARM or Apple silicon, use `dockerfiles/Dockerfile.arm` :::: ## Supported features diff --git a/docs/source/getting_started/installation/gpu/rocm.inc.md b/docs/source/getting_started/installation/gpu/rocm.inc.md index 4381cef5e96a..3696a04add96 100644 --- a/docs/source/getting_started/installation/gpu/rocm.inc.md +++ b/docs/source/getting_started/installation/gpu/rocm.inc.md @@ -123,7 +123,7 @@ Building the Docker image from source is the recommended way to use vLLM with RO #### (Optional) Build an image with ROCm software stack -Build a docker image from <gh-file:Dockerfile.rocm_base> which setup ROCm software stack needed by the vLLM. +Build a docker image from <gh-file:dockerfiles/Dockerfile.rocm_base> which setup ROCm software stack needed by the vLLM. **This step is optional as this rocm_base image is usually prebuilt and store at [Docker Hub](https://hub.docker.com/r/rocm/vllm-dev) under tag `rocm/vllm-dev:base` to speed up user experience.** If you choose to build this rocm_base image yourself, the steps are as follows. @@ -140,12 +140,12 @@ It is important that the user kicks off the docker build using buildkit. 
Either To build vllm on ROCm 6.3 for MI200 and MI300 series, you can use the default: ```console -DOCKER_BUILDKIT=1 docker build -f Dockerfile.rocm_base -t rocm/vllm-dev:base . +DOCKER_BUILDKIT=1 docker build -f dockerfiles/Dockerfile.rocm_base -t rocm/vllm-dev:base . ``` #### Build an image with vLLM -First, build a docker image from <gh-file:Dockerfile.rocm> and launch a docker container from the image. +First, build a docker image from <gh-file:dockerfiles/Dockerfile.rocm> and launch a docker container from the image. It is important that the user kicks off the docker build using buildkit. Either the user put `DOCKER_BUILDKIT=1` as environment variable when calling docker build command, or the user needs to setup buildkit in the docker daemon configuration /etc/docker/daemon.json as follows and restart the daemon: ```console @@ -156,10 +156,10 @@ It is important that the user kicks off the docker build using buildkit. Either } ``` -<gh-file:Dockerfile.rocm> uses ROCm 6.3 by default, but also supports ROCm 5.7, 6.0, 6.1, and 6.2, in older vLLM branches. +<gh-file:dockerfiles/Dockerfile.rocm> uses ROCm 6.3 by default, but also supports ROCm 5.7, 6.0, 6.1, and 6.2, in older vLLM branches. It provides flexibility to customize the build of docker image using the following arguments: -- `BASE_IMAGE`: specifies the base image used when running `docker build`. The default value `rocm/vllm-dev:base` is an image published and maintained by AMD. It is being built using <gh-file:Dockerfile.rocm_base> +- `BASE_IMAGE`: specifies the base image used when running `docker build`. The default value `rocm/vllm-dev:base` is an image published and maintained by AMD. 
It is being built using <gh-file:dockerfiles/Dockerfile.rocm_base> - `USE_CYTHON`: An option to run cython compilation on a subset of python files upon docker build - `BUILD_RPD`: Include RocmProfileData profiling tool in the image - `ARG_PYTORCH_ROCM_ARCH`: Allows to override the gfx architecture values from the base docker image @@ -169,13 +169,13 @@ Their values can be passed in when running `docker build` with `--build-arg` opt To build vllm on ROCm 6.3 for MI200 and MI300 series, you can use the default: ```console -DOCKER_BUILDKIT=1 docker build -f Dockerfile.rocm -t vllm-rocm . +DOCKER_BUILDKIT=1 docker build -f dockerfiles/Dockerfile.rocm -t vllm-rocm . ``` To build vllm on ROCm 6.3 for Radeon RX7900 series (gfx1100), you should pick the alternative base image: ```console -DOCKER_BUILDKIT=1 docker build --build-arg BASE_IMAGE="rocm/vllm-dev:navi_base" -f Dockerfile.rocm -t vllm-rocm . +DOCKER_BUILDKIT=1 docker build --build-arg BASE_IMAGE="rocm/vllm-dev:navi_base" -f dockerfiles/Dockerfile.rocm -t vllm-rocm . ``` To run the above docker image `vllm-rocm`, use the below command: diff --git a/docs/source/getting_started/installation/gpu/xpu.inc.md b/docs/source/getting_started/installation/gpu/xpu.inc.md index 9678c25b1dd8..269323a2146b 100644 --- a/docs/source/getting_started/installation/gpu/xpu.inc.md +++ b/docs/source/getting_started/installation/gpu/xpu.inc.md @@ -48,7 +48,7 @@ Currently, there are no pre-built XPU images. ### Build image from source ```console -$ docker build -f Dockerfile.xpu -t vllm-xpu-env --shm-size=4g . +$ docker build -f dockerfiles/Dockerfile.xpu -t vllm-xpu-env --shm-size=4g . 
$ docker run -it \ --rm \ --network=host \ diff --git a/docs/source/getting_started/quickstart.md b/docs/source/getting_started/quickstart.md index 452bee2385fe..785a1fd2a676 100644 --- a/docs/source/getting_started/quickstart.md +++ b/docs/source/getting_started/quickstart.md @@ -198,5 +198,5 @@ Currently, vLLM supports multiple backends for efficient Attention computation a If desired, you can also manually set the backend of your choice by configuring the environment variable `VLLM_ATTENTION_BACKEND` to one of the following options: `FLASH_ATTN`, `FLASHINFER` or `XFORMERS`. ```{attention} -There are no pre-built vllm wheels containing Flash Infer, so you must install it in your environment first. Refer to the [Flash Infer official docs](https://docs.flashinfer.ai/) or see [Dockerfile](https://github.com/vllm-project/vllm/blob/main/Dockerfile) for instructions on how to install it. +There are no pre-built vllm wheels containing Flash Infer, so you must install it in your environment first. Refer to the [Flash Infer official docs](https://docs.flashinfer.ai/) or see <gh-file:dockerfiles/Dockerfile> for instructions on how to install it. 
``` From 957a72c78f4539511b1cc80af90fbb182b0ae78f Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Mon, 17 Mar 2025 15:44:40 +0100 Subject: [PATCH 2/6] `dockerfiles/` -> `docker/` Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .buildkite/release-pipeline.yaml | 4 ++-- .buildkite/run-cpu-test-ppc64le.sh | 2 +- .buildkite/run-cpu-test.sh | 4 ++-- .buildkite/run-hpu-test.sh | 2 +- .buildkite/run-neuron-test.sh | 2 +- .buildkite/run-openvino-test.sh | 2 +- .buildkite/run-tpu-test.sh | 2 +- .buildkite/run-xpu-test.sh | 2 +- .github/mergify.yml | 2 +- .github/workflows/lint-and-deploy.yaml | 2 +- CMakeLists.txt | 2 +- {dockerfiles => docker}/Dockerfile | 0 {dockerfiles => docker}/Dockerfile.arm | 0 {dockerfiles => docker}/Dockerfile.cpu | 0 {dockerfiles => docker}/Dockerfile.hpu | 0 {dockerfiles => docker}/Dockerfile.neuron | 0 {dockerfiles => docker}/Dockerfile.openvino | 0 {dockerfiles => docker}/Dockerfile.ppc64le | 0 {dockerfiles => docker}/Dockerfile.rocm | 0 {dockerfiles => docker}/Dockerfile.rocm_base | 0 {dockerfiles => docker}/Dockerfile.s390x | 0 {dockerfiles => docker}/Dockerfile.tpu | 0 {dockerfiles => docker}/Dockerfile.xpu | 0 docs/source/contributing/dockerfile/dockerfile.md | 6 +++--- docs/source/deployment/docker.md | 2 +- docs/source/deployment/nginx.md | 4 ++-- .../installation/ai_accelerator/hpu-gaudi.inc.md | 2 +- .../installation/ai_accelerator/neuron.inc.md | 2 +- .../installation/ai_accelerator/openvino.inc.md | 2 +- .../installation/ai_accelerator/tpu.inc.md | 4 ++-- docs/source/getting_started/installation/cpu.md | 4 ++-- .../getting_started/installation/gpu/rocm.inc.md | 14 +++++++------- .../getting_started/installation/gpu/xpu.inc.md | 2 +- docs/source/getting_started/quickstart.md | 2 +- 34 files changed, 35 insertions(+), 35 deletions(-) rename {dockerfiles => docker}/Dockerfile (100%) rename {dockerfiles => docker}/Dockerfile.arm (100%) rename {dockerfiles => 
docker}/Dockerfile.cpu (100%) rename {dockerfiles => docker}/Dockerfile.hpu (100%) rename {dockerfiles => docker}/Dockerfile.neuron (100%) rename {dockerfiles => docker}/Dockerfile.openvino (100%) rename {dockerfiles => docker}/Dockerfile.ppc64le (100%) rename {dockerfiles => docker}/Dockerfile.rocm (100%) rename {dockerfiles => docker}/Dockerfile.rocm_base (100%) rename {dockerfiles => docker}/Dockerfile.s390x (100%) rename {dockerfiles => docker}/Dockerfile.tpu (100%) rename {dockerfiles => docker}/Dockerfile.xpu (100%) diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index 4dbe28394432..1776e073610f 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -57,7 +57,7 @@ steps: agents: queue: tpu_queue_postmerge commands: - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --tag vllm/vllm-tpu:nightly --tag vllm/vllm-tpu:$BUILDKITE_COMMIT --progress plain -f dockerfiles/Dockerfile.tpu ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --tag vllm/vllm-tpu:nightly --tag vllm/vllm-tpu:$BUILDKITE_COMMIT --progress plain -f docker/Dockerfile.tpu ." - "docker push vllm/vllm-tpu:nightly" - "docker push vllm/vllm-tpu:$BUILDKITE_COMMIT" plugins: @@ -82,7 +82,7 @@ steps: queue: cpu_queue_postmerge commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --progress plain -f dockerfiles/Dockerfile.cpu ." 
+ - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --progress plain -f docker/Dockerfile.cpu ." - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)" env: DOCKER_BUILDKIT: "1" diff --git a/.buildkite/run-cpu-test-ppc64le.sh b/.buildkite/run-cpu-test-ppc64le.sh index 4df04cc13bac..9c5cf7cad948 100755 --- a/.buildkite/run-cpu-test-ppc64le.sh +++ b/.buildkite/run-cpu-test-ppc64le.sh @@ -10,5 +10,5 @@ trap remove_docker_container EXIT remove_docker_container # Try building the docker image -docker build -t cpu-test -f dockerfiles/Dockerfile.ppc64le . +docker build -t cpu-test -f docker/Dockerfile.ppc64le . diff --git a/.buildkite/run-cpu-test.sh b/.buildkite/run-cpu-test.sh index a0034bf2a333..7ddf36b1db99 100644 --- a/.buildkite/run-cpu-test.sh +++ b/.buildkite/run-cpu-test.sh @@ -9,8 +9,8 @@ CORE_RANGE=${CORE_RANGE:-48-95} NUMA_NODE=${NUMA_NODE:-1} # Try building the docker image -numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build -t cpu-test-"$BUILDKITE_BUILD_NUMBER" -f dockerfiles/Dockerfile.cpu . -numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" -t cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 -f dockerfiles/Dockerfile.cpu . +numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build -t cpu-test-"$BUILDKITE_BUILD_NUMBER" -f docker/Dockerfile.cpu . +numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" -t cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 -f docker/Dockerfile.cpu . 
# Setup cleanup remove_docker_container() { set -e; docker rm -f cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2-"$NUMA_NODE" || true; } diff --git a/.buildkite/run-hpu-test.sh b/.buildkite/run-hpu-test.sh index a0ab50d3fd7e..95b6ac37f185 100644 --- a/.buildkite/run-hpu-test.sh +++ b/.buildkite/run-hpu-test.sh @@ -5,7 +5,7 @@ set -ex # Try building the docker image -docker build -t hpu-test-env -f dockerfiles/Dockerfile.hpu . +docker build -t hpu-test-env -f docker/Dockerfile.hpu . # Setup cleanup # certain versions of HPU software stack have a bug that can diff --git a/.buildkite/run-neuron-test.sh b/.buildkite/run-neuron-test.sh index c7bfa3ea1ce6..ec6a080eb499 100644 --- a/.buildkite/run-neuron-test.sh +++ b/.buildkite/run-neuron-test.sh @@ -35,7 +35,7 @@ else date "+%s" > /tmp/neuron-docker-build-timestamp fi -docker build -t "${image_name}" -f dockerfiles/Dockerfile.neuron . +docker build -t "${image_name}" -f docker/Dockerfile.neuron . # Setup cleanup remove_docker_container() { diff --git a/.buildkite/run-openvino-test.sh b/.buildkite/run-openvino-test.sh index 4429905687c0..9be8fde5eb09 100755 --- a/.buildkite/run-openvino-test.sh +++ b/.buildkite/run-openvino-test.sh @@ -5,7 +5,7 @@ set -ex # Try building the docker image -docker build -t openvino-test -f dockerfiles/Dockerfile.openvino . +docker build -t openvino-test -f docker/Dockerfile.openvino . # Setup cleanup remove_docker_container() { docker rm -f openvino-test || true; } diff --git a/.buildkite/run-tpu-test.sh b/.buildkite/run-tpu-test.sh index 289c28b237ef..67a979744f37 100755 --- a/.buildkite/run-tpu-test.sh +++ b/.buildkite/run-tpu-test.sh @@ -3,7 +3,7 @@ set -e # Build the docker image. -docker build -f dockerfiles/Dockerfile.tpu -t vllm-tpu . +docker build -f docker/Dockerfile.tpu -t vllm-tpu . # Set up cleanup. 
remove_docker_container() { docker rm -f tpu-test || true; } diff --git a/.buildkite/run-xpu-test.sh b/.buildkite/run-xpu-test.sh index 210d768a4e99..1534b659545d 100644 --- a/.buildkite/run-xpu-test.sh +++ b/.buildkite/run-xpu-test.sh @@ -8,7 +8,7 @@ image_name="xpu/vllm-ci:${BUILDKITE_COMMIT}" container_name="xpu_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)" # Try building the docker image -docker build -t ${image_name} -f dockerfiles/Dockerfile.xpu . +docker build -t ${image_name} -f docker/Dockerfile.xpu . # Setup cleanup remove_docker_container() { diff --git a/.github/mergify.yml b/.github/mergify.yml index f5ff660c1421..e8dbab27bad7 100644 --- a/.github/mergify.yml +++ b/.github/mergify.yml @@ -19,7 +19,7 @@ pull_request_rules: - files~=\.buildkite/ - files~=^cmake/ - files=CMakeLists.txt - - files~=^dockerfiles/Dockerfile + - files~=^docker/Dockerfile - files~=^requirements.*\.txt - files=setup.py actions: diff --git a/.github/workflows/lint-and-deploy.yaml b/.github/workflows/lint-and-deploy.yaml index 2e56a1e597f5..7b1d9f69938c 100644 --- a/.github/workflows/lint-and-deploy.yaml +++ b/.github/workflows/lint-and-deploy.yaml @@ -50,7 +50,7 @@ jobs: uses: helm/kind-action@a1b0e391336a6ee6713a0583f8c6240d70863de3 # v1.12.0 - name: Build the Docker image vllm cpu - run: docker buildx build -f dockerfiles/Dockerfile.cpu -t vllm-cpu-env . + run: docker buildx build -f docker/Dockerfile.cpu -t vllm-cpu-env . - name: Configuration of docker images, network and namespace for the kind cluster run: | diff --git a/CMakeLists.txt b/CMakeLists.txt index 91f8a0f4780f..ab3fc3ecfd8a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,7 +44,7 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101") # # Note: the CUDA torch version is derived from pyproject.toml and various # requirements.txt files and should be kept consistent. 
The ROCm torch -# versions are derived from dockerfiles/Dockerfile.rocm +# versions are derived from docker/Dockerfile.rocm # set(TORCH_SUPPORTED_VERSION_CUDA "2.6.0") set(TORCH_SUPPORTED_VERSION_ROCM "2.6.0") diff --git a/dockerfiles/Dockerfile b/docker/Dockerfile similarity index 100% rename from dockerfiles/Dockerfile rename to docker/Dockerfile diff --git a/dockerfiles/Dockerfile.arm b/docker/Dockerfile.arm similarity index 100% rename from dockerfiles/Dockerfile.arm rename to docker/Dockerfile.arm diff --git a/dockerfiles/Dockerfile.cpu b/docker/Dockerfile.cpu similarity index 100% rename from dockerfiles/Dockerfile.cpu rename to docker/Dockerfile.cpu diff --git a/dockerfiles/Dockerfile.hpu b/docker/Dockerfile.hpu similarity index 100% rename from dockerfiles/Dockerfile.hpu rename to docker/Dockerfile.hpu diff --git a/dockerfiles/Dockerfile.neuron b/docker/Dockerfile.neuron similarity index 100% rename from dockerfiles/Dockerfile.neuron rename to docker/Dockerfile.neuron diff --git a/dockerfiles/Dockerfile.openvino b/docker/Dockerfile.openvino similarity index 100% rename from dockerfiles/Dockerfile.openvino rename to docker/Dockerfile.openvino diff --git a/dockerfiles/Dockerfile.ppc64le b/docker/Dockerfile.ppc64le similarity index 100% rename from dockerfiles/Dockerfile.ppc64le rename to docker/Dockerfile.ppc64le diff --git a/dockerfiles/Dockerfile.rocm b/docker/Dockerfile.rocm similarity index 100% rename from dockerfiles/Dockerfile.rocm rename to docker/Dockerfile.rocm diff --git a/dockerfiles/Dockerfile.rocm_base b/docker/Dockerfile.rocm_base similarity index 100% rename from dockerfiles/Dockerfile.rocm_base rename to docker/Dockerfile.rocm_base diff --git a/dockerfiles/Dockerfile.s390x b/docker/Dockerfile.s390x similarity index 100% rename from dockerfiles/Dockerfile.s390x rename to docker/Dockerfile.s390x diff --git a/dockerfiles/Dockerfile.tpu b/docker/Dockerfile.tpu similarity index 100% rename from dockerfiles/Dockerfile.tpu rename to 
docker/Dockerfile.tpu diff --git a/dockerfiles/Dockerfile.xpu b/docker/Dockerfile.xpu similarity index 100% rename from dockerfiles/Dockerfile.xpu rename to docker/Dockerfile.xpu diff --git a/docs/source/contributing/dockerfile/dockerfile.md b/docs/source/contributing/dockerfile/dockerfile.md index 451bbe808695..90b9a33cfbe6 100644 --- a/docs/source/contributing/dockerfile/dockerfile.md +++ b/docs/source/contributing/dockerfile/dockerfile.md @@ -1,6 +1,6 @@ # Dockerfile -We provide a to construct the image for running an OpenAI compatible server with vLLM. +We provide a to construct the image for running an OpenAI compatible server with vLLM. More information about deploying with Docker can be found [here](#deployment-docker). Below is a visual representation of the multi-stage Dockerfile. The build graph contains the following nodes: @@ -28,7 +28,7 @@ The edges of the build graph represent: > Commands to regenerate the build graph (make sure to run it **from the \`root\` directory of the vLLM repository** where the dockerfile is present): > > ```bash - > dockerfilegraph -o png --legend --dpi 200 --max-label-length 50 --filename dockerfiles/Dockerfile + > dockerfilegraph -o png --legend --dpi 200 --max-label-length 50 --filename docker/Dockerfile > ``` > > or in case you want to run it directly with the docker image: @@ -43,7 +43,7 @@ The edges of the build graph represent: > --output png \ > --dpi 200 \ > --max-label-length 50 \ - > --filename dockerfiles/Dockerfile \ + > --filename docker/Dockerfile \ > --legend > ``` > diff --git a/docs/source/deployment/docker.md b/docs/source/deployment/docker.md index 7d30e45021bb..c4a7d69f0860 100644 --- a/docs/source/deployment/docker.md +++ b/docs/source/deployment/docker.md @@ -61,7 +61,7 @@ RUN uv pip install --system git+https://github.com/huggingface/transformers.git ## Building vLLM's Docker Image from Source -You can build and run vLLM from source via the provided . 
To build vLLM: +You can build and run vLLM from source via the provided . To build vLLM: ```console # optionally specifies: --build-arg max_jobs=8 --build-arg nvcc_threads=2 diff --git a/docs/source/deployment/nginx.md b/docs/source/deployment/nginx.md index 44f6a71fd475..bf404f1098c3 100644 --- a/docs/source/deployment/nginx.md +++ b/docs/source/deployment/nginx.md @@ -69,14 +69,14 @@ server { ```console cd $vllm_root -docker build -f dockerfiles/Dockerfile . --tag vllm +docker build -f docker/Dockerfile . --tag vllm ``` If you are behind proxy, you can pass the proxy settings to the docker build command as shown below: ```console cd $vllm_root -docker build -f dockerfiles/Dockerfile . --tag vllm --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy +docker build -f docker/Dockerfile . --tag vllm --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy ``` (nginxloadbalancer-nginx-docker-network)= diff --git a/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md b/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md index d2f037f45368..e3046f35ee15 100644 --- a/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md +++ b/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md @@ -86,7 +86,7 @@ Currently, there are no pre-built Intel Gaudi images. ### Build image from source ```console -docker build -f dockerfiles/Dockerfile.hpu -t vllm-hpu-env . +docker build -f docker/Dockerfile.hpu -t vllm-hpu-env . 
docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --rm vllm-hpu-env ``` diff --git a/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md b/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md index d278cfb8daee..b4bfb696faa2 100644 --- a/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md +++ b/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md @@ -132,7 +132,7 @@ Currently, there are no pre-built Neuron images. See for instructions on building the Docker image. -Make sure to use in place of the default Dockerfile. +Make sure to use in place of the default Dockerfile. ## Extra information diff --git a/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md b/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md index 243c2885b786..67f4cc94260b 100644 --- a/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md +++ b/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md @@ -54,7 +54,7 @@ Currently, there are no pre-built OpenVINO images. ### Build image from source ```console -docker build -f dockerfiles/Dockerfile.openvino -t vllm-openvino-env . +docker build -f docker/Dockerfile.openvino -t vllm-openvino-env . docker run -it --rm vllm-openvino-env ``` diff --git a/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md b/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md index bbb88e77a277..beb803cf0597 100644 --- a/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md +++ b/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md @@ -169,10 +169,10 @@ See for instructions on using the o ### Build image from source -You can use to build a Docker image with TPU support. +You can use to build a Docker image with TPU support. 
```console -docker build -f dockerfiles/Dockerfile.tpu -t vllm-tpu . +docker build -f docker/Dockerfile.tpu -t vllm-tpu . ``` Run the Docker image with the following command: diff --git a/docs/source/getting_started/installation/cpu.md b/docs/source/getting_started/installation/cpu.md index 6fbcb1b98d94..cef5b9832eee 100644 --- a/docs/source/getting_started/installation/cpu.md +++ b/docs/source/getting_started/installation/cpu.md @@ -164,7 +164,7 @@ Currently, there are no pre-build CPU images. ### Build image from source ```console -$ docker build -f dockerfiles/Dockerfile.cpu -t vllm-cpu-env --shm-size=4g . +$ docker build -f docker/Dockerfile.cpu -t vllm-cpu-env --shm-size=4g . $ docker run -it \ --rm \ --network=host \ @@ -174,7 +174,7 @@ $ docker run -it \ ``` ::::{tip} -For ARM or Apple silicon, use `dockerfiles/Dockerfile.arm` +For ARM or Apple silicon, use `docker/Dockerfile.arm` :::: ::::{tip} diff --git a/docs/source/getting_started/installation/gpu/rocm.inc.md b/docs/source/getting_started/installation/gpu/rocm.inc.md index 3696a04add96..3c1d888109fb 100644 --- a/docs/source/getting_started/installation/gpu/rocm.inc.md +++ b/docs/source/getting_started/installation/gpu/rocm.inc.md @@ -123,7 +123,7 @@ Building the Docker image from source is the recommended way to use vLLM with RO #### (Optional) Build an image with ROCm software stack -Build a docker image from <gh-file:dockerfiles/Dockerfile.rocm_base> which setup ROCm software stack needed by the vLLM. +Build a docker image from <gh-file:docker/Dockerfile.rocm_base> which setup ROCm software stack needed by the vLLM. **This step is optional as this rocm_base image is usually prebuilt and store at [Docker Hub](https://hub.docker.com/r/rocm/vllm-dev) under tag `rocm/vllm-dev:base` to speed up user experience.** If you choose to build this rocm_base image yourself, the steps are as follows. @@ -140,12 +140,12 @@ It is important that the user kicks off the docker build using buildkit.
Either To build vllm on ROCm 6.3 for MI200 and MI300 series, you can use the default: ```console -DOCKER_BUILDKIT=1 docker build -f dockerfiles/Dockerfile.rocm_base -t rocm/vllm-dev:base . +DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.rocm_base -t rocm/vllm-dev:base . ``` #### Build an image with vLLM -First, build a docker image from and launch a docker container from the image. +First, build a docker image from and launch a docker container from the image. It is important that the user kicks off the docker build using buildkit. Either the user put `DOCKER_BUILDKIT=1` as environment variable when calling docker build command, or the user needs to setup buildkit in the docker daemon configuration /etc/docker/daemon.json as follows and restart the daemon: ```console @@ -156,10 +156,10 @@ It is important that the user kicks off the docker build using buildkit. Either } ``` - uses ROCm 6.3 by default, but also supports ROCm 5.7, 6.0, 6.1, and 6.2, in older vLLM branches. + uses ROCm 6.3 by default, but also supports ROCm 5.7, 6.0, 6.1, and 6.2, in older vLLM branches. It provides flexibility to customize the build of docker image using the following arguments: -- `BASE_IMAGE`: specifies the base image used when running `docker build`. The default value `rocm/vllm-dev:base` is an image published and maintained by AMD. It is being built using +- `BASE_IMAGE`: specifies the base image used when running `docker build`. The default value `rocm/vllm-dev:base` is an image published and maintained by AMD. 
It is being built using - `USE_CYTHON`: An option to run cython compilation on a subset of python files upon docker build - `BUILD_RPD`: Include RocmProfileData profiling tool in the image - `ARG_PYTORCH_ROCM_ARCH`: Allows to override the gfx architecture values from the base docker image @@ -169,13 +169,13 @@ Their values can be passed in when running `docker build` with `--build-arg` opt To build vllm on ROCm 6.3 for MI200 and MI300 series, you can use the default: ```console -DOCKER_BUILDKIT=1 docker build -f dockerfiles/Dockerfile.rocm -t vllm-rocm . +DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.rocm -t vllm-rocm . ``` To build vllm on ROCm 6.3 for Radeon RX7900 series (gfx1100), you should pick the alternative base image: ```console -DOCKER_BUILDKIT=1 docker build --build-arg BASE_IMAGE="rocm/vllm-dev:navi_base" -f dockerfiles/Dockerfile.rocm -t vllm-rocm . +DOCKER_BUILDKIT=1 docker build --build-arg BASE_IMAGE="rocm/vllm-dev:navi_base" -f docker/Dockerfile.rocm -t vllm-rocm . ``` To run the above docker image `vllm-rocm`, use the below command: diff --git a/docs/source/getting_started/installation/gpu/xpu.inc.md b/docs/source/getting_started/installation/gpu/xpu.inc.md index c74832bde0bc..c41905f250f8 100644 --- a/docs/source/getting_started/installation/gpu/xpu.inc.md +++ b/docs/source/getting_started/installation/gpu/xpu.inc.md @@ -54,7 +54,7 @@ Currently, there are no pre-built XPU images. ### Build image from source ```console -$ docker build -f dockerfiles/Dockerfile.xpu -t vllm-xpu-env --shm-size=4g . +$ docker build -f docker/Dockerfile.xpu -t vllm-xpu-env --shm-size=4g . 
$ docker run -it \ --rm \ --network=host \ diff --git a/docs/source/getting_started/quickstart.md b/docs/source/getting_started/quickstart.md index 785a1fd2a676..bd047fd29c0f 100644 --- a/docs/source/getting_started/quickstart.md +++ b/docs/source/getting_started/quickstart.md @@ -198,5 +198,5 @@ Currently, vLLM supports multiple backends for efficient Attention computation a If desired, you can also manually set the backend of your choice by configuring the environment variable `VLLM_ATTENTION_BACKEND` to one of the following options: `FLASH_ATTN`, `FLASHINFER` or `XFORMERS`. ```{attention} -There are no pre-built vllm wheels containing Flash Infer, so you must install it in your environment first. Refer to the [Flash Infer official docs](https://docs.flashinfer.ai/) or see <gh-file:dockerfiles/Dockerfile> for instructions on how to install it. +There are no pre-built vllm wheels containing Flash Infer, so you must install it in your environment first. Refer to the [Flash Infer official docs](https://docs.flashinfer.ai/) or see <gh-file:docker/Dockerfile> for instructions on how to install it. ``` From 8f22b96f60f84ee75d43f8ea0eec80c5d6c99790 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Mon, 24 Mar 2025 16:21:22 +0100 Subject: [PATCH 3/6] Fix moved TPU test Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .buildkite/run-tpu-v1-test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/run-tpu-v1-test.sh b/.buildkite/run-tpu-v1-test.sh index 6562942ea3f8..3924df92294b 100755 --- a/.buildkite/run-tpu-v1-test.sh +++ b/.buildkite/run-tpu-v1-test.sh @@ -3,7 +3,7 @@ set -e # Build the docker image. -docker build -f Dockerfile.tpu -t vllm-tpu . +docker build -f docker/Dockerfile.tpu -t vllm-tpu . # Set up cleanup.
remove_docker_container() { docker rm -f tpu-test || true; } From 7e0dbf7bfb2ccc93f2154cd44ea66ed328e3bdf5 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Mon, 24 Mar 2025 20:44:05 +0100 Subject: [PATCH 4/6] Fix new dockerfile references Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- docs/source/getting_started/installation/cpu.md | 2 +- vllm/config.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/getting_started/installation/cpu.md b/docs/source/getting_started/installation/cpu.md index cef5b9832eee..a1ece6f1bbd1 100644 --- a/docs/source/getting_started/installation/cpu.md +++ b/docs/source/getting_started/installation/cpu.md @@ -178,7 +178,7 @@ For ARM or Apple silicon, use `docker/Dockerfile.arm` :::: ::::{tip} -For IBM Z (s390x), use `Dockerfile.s390x` and in `docker run` use flag `--dtype float` +For IBM Z (s390x), use `docker/Dockerfile.s390x` and in `docker run` use flag `--dtype float` :::: ## Supported features diff --git a/vllm/config.py b/vllm/config.py index 989e5b47516e..bc509a595ea3 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -309,8 +309,8 @@ def __init__( ) and backend == "FLASHINFER" and find_spec("flashinfer") is None: raise ValueError( "VLLM_ATTENTION_BACKEND is set to FLASHINFER, but flashinfer " - "module was not found." - "See https://github.com/vllm-project/vllm/blob/main/Dockerfile" + "module was not found. See" + "https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile" "for instructions on how to install it.") # The tokenizer version is consistent with the model version by default. 
From 0ed252cc61e2ea37873a3f540d58a4509b645191 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Mon, 24 Mar 2025 22:17:05 +0100 Subject: [PATCH 5/6] Respond to comment Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index bc509a595ea3..100a52e3afda 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -309,8 +309,8 @@ def __init__( ) and backend == "FLASHINFER" and find_spec("flashinfer") is None: raise ValueError( "VLLM_ATTENTION_BACKEND is set to FLASHINFER, but flashinfer " - "module was not found. See" - "https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile" + "module was not found. See " + "https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile " # noqa: E501 "for instructions on how to install it.") # The tokenizer version is consistent with the model version by default. From cb929c348ba3b99532e712f4d7c67aa34ceefc8d Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Mon, 31 Mar 2025 14:48:02 +0200 Subject: [PATCH 6/6] Fix some more dockerfile references Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .buildkite/release-pipeline.yaml | 8 ++++---- .buildkite/run-cpu-test.sh | 4 ++-- .buildkite/run-gh200-test.sh | 1 + docs/source/contributing/overview.md | 2 +- docs/source/deployment/docker.md | 3 ++- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index 61f138f829ca..a420759aad91 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -3,7 +3,7 @@ steps: agents: queue: cpu_queue_postmerge commands: - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag vllm-ci:build-image 
--target build --progress plain ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ." - "mkdir artifacts" - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'" - "bash .buildkite/upload-wheels.sh" @@ -14,7 +14,7 @@ steps: agents: queue: cpu_queue_postmerge commands: - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.1.0 --tag vllm-ci:build-image --target build --progress plain ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.1.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ." - "mkdir artifacts" - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'" - "bash .buildkite/upload-wheels.sh" @@ -31,7 +31,7 @@ steps: agents: queue: cpu_queue_postmerge commands: - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=11.8.0 --tag vllm-ci:build-image --target build --progress plain ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=11.8.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ." 
- "mkdir artifacts" - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'" - "bash .buildkite/upload-wheels.sh" @@ -48,7 +48,7 @@ steps: queue: cpu_queue_postmerge commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT --target vllm-openai --progress plain ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT --target vllm-openai --progress plain -f docker/Dockerfile ." - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT" - label: "Build and publish TPU release image" diff --git a/.buildkite/run-cpu-test.sh b/.buildkite/run-cpu-test.sh index bf9f191d3b06..40f3df96065d 100644 --- a/.buildkite/run-cpu-test.sh +++ b/.buildkite/run-cpu-test.sh @@ -18,8 +18,8 @@ trap remove_docker_container EXIT remove_docker_container # Try building the docker image -numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --tag cpu-test-"$BUILDKITE_BUILD_NUMBER" --target vllm-test -f Dockerfile.cpu . -numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" --tag cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 --target vllm-test -f Dockerfile.cpu . +numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --tag cpu-test-"$BUILDKITE_BUILD_NUMBER" --target vllm-test -f docker/Dockerfile.cpu . +numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" --tag cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 --target vllm-test -f docker/Dockerfile.cpu . 
# Run the image, setting --shm-size=4g for tensor parallel. docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus="$CORE_RANGE" \ diff --git a/.buildkite/run-gh200-test.sh b/.buildkite/run-gh200-test.sh index 5c004b47778f..8c64e14606d3 100644 --- a/.buildkite/run-gh200-test.sh +++ b/.buildkite/run-gh200-test.sh @@ -9,6 +9,7 @@ python3 use_existing_torch.py # Try building the docker image DOCKER_BUILDKIT=1 docker build . \ + --file docker/Dockerfile \ --target vllm-openai \ --platform "linux/arm64" \ -t gh200-test \ diff --git a/docs/source/contributing/overview.md b/docs/source/contributing/overview.md index 1e6f73dd524e..31c7059fda36 100644 --- a/docs/source/contributing/overview.md +++ b/docs/source/contributing/overview.md @@ -45,7 +45,7 @@ pytest tests/ ``` :::{tip} -Since the ships with Python 3.12, all tests in CI (except `mypy`) are run with Python 3.12. +Since the ships with Python 3.12, all tests in CI (except `mypy`) are run with Python 3.12. Therefore, we recommend developing with Python 3.12 to minimise the chance of your local environment clashing with our CI environment. ::: diff --git a/docs/source/deployment/docker.md b/docs/source/deployment/docker.md index 5a547f4fe0ea..1ccb04ac625c 100644 --- a/docs/source/deployment/docker.md +++ b/docs/source/deployment/docker.md @@ -65,7 +65,7 @@ You can build and run vLLM from source via the provided