From d058de5f4b1f7d17f98057de398db743369ec7df Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Tue, 11 Mar 2025 10:55:28 +0000
Subject: [PATCH 1/6] Move Dockerfiles to `dockerfiles/`

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 .buildkite/release-pipeline.yaml                   |  4 ++--
 .buildkite/run-cpu-test-ppc64le.sh                 |  2 +-
 .buildkite/run-cpu-test.sh                         |  4 ++--
 .buildkite/run-hpu-test.sh                         |  2 +-
 .buildkite/run-neuron-test.sh                      |  2 +-
 .buildkite/run-openvino-test.sh                    |  2 +-
 .buildkite/run-tpu-test.sh                         |  2 +-
 .buildkite/run-xpu-test.sh                         |  2 +-
 .github/mergify.yml                                |  2 +-
 .github/workflows/lint-and-deploy.yaml             |  2 +-
 CMakeLists.txt                                     |  2 +-
 Dockerfile => dockerfiles/Dockerfile               |  0
 Dockerfile.arm => dockerfiles/Dockerfile.arm       |  0
 Dockerfile.cpu => dockerfiles/Dockerfile.cpu       |  0
 Dockerfile.hpu => dockerfiles/Dockerfile.hpu       |  0
 Dockerfile.neuron => dockerfiles/Dockerfile.neuron |  0
 .../Dockerfile.openvino                            |  0
 .../Dockerfile.ppc64le                             |  0
 Dockerfile.rocm => dockerfiles/Dockerfile.rocm     |  0
 .../Dockerfile.rocm_base                           |  0
 Dockerfile.s390x => dockerfiles/Dockerfile.s390x   |  0
 Dockerfile.tpu => dockerfiles/Dockerfile.tpu       |  0
 Dockerfile.xpu => dockerfiles/Dockerfile.xpu       |  0
 docs/source/contributing/dockerfile/dockerfile.md  |  6 +++---
 docs/source/deployment/docker.md                   |  2 +-
 docs/source/deployment/nginx.md                    |  4 ++--
 .../installation/ai_accelerator/hpu-gaudi.inc.md   |  2 +-
 .../installation/ai_accelerator/neuron.inc.md      |  2 +-
 .../installation/ai_accelerator/openvino.inc.md    |  2 +-
 .../installation/ai_accelerator/tpu.inc.md         |  4 ++--
 .../getting_started/installation/cpu/index.md      |  4 ++--
 .../getting_started/installation/gpu/rocm.inc.md   | 14 +++++++-------
 .../getting_started/installation/gpu/xpu.inc.md    |  2 +-
 docs/source/getting_started/quickstart.md          |  2 +-
 34 files changed, 35 insertions(+), 35 deletions(-)
 rename Dockerfile => dockerfiles/Dockerfile (100%)
 rename Dockerfile.arm => dockerfiles/Dockerfile.arm (100%)
 rename Dockerfile.cpu => dockerfiles/Dockerfile.cpu (100%)
 rename Dockerfile.hpu => dockerfiles/Dockerfile.hpu (100%)
 rename Dockerfile.neuron => dockerfiles/Dockerfile.neuron (100%)
 rename Dockerfile.openvino => dockerfiles/Dockerfile.openvino (100%)
 rename Dockerfile.ppc64le => dockerfiles/Dockerfile.ppc64le (100%)
 rename Dockerfile.rocm => dockerfiles/Dockerfile.rocm (100%)
 rename Dockerfile.rocm_base => dockerfiles/Dockerfile.rocm_base (100%)
 rename Dockerfile.s390x => dockerfiles/Dockerfile.s390x (100%)
 rename Dockerfile.tpu => dockerfiles/Dockerfile.tpu (100%)
 rename Dockerfile.xpu => dockerfiles/Dockerfile.xpu (100%)

diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml
index 37cdab9e01ec..4dbe28394432 100644
--- a/.buildkite/release-pipeline.yaml
+++ b/.buildkite/release-pipeline.yaml
@@ -57,7 +57,7 @@ steps:
     agents:
       queue: tpu_queue_postmerge
     commands:
-      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --tag vllm/vllm-tpu:nightly --tag vllm/vllm-tpu:$BUILDKITE_COMMIT --progress plain -f Dockerfile.tpu ."
+      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --tag vllm/vllm-tpu:nightly --tag vllm/vllm-tpu:$BUILDKITE_COMMIT --progress plain -f dockerfiles/Dockerfile.tpu ."
       - "docker push vllm/vllm-tpu:nightly"
       - "docker push vllm/vllm-tpu:$BUILDKITE_COMMIT"
     plugins:
@@ -82,7 +82,7 @@ steps:
       queue: cpu_queue_postmerge
     commands:
       - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --progress plain -f Dockerfile.cpu ."
+      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --progress plain -f dockerfiles/Dockerfile.cpu ."
       - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)"
     env:
       DOCKER_BUILDKIT: "1"
diff --git a/.buildkite/run-cpu-test-ppc64le.sh b/.buildkite/run-cpu-test-ppc64le.sh
index bc06838d804f..4df04cc13bac 100755
--- a/.buildkite/run-cpu-test-ppc64le.sh
+++ b/.buildkite/run-cpu-test-ppc64le.sh
@@ -10,5 +10,5 @@ trap remove_docker_container EXIT
 remove_docker_container
 
 # Try building the docker image
-docker build -t cpu-test -f Dockerfile.ppc64le .
+docker build -t cpu-test -f dockerfiles/Dockerfile.ppc64le .
 
diff --git a/.buildkite/run-cpu-test.sh b/.buildkite/run-cpu-test.sh
index f6dad818ddc0..d6db7f6c1dd9 100644
--- a/.buildkite/run-cpu-test.sh
+++ b/.buildkite/run-cpu-test.sh
@@ -9,8 +9,8 @@ CORE_RANGE=${CORE_RANGE:-48-95}
 NUMA_NODE=${NUMA_NODE:-1}
 
 # Try building the docker image
-numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build -t cpu-test-"$BUILDKITE_BUILD_NUMBER" -f Dockerfile.cpu .
-numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" -t cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 -f Dockerfile.cpu .
+numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build -t cpu-test-"$BUILDKITE_BUILD_NUMBER" -f dockerfiles/Dockerfile.cpu .
+numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" -t cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 -f dockerfiles/Dockerfile.cpu .
 
 # Setup cleanup
 remove_docker_container() { set -e; docker rm -f cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2-"$NUMA_NODE" || true; }
diff --git a/.buildkite/run-hpu-test.sh b/.buildkite/run-hpu-test.sh
index f83eb927aae4..a0ab50d3fd7e 100644
--- a/.buildkite/run-hpu-test.sh
+++ b/.buildkite/run-hpu-test.sh
@@ -5,7 +5,7 @@
 set -ex
 
 # Try building the docker image
-docker build -t hpu-test-env -f Dockerfile.hpu .
+docker build -t hpu-test-env -f dockerfiles/Dockerfile.hpu .
 
 # Setup cleanup
 # certain versions of HPU software stack have a bug that can
diff --git a/.buildkite/run-neuron-test.sh b/.buildkite/run-neuron-test.sh
index 55c374fcc33d..89e0ce1af950 100644
--- a/.buildkite/run-neuron-test.sh
+++ b/.buildkite/run-neuron-test.sh
@@ -35,7 +35,7 @@ else
     date "+%s" > /tmp/neuron-docker-build-timestamp
 fi
 
-docker build -t "${image_name}" -f Dockerfile.neuron .
+docker build -t "${image_name}" -f dockerfiles/Dockerfile.neuron .
 
 # Setup cleanup
 remove_docker_container() {
diff --git a/.buildkite/run-openvino-test.sh b/.buildkite/run-openvino-test.sh
index a1103bed66ec..4429905687c0 100755
--- a/.buildkite/run-openvino-test.sh
+++ b/.buildkite/run-openvino-test.sh
@@ -5,7 +5,7 @@
 set -ex
 
 # Try building the docker image
-docker build -t openvino-test -f Dockerfile.openvino .
+docker build -t openvino-test -f dockerfiles/Dockerfile.openvino .
 
 # Setup cleanup
 remove_docker_container() { docker rm -f openvino-test || true; }
diff --git a/.buildkite/run-tpu-test.sh b/.buildkite/run-tpu-test.sh
index 650af0fac4c6..f67086840954 100755
--- a/.buildkite/run-tpu-test.sh
+++ b/.buildkite/run-tpu-test.sh
@@ -3,7 +3,7 @@
 set -e
 
 # Build the docker image.
-docker build -f Dockerfile.tpu -t vllm-tpu .
+docker build -f dockerfiles/Dockerfile.tpu -t vllm-tpu .
 
 # Set up cleanup.
 remove_docker_container() { docker rm -f tpu-test || true; }
diff --git a/.buildkite/run-xpu-test.sh b/.buildkite/run-xpu-test.sh
index d48639e5720c..1969c5681dd3 100644
--- a/.buildkite/run-xpu-test.sh
+++ b/.buildkite/run-xpu-test.sh
@@ -5,7 +5,7 @@
 set -ex
 
 # Try building the docker image
-docker build -t xpu-test -f Dockerfile.xpu .
+docker build -t xpu-test -f dockerfiles/Dockerfile.xpu .
 
 # Setup cleanup
 remove_docker_container() { docker rm -f xpu-test || true; }
diff --git a/.github/mergify.yml b/.github/mergify.yml
index 54f56210b286..f5ff660c1421 100644
--- a/.github/mergify.yml
+++ b/.github/mergify.yml
@@ -19,7 +19,7 @@ pull_request_rules:
       - files~=\.buildkite/
       - files~=^cmake/
       - files=CMakeLists.txt
-      - files~=^Dockerfile
+      - files~=^dockerfiles/Dockerfile
       - files~=^requirements.*\.txt
       - files=setup.py
   actions:
diff --git a/.github/workflows/lint-and-deploy.yaml b/.github/workflows/lint-and-deploy.yaml
index b199d0867a64..2e56a1e597f5 100644
--- a/.github/workflows/lint-and-deploy.yaml
+++ b/.github/workflows/lint-and-deploy.yaml
@@ -50,7 +50,7 @@ jobs:
         uses: helm/kind-action@a1b0e391336a6ee6713a0583f8c6240d70863de3 # v1.12.0
 
       - name: Build the Docker image vllm cpu
-        run: docker buildx build -f Dockerfile.cpu -t vllm-cpu-env .
+        run: docker buildx build -f dockerfiles/Dockerfile.cpu -t vllm-cpu-env .
 
       - name: Configuration of docker images, network and namespace for the kind cluster
         run: |
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5349b64aecb6..244f650569e2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,7 +44,7 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101")
 #
 # Note: the CUDA torch version is derived from pyproject.toml and various
 # requirements.txt files and should be kept consistent.  The ROCm torch
-# versions are derived from Dockerfile.rocm
+# versions are derived from dockerfiles/Dockerfile.rocm
 #
 set(TORCH_SUPPORTED_VERSION_CUDA "2.5.1")
 set(TORCH_SUPPORTED_VERSION_ROCM "2.5.1")
diff --git a/Dockerfile b/dockerfiles/Dockerfile
similarity index 100%
rename from Dockerfile
rename to dockerfiles/Dockerfile
diff --git a/Dockerfile.arm b/dockerfiles/Dockerfile.arm
similarity index 100%
rename from Dockerfile.arm
rename to dockerfiles/Dockerfile.arm
diff --git a/Dockerfile.cpu b/dockerfiles/Dockerfile.cpu
similarity index 100%
rename from Dockerfile.cpu
rename to dockerfiles/Dockerfile.cpu
diff --git a/Dockerfile.hpu b/dockerfiles/Dockerfile.hpu
similarity index 100%
rename from Dockerfile.hpu
rename to dockerfiles/Dockerfile.hpu
diff --git a/Dockerfile.neuron b/dockerfiles/Dockerfile.neuron
similarity index 100%
rename from Dockerfile.neuron
rename to dockerfiles/Dockerfile.neuron
diff --git a/Dockerfile.openvino b/dockerfiles/Dockerfile.openvino
similarity index 100%
rename from Dockerfile.openvino
rename to dockerfiles/Dockerfile.openvino
diff --git a/Dockerfile.ppc64le b/dockerfiles/Dockerfile.ppc64le
similarity index 100%
rename from Dockerfile.ppc64le
rename to dockerfiles/Dockerfile.ppc64le
diff --git a/Dockerfile.rocm b/dockerfiles/Dockerfile.rocm
similarity index 100%
rename from Dockerfile.rocm
rename to dockerfiles/Dockerfile.rocm
diff --git a/Dockerfile.rocm_base b/dockerfiles/Dockerfile.rocm_base
similarity index 100%
rename from Dockerfile.rocm_base
rename to dockerfiles/Dockerfile.rocm_base
diff --git a/Dockerfile.s390x b/dockerfiles/Dockerfile.s390x
similarity index 100%
rename from Dockerfile.s390x
rename to dockerfiles/Dockerfile.s390x
diff --git a/Dockerfile.tpu b/dockerfiles/Dockerfile.tpu
similarity index 100%
rename from Dockerfile.tpu
rename to dockerfiles/Dockerfile.tpu
diff --git a/Dockerfile.xpu b/dockerfiles/Dockerfile.xpu
similarity index 100%
rename from Dockerfile.xpu
rename to dockerfiles/Dockerfile.xpu
diff --git a/docs/source/contributing/dockerfile/dockerfile.md b/docs/source/contributing/dockerfile/dockerfile.md
index 96674805df53..451bbe808695 100644
--- a/docs/source/contributing/dockerfile/dockerfile.md
+++ b/docs/source/contributing/dockerfile/dockerfile.md
@@ -1,6 +1,6 @@
 # Dockerfile
 
-We provide a <gh-file:Dockerfile> to construct the image for running an OpenAI compatible server with vLLM.
+We provide a <gh-file:dockerfiles/Dockerfile> to construct the image for running an OpenAI compatible server with vLLM.
 More information about deploying with Docker can be found [here](#deployment-docker).
 
 Below is a visual representation of the multi-stage Dockerfile. The build graph contains the following nodes:
@@ -28,7 +28,7 @@ The edges of the build graph represent:
   > Commands to regenerate the build graph (make sure to run it **from the \`root\` directory of the vLLM repository** where the dockerfile is present):
   >
   > ```bash
-  > dockerfilegraph -o png --legend --dpi 200 --max-label-length 50 --filename Dockerfile
+  > dockerfilegraph -o png --legend --dpi 200 --max-label-length 50 --filename dockerfiles/Dockerfile
   > ```
   >
   > or in case you want to run it directly with the docker image:
@@ -43,7 +43,7 @@ The edges of the build graph represent:
   >    --output png \
   >    --dpi 200 \
   >    --max-label-length 50 \
-  >    --filename Dockerfile \
+  >    --filename dockerfiles/Dockerfile \
   >    --legend
   > ```
   >
diff --git a/docs/source/deployment/docker.md b/docs/source/deployment/docker.md
index 9e52a2182cfb..7d30e45021bb 100644
--- a/docs/source/deployment/docker.md
+++ b/docs/source/deployment/docker.md
@@ -61,7 +61,7 @@ RUN uv pip install --system git+https://github.com/huggingface/transformers.git
 
 ## Building vLLM's Docker Image from Source
 
-You can build and run vLLM from source via the provided <gh-file:Dockerfile>. To build vLLM:
+You can build and run vLLM from source via the provided <gh-file:dockerfiles/Dockerfile>. To build vLLM:
 
 ```console
 # optionally specifies: --build-arg max_jobs=8 --build-arg nvcc_threads=2
diff --git a/docs/source/deployment/nginx.md b/docs/source/deployment/nginx.md
index 62816f514c00..44f6a71fd475 100644
--- a/docs/source/deployment/nginx.md
+++ b/docs/source/deployment/nginx.md
@@ -69,14 +69,14 @@ server {
 
 ```console
 cd $vllm_root
-docker build -f Dockerfile . --tag vllm
+docker build -f dockerfiles/Dockerfile . --tag vllm
 ```
 
 If you are behind proxy, you can pass the proxy settings to the docker build command as shown below:
 
 ```console
 cd $vllm_root
-docker build -f Dockerfile . --tag vllm --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy
+docker build -f dockerfiles/Dockerfile . --tag vllm --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy
 ```
 
 (nginxloadbalancer-nginx-docker-network)=
diff --git a/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md b/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md
index 7e52f6048909..b670e5d4d885 100644
--- a/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md
+++ b/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md
@@ -86,7 +86,7 @@ Currently, there are no pre-built Intel Gaudi images.
 ### Build image from source
 
 ```console
-docker build -f Dockerfile.hpu -t vllm-hpu-env  .
+docker build -f dockerfiles/Dockerfile.hpu -t vllm-hpu-env  .
 docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --rm vllm-hpu-env
 ```
 
diff --git a/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md b/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md
index 4c668a8e6892..d278cfb8daee 100644
--- a/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md
+++ b/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md
@@ -132,7 +132,7 @@ Currently, there are no pre-built Neuron images.
 
 See <project:#deployment-docker-build-image-from-source> for instructions on building the Docker image.
 
-Make sure to use <gh-file:Dockerfile.neuron> in place of the default Dockerfile.
+Make sure to use <gh-file:dockerfiles/Dockerfile.neuron> in place of the default Dockerfile.
 
 ## Extra information
 
diff --git a/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md b/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md
index 5641c1563656..ef64767070a4 100644
--- a/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md
+++ b/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md
@@ -54,7 +54,7 @@ Currently, there are no pre-built OpenVINO images.
 ### Build image from source
 
 ```console
-docker build -f Dockerfile.openvino -t vllm-openvino-env .
+docker build -f dockerfiles/Dockerfile.openvino -t vllm-openvino-env .
 docker run -it --rm vllm-openvino-env
 ```
 
diff --git a/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md b/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md
index 6c7bbf602499..bbb88e77a277 100644
--- a/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md
+++ b/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md
@@ -169,10 +169,10 @@ See <project:#deployment-docker-pre-built-image> for instructions on using the o
 
 ### Build image from source
 
-You can use <gh-file:Dockerfile.tpu> to build a Docker image with TPU support.
+You can use <gh-file:dockerfiles/Dockerfile.tpu> to build a Docker image with TPU support.
 
 ```console
-docker build -f Dockerfile.tpu -t vllm-tpu .
+docker build -f dockerfiles/Dockerfile.tpu -t vllm-tpu .
 ```
 
 Run the Docker image with the following command:
diff --git a/docs/source/getting_started/installation/cpu/index.md b/docs/source/getting_started/installation/cpu/index.md
index 9c5977939cc5..697fecba69ac 100644
--- a/docs/source/getting_started/installation/cpu/index.md
+++ b/docs/source/getting_started/installation/cpu/index.md
@@ -134,7 +134,7 @@ Currently, there are no pre-build CPU images.
 ### Build image from source
 
 ```console
-$ docker build -f Dockerfile.cpu -t vllm-cpu-env --shm-size=4g .
+$ docker build -f dockerfiles/Dockerfile.cpu -t vllm-cpu-env --shm-size=4g .
 $ docker run -it \
              --rm \
              --network=host \
@@ -144,7 +144,7 @@ $ docker run -it \
 ```
 
 ::::{tip}
-For ARM or Apple silicon, use `Dockerfile.arm`
+For ARM or Apple silicon, use `dockerfiles/Dockerfile.arm`
 ::::
 
 ## Supported features
diff --git a/docs/source/getting_started/installation/gpu/rocm.inc.md b/docs/source/getting_started/installation/gpu/rocm.inc.md
index 4381cef5e96a..3696a04add96 100644
--- a/docs/source/getting_started/installation/gpu/rocm.inc.md
+++ b/docs/source/getting_started/installation/gpu/rocm.inc.md
@@ -123,7 +123,7 @@ Building the Docker image from source is the recommended way to use vLLM with RO
 
 #### (Optional) Build an image with ROCm software stack
 
-Build a docker image from <gh-file:Dockerfile.rocm_base> which setup ROCm software stack needed by the vLLM.
+Build a docker image from <gh-file:dockerfiles/Dockerfile.rocm_base> which setup ROCm software stack needed by the vLLM.
 **This step is optional as this rocm_base image is usually prebuilt and store at [Docker Hub](https://hub.docker.com/r/rocm/vllm-dev) under tag `rocm/vllm-dev:base` to speed up user experience.**
 If you choose to build this rocm_base image yourself, the steps are as follows.
 
@@ -140,12 +140,12 @@ It is important that the user kicks off the docker build using buildkit. Either
 To build vllm on ROCm 6.3 for MI200 and MI300 series, you can use the default:
 
 ```console
-DOCKER_BUILDKIT=1 docker build -f Dockerfile.rocm_base -t rocm/vllm-dev:base .
+DOCKER_BUILDKIT=1 docker build -f dockerfiles/Dockerfile.rocm_base -t rocm/vllm-dev:base .
 ```
 
 #### Build an image with vLLM
 
-First, build a docker image from <gh-file:Dockerfile.rocm> and launch a docker container from the image.
+First, build a docker image from <gh-file:dockerfiles/Dockerfile.rocm> and launch a docker container from the image.
 It is important that the user kicks off the docker build using buildkit. Either the user put `DOCKER_BUILDKIT=1` as environment variable when calling docker build command, or the user needs to setup buildkit in the docker daemon configuration /etc/docker/daemon.json as follows and restart the daemon:
 
 ```console
@@ -156,10 +156,10 @@ It is important that the user kicks off the docker build using buildkit. Either
 }
 ```
 
-<gh-file:Dockerfile.rocm> uses ROCm 6.3 by default, but also supports ROCm 5.7, 6.0, 6.1, and 6.2, in older vLLM branches.
+<gh-file:dockerfiles/Dockerfile.rocm> uses ROCm 6.3 by default, but also supports ROCm 5.7, 6.0, 6.1, and 6.2, in older vLLM branches.
 It provides flexibility to customize the build of docker image using the following arguments:
 
-- `BASE_IMAGE`: specifies the base image used when running `docker build`. The default value `rocm/vllm-dev:base` is an image published and maintained by AMD. It is being built using <gh-file:Dockerfile.rocm_base>
+- `BASE_IMAGE`: specifies the base image used when running `docker build`. The default value `rocm/vllm-dev:base` is an image published and maintained by AMD. It is being built using <gh-file:dockerfiles/Dockerfile.rocm_base>
 - `USE_CYTHON`: An option to run cython compilation on a subset of python files upon docker build
 - `BUILD_RPD`: Include RocmProfileData profiling tool in the image
 - `ARG_PYTORCH_ROCM_ARCH`: Allows to override the gfx architecture values from the base docker image
@@ -169,13 +169,13 @@ Their values can be passed in when running `docker build` with `--build-arg` opt
 To build vllm on ROCm 6.3 for MI200 and MI300 series, you can use the default:
 
 ```console
-DOCKER_BUILDKIT=1 docker build -f Dockerfile.rocm -t vllm-rocm .
+DOCKER_BUILDKIT=1 docker build -f dockerfiles/Dockerfile.rocm -t vllm-rocm .
 ```
 
 To build vllm on ROCm 6.3 for Radeon RX7900 series (gfx1100), you should pick the alternative base image:
 
 ```console
-DOCKER_BUILDKIT=1 docker build --build-arg BASE_IMAGE="rocm/vllm-dev:navi_base" -f Dockerfile.rocm -t vllm-rocm .
+DOCKER_BUILDKIT=1 docker build --build-arg BASE_IMAGE="rocm/vllm-dev:navi_base" -f dockerfiles/Dockerfile.rocm -t vllm-rocm .
 ```
 
 To run the above docker image `vllm-rocm`, use the below command:
diff --git a/docs/source/getting_started/installation/gpu/xpu.inc.md b/docs/source/getting_started/installation/gpu/xpu.inc.md
index 9678c25b1dd8..269323a2146b 100644
--- a/docs/source/getting_started/installation/gpu/xpu.inc.md
+++ b/docs/source/getting_started/installation/gpu/xpu.inc.md
@@ -48,7 +48,7 @@ Currently, there are no pre-built XPU images.
 ### Build image from source
 
 ```console
-$ docker build -f Dockerfile.xpu -t vllm-xpu-env --shm-size=4g .
+$ docker build -f dockerfiles/Dockerfile.xpu -t vllm-xpu-env --shm-size=4g .
 $ docker run -it \
              --rm \
              --network=host \
diff --git a/docs/source/getting_started/quickstart.md b/docs/source/getting_started/quickstart.md
index 452bee2385fe..785a1fd2a676 100644
--- a/docs/source/getting_started/quickstart.md
+++ b/docs/source/getting_started/quickstart.md
@@ -198,5 +198,5 @@ Currently, vLLM supports multiple backends for efficient Attention computation a
 If desired, you can also manually set the backend of your choice by configuring the environment variable `VLLM_ATTENTION_BACKEND` to one of the following options: `FLASH_ATTN`, `FLASHINFER` or `XFORMERS`.
 
 ```{attention}
-There are no pre-built vllm wheels containing Flash Infer, so you must install it in your environment first. Refer to the [Flash Infer official docs](https://docs.flashinfer.ai/) or see [Dockerfile](https://github.com/vllm-project/vllm/blob/main/Dockerfile) for instructions on how to install it.
+There are no pre-built vllm wheels containing Flash Infer, so you must install it in your environment first. Refer to the [Flash Infer official docs](https://docs.flashinfer.ai/) or see <gh-file:dockerfiles/Dockerfile> for instructions on how to install it.
 ```

From 957a72c78f4539511b1cc80af90fbb182b0ae78f Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Mon, 17 Mar 2025 15:44:40 +0100
Subject: [PATCH 2/6] `dockerfiles/` -> `docker/`

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 .buildkite/release-pipeline.yaml                   |  4 ++--
 .buildkite/run-cpu-test-ppc64le.sh                 |  2 +-
 .buildkite/run-cpu-test.sh                         |  4 ++--
 .buildkite/run-hpu-test.sh                         |  2 +-
 .buildkite/run-neuron-test.sh                      |  2 +-
 .buildkite/run-openvino-test.sh                    |  2 +-
 .buildkite/run-tpu-test.sh                         |  2 +-
 .buildkite/run-xpu-test.sh                         |  2 +-
 .github/mergify.yml                                |  2 +-
 .github/workflows/lint-and-deploy.yaml             |  2 +-
 CMakeLists.txt                                     |  2 +-
 {dockerfiles => docker}/Dockerfile                 |  0
 {dockerfiles => docker}/Dockerfile.arm             |  0
 {dockerfiles => docker}/Dockerfile.cpu             |  0
 {dockerfiles => docker}/Dockerfile.hpu             |  0
 {dockerfiles => docker}/Dockerfile.neuron          |  0
 {dockerfiles => docker}/Dockerfile.openvino        |  0
 {dockerfiles => docker}/Dockerfile.ppc64le         |  0
 {dockerfiles => docker}/Dockerfile.rocm            |  0
 {dockerfiles => docker}/Dockerfile.rocm_base       |  0
 {dockerfiles => docker}/Dockerfile.s390x           |  0
 {dockerfiles => docker}/Dockerfile.tpu             |  0
 {dockerfiles => docker}/Dockerfile.xpu             |  0
 docs/source/contributing/dockerfile/dockerfile.md  |  6 +++---
 docs/source/deployment/docker.md                   |  2 +-
 docs/source/deployment/nginx.md                    |  4 ++--
 .../installation/ai_accelerator/hpu-gaudi.inc.md   |  2 +-
 .../installation/ai_accelerator/neuron.inc.md      |  2 +-
 .../installation/ai_accelerator/openvino.inc.md    |  2 +-
 .../installation/ai_accelerator/tpu.inc.md         |  4 ++--
 docs/source/getting_started/installation/cpu.md    |  4 ++--
 .../getting_started/installation/gpu/rocm.inc.md   | 14 +++++++-------
 .../getting_started/installation/gpu/xpu.inc.md    |  2 +-
 docs/source/getting_started/quickstart.md          |  2 +-
 34 files changed, 35 insertions(+), 35 deletions(-)
 rename {dockerfiles => docker}/Dockerfile (100%)
 rename {dockerfiles => docker}/Dockerfile.arm (100%)
 rename {dockerfiles => docker}/Dockerfile.cpu (100%)
 rename {dockerfiles => docker}/Dockerfile.hpu (100%)
 rename {dockerfiles => docker}/Dockerfile.neuron (100%)
 rename {dockerfiles => docker}/Dockerfile.openvino (100%)
 rename {dockerfiles => docker}/Dockerfile.ppc64le (100%)
 rename {dockerfiles => docker}/Dockerfile.rocm (100%)
 rename {dockerfiles => docker}/Dockerfile.rocm_base (100%)
 rename {dockerfiles => docker}/Dockerfile.s390x (100%)
 rename {dockerfiles => docker}/Dockerfile.tpu (100%)
 rename {dockerfiles => docker}/Dockerfile.xpu (100%)

diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml
index 4dbe28394432..1776e073610f 100644
--- a/.buildkite/release-pipeline.yaml
+++ b/.buildkite/release-pipeline.yaml
@@ -57,7 +57,7 @@ steps:
     agents:
       queue: tpu_queue_postmerge
     commands:
-      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --tag vllm/vllm-tpu:nightly --tag vllm/vllm-tpu:$BUILDKITE_COMMIT --progress plain -f dockerfiles/Dockerfile.tpu ."
+      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --tag vllm/vllm-tpu:nightly --tag vllm/vllm-tpu:$BUILDKITE_COMMIT --progress plain -f docker/Dockerfile.tpu ."
       - "docker push vllm/vllm-tpu:nightly"
       - "docker push vllm/vllm-tpu:$BUILDKITE_COMMIT"
     plugins:
@@ -82,7 +82,7 @@ steps:
       queue: cpu_queue_postmerge
     commands:
       - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --progress plain -f dockerfiles/Dockerfile.cpu ."
+      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --progress plain -f docker/Dockerfile.cpu ."
       - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)"
     env:
       DOCKER_BUILDKIT: "1"
diff --git a/.buildkite/run-cpu-test-ppc64le.sh b/.buildkite/run-cpu-test-ppc64le.sh
index 4df04cc13bac..9c5cf7cad948 100755
--- a/.buildkite/run-cpu-test-ppc64le.sh
+++ b/.buildkite/run-cpu-test-ppc64le.sh
@@ -10,5 +10,5 @@ trap remove_docker_container EXIT
 remove_docker_container
 
 # Try building the docker image
-docker build -t cpu-test -f dockerfiles/Dockerfile.ppc64le .
+docker build -t cpu-test -f docker/Dockerfile.ppc64le .
 
diff --git a/.buildkite/run-cpu-test.sh b/.buildkite/run-cpu-test.sh
index a0034bf2a333..7ddf36b1db99 100644
--- a/.buildkite/run-cpu-test.sh
+++ b/.buildkite/run-cpu-test.sh
@@ -9,8 +9,8 @@ CORE_RANGE=${CORE_RANGE:-48-95}
 NUMA_NODE=${NUMA_NODE:-1}
 
 # Try building the docker image
-numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build -t cpu-test-"$BUILDKITE_BUILD_NUMBER" -f dockerfiles/Dockerfile.cpu .
-numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" -t cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 -f dockerfiles/Dockerfile.cpu .
+numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build -t cpu-test-"$BUILDKITE_BUILD_NUMBER" -f docker/Dockerfile.cpu .
+numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" -t cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 -f docker/Dockerfile.cpu .
 
 # Setup cleanup
 remove_docker_container() { set -e; docker rm -f cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2-"$NUMA_NODE" || true; }
diff --git a/.buildkite/run-hpu-test.sh b/.buildkite/run-hpu-test.sh
index a0ab50d3fd7e..95b6ac37f185 100644
--- a/.buildkite/run-hpu-test.sh
+++ b/.buildkite/run-hpu-test.sh
@@ -5,7 +5,7 @@
 set -ex
 
 # Try building the docker image
-docker build -t hpu-test-env -f dockerfiles/Dockerfile.hpu .
+docker build -t hpu-test-env -f docker/Dockerfile.hpu .
 
 # Setup cleanup
 # certain versions of HPU software stack have a bug that can
diff --git a/.buildkite/run-neuron-test.sh b/.buildkite/run-neuron-test.sh
index c7bfa3ea1ce6..ec6a080eb499 100644
--- a/.buildkite/run-neuron-test.sh
+++ b/.buildkite/run-neuron-test.sh
@@ -35,7 +35,7 @@ else
     date "+%s" > /tmp/neuron-docker-build-timestamp
 fi
 
-docker build -t "${image_name}" -f dockerfiles/Dockerfile.neuron .
+docker build -t "${image_name}" -f docker/Dockerfile.neuron .
 
 # Setup cleanup
 remove_docker_container() {
diff --git a/.buildkite/run-openvino-test.sh b/.buildkite/run-openvino-test.sh
index 4429905687c0..9be8fde5eb09 100755
--- a/.buildkite/run-openvino-test.sh
+++ b/.buildkite/run-openvino-test.sh
@@ -5,7 +5,7 @@
 set -ex
 
 # Try building the docker image
-docker build -t openvino-test -f dockerfiles/Dockerfile.openvino .
+docker build -t openvino-test -f docker/Dockerfile.openvino .
 
 # Setup cleanup
 remove_docker_container() { docker rm -f openvino-test || true; }
diff --git a/.buildkite/run-tpu-test.sh b/.buildkite/run-tpu-test.sh
index 289c28b237ef..67a979744f37 100755
--- a/.buildkite/run-tpu-test.sh
+++ b/.buildkite/run-tpu-test.sh
@@ -3,7 +3,7 @@
 set -e
 
 # Build the docker image.
-docker build -f dockerfiles/Dockerfile.tpu -t vllm-tpu .
+docker build -f docker/Dockerfile.tpu -t vllm-tpu .
 
 # Set up cleanup.
 remove_docker_container() { docker rm -f tpu-test || true; }
diff --git a/.buildkite/run-xpu-test.sh b/.buildkite/run-xpu-test.sh
index 210d768a4e99..1534b659545d 100644
--- a/.buildkite/run-xpu-test.sh
+++ b/.buildkite/run-xpu-test.sh
@@ -8,7 +8,7 @@ image_name="xpu/vllm-ci:${BUILDKITE_COMMIT}"
 container_name="xpu_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)"
 
 # Try building the docker image
-docker build -t ${image_name} -f dockerfiles/Dockerfile.xpu .
+docker build -t ${image_name} -f docker/Dockerfile.xpu .
 
 # Setup cleanup
 remove_docker_container() { 
diff --git a/.github/mergify.yml b/.github/mergify.yml
index f5ff660c1421..e8dbab27bad7 100644
--- a/.github/mergify.yml
+++ b/.github/mergify.yml
@@ -19,7 +19,7 @@ pull_request_rules:
       - files~=\.buildkite/
       - files~=^cmake/
       - files=CMakeLists.txt
-      - files~=^dockerfiles/Dockerfile
+      - files~=^docker/Dockerfile
       - files~=^requirements.*\.txt
       - files=setup.py
   actions:
diff --git a/.github/workflows/lint-and-deploy.yaml b/.github/workflows/lint-and-deploy.yaml
index 2e56a1e597f5..7b1d9f69938c 100644
--- a/.github/workflows/lint-and-deploy.yaml
+++ b/.github/workflows/lint-and-deploy.yaml
@@ -50,7 +50,7 @@ jobs:
         uses: helm/kind-action@a1b0e391336a6ee6713a0583f8c6240d70863de3 # v1.12.0
 
       - name: Build the Docker image vllm cpu
-        run: docker buildx build -f dockerfiles/Dockerfile.cpu -t vllm-cpu-env .
+        run: docker buildx build -f docker/Dockerfile.cpu -t vllm-cpu-env .
 
       - name: Configuration of docker images, network and namespace for the kind cluster
         run: |
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 91f8a0f4780f..ab3fc3ecfd8a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,7 +44,7 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101")
 #
 # Note: the CUDA torch version is derived from pyproject.toml and various
 # requirements.txt files and should be kept consistent.  The ROCm torch
-# versions are derived from dockerfiles/Dockerfile.rocm
+# versions are derived from docker/Dockerfile.rocm
 #
 set(TORCH_SUPPORTED_VERSION_CUDA "2.6.0")
 set(TORCH_SUPPORTED_VERSION_ROCM "2.6.0")
diff --git a/dockerfiles/Dockerfile b/docker/Dockerfile
similarity index 100%
rename from dockerfiles/Dockerfile
rename to docker/Dockerfile
diff --git a/dockerfiles/Dockerfile.arm b/docker/Dockerfile.arm
similarity index 100%
rename from dockerfiles/Dockerfile.arm
rename to docker/Dockerfile.arm
diff --git a/dockerfiles/Dockerfile.cpu b/docker/Dockerfile.cpu
similarity index 100%
rename from dockerfiles/Dockerfile.cpu
rename to docker/Dockerfile.cpu
diff --git a/dockerfiles/Dockerfile.hpu b/docker/Dockerfile.hpu
similarity index 100%
rename from dockerfiles/Dockerfile.hpu
rename to docker/Dockerfile.hpu
diff --git a/dockerfiles/Dockerfile.neuron b/docker/Dockerfile.neuron
similarity index 100%
rename from dockerfiles/Dockerfile.neuron
rename to docker/Dockerfile.neuron
diff --git a/dockerfiles/Dockerfile.openvino b/docker/Dockerfile.openvino
similarity index 100%
rename from dockerfiles/Dockerfile.openvino
rename to docker/Dockerfile.openvino
diff --git a/dockerfiles/Dockerfile.ppc64le b/docker/Dockerfile.ppc64le
similarity index 100%
rename from dockerfiles/Dockerfile.ppc64le
rename to docker/Dockerfile.ppc64le
diff --git a/dockerfiles/Dockerfile.rocm b/docker/Dockerfile.rocm
similarity index 100%
rename from dockerfiles/Dockerfile.rocm
rename to docker/Dockerfile.rocm
diff --git a/dockerfiles/Dockerfile.rocm_base b/docker/Dockerfile.rocm_base
similarity index 100%
rename from dockerfiles/Dockerfile.rocm_base
rename to docker/Dockerfile.rocm_base
diff --git a/dockerfiles/Dockerfile.s390x b/docker/Dockerfile.s390x
similarity index 100%
rename from dockerfiles/Dockerfile.s390x
rename to docker/Dockerfile.s390x
diff --git a/dockerfiles/Dockerfile.tpu b/docker/Dockerfile.tpu
similarity index 100%
rename from dockerfiles/Dockerfile.tpu
rename to docker/Dockerfile.tpu
diff --git a/dockerfiles/Dockerfile.xpu b/docker/Dockerfile.xpu
similarity index 100%
rename from dockerfiles/Dockerfile.xpu
rename to docker/Dockerfile.xpu
diff --git a/docs/source/contributing/dockerfile/dockerfile.md b/docs/source/contributing/dockerfile/dockerfile.md
index 451bbe808695..90b9a33cfbe6 100644
--- a/docs/source/contributing/dockerfile/dockerfile.md
+++ b/docs/source/contributing/dockerfile/dockerfile.md
@@ -1,6 +1,6 @@
 # Dockerfile
 
-We provide a <gh-file:dockerfiles/Dockerfile> to construct the image for running an OpenAI compatible server with vLLM.
+We provide a <gh-file:docker/Dockerfile> to construct the image for running an OpenAI compatible server with vLLM.
 More information about deploying with Docker can be found [here](#deployment-docker).
 
 Below is a visual representation of the multi-stage Dockerfile. The build graph contains the following nodes:
@@ -28,7 +28,7 @@ The edges of the build graph represent:
   > Commands to regenerate the build graph (make sure to run it **from the \`root\` directory of the vLLM repository** where the dockerfile is present):
   >
   > ```bash
-  > dockerfilegraph -o png --legend --dpi 200 --max-label-length 50 --filename dockerfiles/Dockerfile
+  > dockerfilegraph -o png --legend --dpi 200 --max-label-length 50 --filename docker/Dockerfile
   > ```
   >
   > or in case you want to run it directly with the docker image:
@@ -43,7 +43,7 @@ The edges of the build graph represent:
   >    --output png \
   >    --dpi 200 \
   >    --max-label-length 50 \
-  >    --filename dockerfiles/Dockerfile \
+  >    --filename docker/Dockerfile \
   >    --legend
   > ```
   >
diff --git a/docs/source/deployment/docker.md b/docs/source/deployment/docker.md
index 7d30e45021bb..c4a7d69f0860 100644
--- a/docs/source/deployment/docker.md
+++ b/docs/source/deployment/docker.md
@@ -61,7 +61,7 @@ RUN uv pip install --system git+https://github.com/huggingface/transformers.git
 
 ## Building vLLM's Docker Image from Source
 
-You can build and run vLLM from source via the provided <gh-file:dockerfiles/Dockerfile>. To build vLLM:
+You can build and run vLLM from source via the provided <gh-file:docker/Dockerfile>. To build vLLM:
 
 ```console
 # optionally specifies: --build-arg max_jobs=8 --build-arg nvcc_threads=2
diff --git a/docs/source/deployment/nginx.md b/docs/source/deployment/nginx.md
index 44f6a71fd475..bf404f1098c3 100644
--- a/docs/source/deployment/nginx.md
+++ b/docs/source/deployment/nginx.md
@@ -69,14 +69,14 @@ server {
 
 ```console
 cd $vllm_root
-docker build -f dockerfiles/Dockerfile . --tag vllm
+docker build -f docker/Dockerfile . --tag vllm
 ```
 
 If you are behind proxy, you can pass the proxy settings to the docker build command as shown below:
 
 ```console
 cd $vllm_root
-docker build -f dockerfiles/Dockerfile . --tag vllm --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy
+docker build -f docker/Dockerfile . --tag vllm --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy
 ```
 
 (nginxloadbalancer-nginx-docker-network)=
diff --git a/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md b/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md
index d2f037f45368..e3046f35ee15 100644
--- a/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md
+++ b/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md
@@ -86,7 +86,7 @@ Currently, there are no pre-built Intel Gaudi images.
 ### Build image from source
 
 ```console
-docker build -f dockerfiles/Dockerfile.hpu -t vllm-hpu-env  .
+docker build -f docker/Dockerfile.hpu -t vllm-hpu-env  .
 docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --rm vllm-hpu-env
 ```
 
diff --git a/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md b/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md
index d278cfb8daee..b4bfb696faa2 100644
--- a/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md
+++ b/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md
@@ -132,7 +132,7 @@ Currently, there are no pre-built Neuron images.
 
 See <project:#deployment-docker-build-image-from-source> for instructions on building the Docker image.
 
-Make sure to use <gh-file:dockerfiles/Dockerfile.neuron> in place of the default Dockerfile.
+Make sure to use <gh-file:docker/Dockerfile.neuron> in place of the default Dockerfile.
 
 ## Extra information
 
diff --git a/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md b/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md
index 243c2885b786..67f4cc94260b 100644
--- a/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md
+++ b/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md
@@ -54,7 +54,7 @@ Currently, there are no pre-built OpenVINO images.
 ### Build image from source
 
 ```console
-docker build -f dockerfiles/Dockerfile.openvino -t vllm-openvino-env .
+docker build -f docker/Dockerfile.openvino -t vllm-openvino-env .
 docker run -it --rm vllm-openvino-env
 ```
 
diff --git a/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md b/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md
index bbb88e77a277..beb803cf0597 100644
--- a/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md
+++ b/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md
@@ -169,10 +169,10 @@ See <project:#deployment-docker-pre-built-image> for instructions on using the o
 
 ### Build image from source
 
-You can use <gh-file:dockerfiles/Dockerfile.tpu> to build a Docker image with TPU support.
+You can use <gh-file:docker/Dockerfile.tpu> to build a Docker image with TPU support.
 
 ```console
-docker build -f dockerfiles/Dockerfile.tpu -t vllm-tpu .
+docker build -f docker/Dockerfile.tpu -t vllm-tpu .
 ```
 
 Run the Docker image with the following command:
diff --git a/docs/source/getting_started/installation/cpu.md b/docs/source/getting_started/installation/cpu.md
index 6fbcb1b98d94..cef5b9832eee 100644
--- a/docs/source/getting_started/installation/cpu.md
+++ b/docs/source/getting_started/installation/cpu.md
@@ -164,7 +164,7 @@ Currently, there are no pre-build CPU images.
 ### Build image from source
 
 ```console
-$ docker build -f dockerfiles/Dockerfile.cpu -t vllm-cpu-env --shm-size=4g .
+$ docker build -f docker/Dockerfile.cpu -t vllm-cpu-env --shm-size=4g .
 $ docker run -it \
              --rm \
              --network=host \
@@ -174,7 +174,7 @@ $ docker run -it \
 ```
 
 ::::{tip}
-For ARM or Apple silicon, use `dockerfiles/Dockerfile.arm`
+For ARM or Apple silicon, use `docker/Dockerfile.arm`
 ::::
 
 ::::{tip}
diff --git a/docs/source/getting_started/installation/gpu/rocm.inc.md b/docs/source/getting_started/installation/gpu/rocm.inc.md
index 3696a04add96..3c1d888109fb 100644
--- a/docs/source/getting_started/installation/gpu/rocm.inc.md
+++ b/docs/source/getting_started/installation/gpu/rocm.inc.md
@@ -123,7 +123,7 @@ Building the Docker image from source is the recommended way to use vLLM with RO
 
 #### (Optional) Build an image with ROCm software stack
 
-Build a docker image from <gh-file:dockerfiles/Dockerfile.rocm_base> which setup ROCm software stack needed by the vLLM.
+Build a docker image from <gh-file:docker/Dockerfile.rocm_base> which sets up the ROCm software stack needed by vLLM.
 **This step is optional as this rocm_base image is usually prebuilt and store at [Docker Hub](https://hub.docker.com/r/rocm/vllm-dev) under tag `rocm/vllm-dev:base` to speed up user experience.**
 If you choose to build this rocm_base image yourself, the steps are as follows.
 
@@ -140,12 +140,12 @@ It is important that the user kicks off the docker build using buildkit. Either
 To build vllm on ROCm 6.3 for MI200 and MI300 series, you can use the default:
 
 ```console
-DOCKER_BUILDKIT=1 docker build -f dockerfiles/Dockerfile.rocm_base -t rocm/vllm-dev:base .
+DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.rocm_base -t rocm/vllm-dev:base .
 ```
 
 #### Build an image with vLLM
 
-First, build a docker image from <gh-file:dockerfiles/Dockerfile.rocm> and launch a docker container from the image.
+First, build a docker image from <gh-file:docker/Dockerfile.rocm> and launch a docker container from the image.
 It is important that the user kicks off the docker build using buildkit. Either the user put `DOCKER_BUILDKIT=1` as environment variable when calling docker build command, or the user needs to setup buildkit in the docker daemon configuration /etc/docker/daemon.json as follows and restart the daemon:
 
 ```console
@@ -156,10 +156,10 @@ It is important that the user kicks off the docker build using buildkit. Either
 }
 ```
 
-<gh-file:dockerfiles/Dockerfile.rocm> uses ROCm 6.3 by default, but also supports ROCm 5.7, 6.0, 6.1, and 6.2, in older vLLM branches.
+<gh-file:docker/Dockerfile.rocm> uses ROCm 6.3 by default, but also supports ROCm 5.7, 6.0, 6.1, and 6.2, in older vLLM branches.
 It provides flexibility to customize the build of docker image using the following arguments:
 
-- `BASE_IMAGE`: specifies the base image used when running `docker build`. The default value `rocm/vllm-dev:base` is an image published and maintained by AMD. It is being built using <gh-file:dockerfiles/Dockerfile.rocm_base>
+- `BASE_IMAGE`: specifies the base image used when running `docker build`. The default value `rocm/vllm-dev:base` is an image published and maintained by AMD. It is being built using <gh-file:docker/Dockerfile.rocm_base>
 - `USE_CYTHON`: An option to run cython compilation on a subset of python files upon docker build
 - `BUILD_RPD`: Include RocmProfileData profiling tool in the image
 - `ARG_PYTORCH_ROCM_ARCH`: Allows to override the gfx architecture values from the base docker image
@@ -169,13 +169,13 @@ Their values can be passed in when running `docker build` with `--build-arg` opt
 To build vllm on ROCm 6.3 for MI200 and MI300 series, you can use the default:
 
 ```console
-DOCKER_BUILDKIT=1 docker build -f dockerfiles/Dockerfile.rocm -t vllm-rocm .
+DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.rocm -t vllm-rocm .
 ```
 
 To build vllm on ROCm 6.3 for Radeon RX7900 series (gfx1100), you should pick the alternative base image:
 
 ```console
-DOCKER_BUILDKIT=1 docker build --build-arg BASE_IMAGE="rocm/vllm-dev:navi_base" -f dockerfiles/Dockerfile.rocm -t vllm-rocm .
+DOCKER_BUILDKIT=1 docker build --build-arg BASE_IMAGE="rocm/vllm-dev:navi_base" -f docker/Dockerfile.rocm -t vllm-rocm .
 ```
 
 To run the above docker image `vllm-rocm`, use the below command:
diff --git a/docs/source/getting_started/installation/gpu/xpu.inc.md b/docs/source/getting_started/installation/gpu/xpu.inc.md
index c74832bde0bc..c41905f250f8 100644
--- a/docs/source/getting_started/installation/gpu/xpu.inc.md
+++ b/docs/source/getting_started/installation/gpu/xpu.inc.md
@@ -54,7 +54,7 @@ Currently, there are no pre-built XPU images.
 ### Build image from source
 
 ```console
-$ docker build -f dockerfiles/Dockerfile.xpu -t vllm-xpu-env --shm-size=4g .
+$ docker build -f docker/Dockerfile.xpu -t vllm-xpu-env --shm-size=4g .
 $ docker run -it \
              --rm \
              --network=host \
diff --git a/docs/source/getting_started/quickstart.md b/docs/source/getting_started/quickstart.md
index 785a1fd2a676..bd047fd29c0f 100644
--- a/docs/source/getting_started/quickstart.md
+++ b/docs/source/getting_started/quickstart.md
@@ -198,5 +198,5 @@ Currently, vLLM supports multiple backends for efficient Attention computation a
 If desired, you can also manually set the backend of your choice by configuring the environment variable `VLLM_ATTENTION_BACKEND` to one of the following options: `FLASH_ATTN`, `FLASHINFER` or `XFORMERS`.
 
 ```{attention}
-There are no pre-built vllm wheels containing Flash Infer, so you must install it in your environment first. Refer to the [Flash Infer official docs](https://docs.flashinfer.ai/) or see <gh-file:dockerfiles/Dockerfile> for instructions on how to install it.
+There are no pre-built vllm wheels containing Flash Infer, so you must install it in your environment first. Refer to the [Flash Infer official docs](https://docs.flashinfer.ai/) or see <gh-file:docker/Dockerfile> for instructions on how to install it.
 ```

From 8f22b96f60f84ee75d43f8ea0eec80c5d6c99790 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Mon, 24 Mar 2025 16:21:22 +0100
Subject: [PATCH 3/6] Fix moved TPU test

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 .buildkite/run-tpu-v1-test.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.buildkite/run-tpu-v1-test.sh b/.buildkite/run-tpu-v1-test.sh
index 6562942ea3f8..3924df92294b 100755
--- a/.buildkite/run-tpu-v1-test.sh
+++ b/.buildkite/run-tpu-v1-test.sh
@@ -3,7 +3,7 @@
 set -e
 
 # Build the docker image.
-docker build -f Dockerfile.tpu -t vllm-tpu .
+docker build -f docker/Dockerfile.tpu -t vllm-tpu .
 
 # Set up cleanup.
 remove_docker_container() { docker rm -f tpu-test || true; }

From 7e0dbf7bfb2ccc93f2154cd44ea66ed328e3bdf5 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Mon, 24 Mar 2025 20:44:05 +0100
Subject: [PATCH 4/6] Fix new dockerfile references

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 docs/source/getting_started/installation/cpu.md | 2 +-
 vllm/config.py                                  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/source/getting_started/installation/cpu.md b/docs/source/getting_started/installation/cpu.md
index cef5b9832eee..a1ece6f1bbd1 100644
--- a/docs/source/getting_started/installation/cpu.md
+++ b/docs/source/getting_started/installation/cpu.md
@@ -178,7 +178,7 @@ For ARM or Apple silicon, use `docker/Dockerfile.arm`
 ::::
 
 ::::{tip}
-For IBM Z (s390x), use `Dockerfile.s390x` and in `docker run` use flag `--dtype float`
+For IBM Z (s390x), use `docker/Dockerfile.s390x` and in `docker run` use flag `--dtype float`
 ::::
 
 ## Supported features
diff --git a/vllm/config.py b/vllm/config.py
index 989e5b47516e..bc509a595ea3 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -309,8 +309,8 @@ def __init__(
             ) and backend == "FLASHINFER" and find_spec("flashinfer") is None:
             raise ValueError(
                 "VLLM_ATTENTION_BACKEND is set to FLASHINFER, but flashinfer "
-                "module was not found."
-                "See https://github.com/vllm-project/vllm/blob/main/Dockerfile"
+                "module was not found. See"
+                "https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile"
                 "for instructions on how to install it.")
 
         # The tokenizer version is consistent with the model version by default.

From 0ed252cc61e2ea37873a3f540d58a4509b645191 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Mon, 24 Mar 2025 22:17:05 +0100
Subject: [PATCH 5/6] Respond to comment

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 vllm/config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index bc509a595ea3..100a52e3afda 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -309,8 +309,8 @@ def __init__(
             ) and backend == "FLASHINFER" and find_spec("flashinfer") is None:
             raise ValueError(
                 "VLLM_ATTENTION_BACKEND is set to FLASHINFER, but flashinfer "
-                "module was not found. See"
-                "https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile"
+                "module was not found. See "
+                "https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile "  # noqa: E501
                 "for instructions on how to install it.")
 
         # The tokenizer version is consistent with the model version by default.

From cb929c348ba3b99532e712f4d7c67aa34ceefc8d Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Mon, 31 Mar 2025 14:48:02 +0200
Subject: [PATCH 6/6] Fix some more dockerfile references

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 .buildkite/release-pipeline.yaml     | 8 ++++----
 .buildkite/run-cpu-test.sh           | 4 ++--
 .buildkite/run-gh200-test.sh         | 1 +
 docs/source/contributing/overview.md | 2 +-
 docs/source/deployment/docker.md     | 3 ++-
 5 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml
index 61f138f829ca..a420759aad91 100644
--- a/.buildkite/release-pipeline.yaml
+++ b/.buildkite/release-pipeline.yaml
@@ -3,7 +3,7 @@ steps:
     agents:
       queue: cpu_queue_postmerge
     commands:
-      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag vllm-ci:build-image --target build --progress plain ."
+      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
       - "mkdir artifacts"
       - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
       - "bash .buildkite/upload-wheels.sh"
@@ -14,7 +14,7 @@ steps:
     agents:
       queue: cpu_queue_postmerge
     commands:
-      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.1.0 --tag vllm-ci:build-image --target build --progress plain ."
+      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.1.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
       - "mkdir artifacts"
       - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
       - "bash .buildkite/upload-wheels.sh"
@@ -31,7 +31,7 @@ steps:
     agents:
       queue: cpu_queue_postmerge
     commands:
-      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=11.8.0 --tag vllm-ci:build-image --target build --progress plain ."
+      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=11.8.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
       - "mkdir artifacts"
       - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
       - "bash .buildkite/upload-wheels.sh"
@@ -48,7 +48,7 @@ steps:
       queue: cpu_queue_postmerge
     commands:
       - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT --target vllm-openai --progress plain ."
+      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT --target vllm-openai --progress plain -f docker/Dockerfile ."
       - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"
 
   - label: "Build and publish TPU release image"
diff --git a/.buildkite/run-cpu-test.sh b/.buildkite/run-cpu-test.sh
index bf9f191d3b06..40f3df96065d 100644
--- a/.buildkite/run-cpu-test.sh
+++ b/.buildkite/run-cpu-test.sh
@@ -18,8 +18,8 @@ trap remove_docker_container EXIT
 remove_docker_container
 
 # Try building the docker image
-numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --tag cpu-test-"$BUILDKITE_BUILD_NUMBER" --target vllm-test -f Dockerfile.cpu .
-numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" --tag cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 --target vllm-test -f Dockerfile.cpu .
+numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --tag cpu-test-"$BUILDKITE_BUILD_NUMBER" --target vllm-test -f docker/Dockerfile.cpu .
+numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" --tag cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 --target vllm-test -f docker/Dockerfile.cpu .
 
 # Run the image, setting --shm-size=4g for tensor parallel.
 docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus="$CORE_RANGE"  \
diff --git a/.buildkite/run-gh200-test.sh b/.buildkite/run-gh200-test.sh
index 5c004b47778f..8c64e14606d3 100644
--- a/.buildkite/run-gh200-test.sh
+++ b/.buildkite/run-gh200-test.sh
@@ -9,6 +9,7 @@ python3 use_existing_torch.py
 
 # Try building the docker image
 DOCKER_BUILDKIT=1 docker build . \
+  --file docker/Dockerfile \
   --target vllm-openai \
   --platform "linux/arm64" \
   -t gh200-test \
diff --git a/docs/source/contributing/overview.md b/docs/source/contributing/overview.md
index 1e6f73dd524e..31c7059fda36 100644
--- a/docs/source/contributing/overview.md
+++ b/docs/source/contributing/overview.md
@@ -45,7 +45,7 @@ pytest tests/
 ```
 
 :::{tip}
-Since the <gh-file:Dockerfile> ships with Python 3.12, all tests in CI (except `mypy`) are run with Python 3.12.
+Since the <gh-file:docker/Dockerfile> ships with Python 3.12, all tests in CI (except `mypy`) are run with Python 3.12.
 
 Therefore, we recommend developing with Python 3.12 to minimise the chance of your local environment clashing with our CI environment.
 :::
diff --git a/docs/source/deployment/docker.md b/docs/source/deployment/docker.md
index 5a547f4fe0ea..1ccb04ac625c 100644
--- a/docs/source/deployment/docker.md
+++ b/docs/source/deployment/docker.md
@@ -65,7 +65,7 @@ You can build and run vLLM from source via the provided <gh-file:docker/Dockerfi
 
 ```console
 # optionally specifies: --build-arg max_jobs=8 --build-arg nvcc_threads=2
-DOCKER_BUILDKIT=1 docker build . --target vllm-openai --tag vllm/vllm-openai
+DOCKER_BUILDKIT=1 docker build . --target vllm-openai --tag vllm/vllm-openai --file docker/Dockerfile
 ```
 
 :::{note}
@@ -92,6 +92,7 @@ Keep an eye on memory usage with parallel jobs as it can be substantial (see exa
 # Example of building on Nvidia GH200 server. (Memory usage: ~15GB, Build time: ~1475s / ~25 min, Image size: 6.93GB)
 $ python3 use_existing_torch.py
 $ DOCKER_BUILDKIT=1 docker build . \
+  --file docker/Dockerfile \
   --target vllm-openai \
   --platform "linux/arm64" \
   -t vllm/vllm-gh200-openai:latest \