Skip to content

Commit 9fdf3de

Browse files
authored
CMake-based build system (#2830)
1 parent c0c17d4 commit 9fdf3de

File tree

10 files changed

+868
-302
lines changed

10 files changed

+868
-302
lines changed

CMakeLists.txt

Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
cmake_minimum_required(VERSION 3.21)
2+
3+
project(vllm_extensions LANGUAGES CXX)
4+
5+
message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
6+
7+
include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
8+
9+
#
10+
# Supported python versions. These versions will be searched in order, the
11+
# first match will be selected. These should be kept in sync with setup.py.
12+
#
13+
set(PYTHON_SUPPORTED_VERSIONS "3.8" "3.9" "3.10" "3.11")
14+
15+
# Supported NVIDIA architectures.
16+
set(CUDA_SUPPORTED_ARCHS "7.0;7.5;8.0;8.6;8.9;9.0")
17+
18+
# Supported AMD GPU architectures.
19+
set(HIP_SUPPORTED_ARCHS "gfx908;gfx90a;gfx942;gfx1100")
20+
21+
#
22+
# Supported/expected torch versions for CUDA/ROCm.
23+
#
24+
# Currently, having an incorrect pytorch version results in a warning
25+
# rather than an error.
26+
#
27+
# Note: the CUDA torch version is derived from pyproject.toml and various
28+
# requirements.txt files and should be kept consistent. The ROCm torch
29+
# versions are derived from Dockerfile.rocm
30+
#
31+
set(TORCH_SUPPORTED_VERSION_CUDA "2.1.2")
32+
set(TORCH_SUPPORTED_VERSION_ROCM_5X "2.0.1")
33+
set(TORCH_SUPPORTED_VERSION_ROCM_6X "2.1.1")
34+
35+
#
36+
# Try to find python package with an executable that exactly matches
37+
# `VLLM_PYTHON_EXECUTABLE` and is one of the supported versions.
38+
#
39+
if (VLLM_PYTHON_EXECUTABLE)
40+
find_python_from_executable(${VLLM_PYTHON_EXECUTABLE} "${PYTHON_SUPPORTED_VERSIONS}")
41+
else()
42+
message(FATAL_ERROR
43+
"Please set VLLM_PYTHON_EXECUTABLE to the path of the desired python version"
44+
" before running cmake configure.")
45+
endif()
46+
47+
#
48+
# Update cmake's `CMAKE_PREFIX_PATH` with torch location.
49+
#
50+
append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path")
51+
52+
#
53+
# Import torch cmake configuration.
54+
# Torch also imports CUDA (and partially HIP) languages with some customizations,
55+
# so there is no need to do this explicitly with check_language/enable_language,
56+
# etc.
57+
#
58+
find_package(Torch REQUIRED)
59+
60+
#
61+
# Normally `torch.utils.cpp_extension.CUDAExtension` would add
62+
# `libtorch_python.so` for linking against an extension. Torch's cmake
63+
# configuration does not include this library (presumably since the cmake
64+
# config is used for standalone C++ binaries that link against torch).
65+
# The `libtorch_python.so` library defines some of the glue code between
66+
# torch/python via pybind and is required by VLLM extensions for this
67+
# reason. So, add it by manually using `append_torchlib_if_found` from
68+
# torch's cmake setup.
69+
#
70+
append_torchlib_if_found(torch_python)
71+
72+
#
73+
# Set up GPU language and check the torch version and warn if it isn't
74+
# what is expected.
75+
#
76+
if (NOT HIP_FOUND AND CUDA_FOUND)
77+
set(VLLM_GPU_LANG "CUDA")
78+
79+
if (NOT Torch_VERSION VERSION_EQUAL ${TORCH_SUPPORTED_VERSION_CUDA})
80+
message(WARNING "Pytorch version ${TORCH_SUPPORTED_VERSION_CUDA} "
81+
"expected for CUDA build, saw ${Torch_VERSION} instead.")
82+
endif()
83+
elseif(HIP_FOUND)
84+
set(VLLM_GPU_LANG "HIP")
85+
86+
# Importing torch recognizes and sets up some HIP/ROCm configuration but does
87+
# not let cmake recognize .hip files. In order to get cmake to understand the
88+
# .hip extension automatically, HIP must be enabled explicitly.
89+
enable_language(HIP)
90+
91+
# ROCm 5.x
92+
if (ROCM_VERSION_DEV_MAJOR EQUAL 5 AND
93+
NOT Torch_VERSION VERSION_EQUAL ${TORCH_SUPPORTED_VERSION_ROCM_5X})
94+
message(WARNING "Pytorch version ${TORCH_SUPPORTED_VERSION_ROCM_5X} "
95+
"expected for ROCMm 5.x build, saw ${Torch_VERSION} instead.")
96+
endif()
97+
98+
# ROCm 6.x
99+
if (ROCM_VERSION_DEV_MAJOR EQUAL 6 AND
100+
NOT Torch_VERSION VERSION_EQUAL ${TORCH_SUPPORTED_VERSION_ROCM_6X})
101+
message(WARNING "Pytorch version ${TORCH_SUPPORTED_VERSION_ROCM_6X} "
102+
"expected for ROCMm 6.x build, saw ${Torch_VERSION} instead.")
103+
endif()
104+
else()
105+
message(FATAL_ERROR "Can't find CUDA or HIP installation.")
106+
endif()
107+
108+
#
109+
# Override the GPU architectures detected by cmake/torch and filter them by
110+
# the supported versions for the current language.
111+
# The final set of arches is stored in `VLLM_GPU_ARCHES`.
112+
#
113+
override_gpu_arches(VLLM_GPU_ARCHES
114+
${VLLM_GPU_LANG}
115+
"${${VLLM_GPU_LANG}_SUPPORTED_ARCHS}")
116+
117+
#
118+
# Query torch for additional GPU compilation flags for the given
119+
# `VLLM_GPU_LANG`.
120+
# The final set of arches is stored in `VLLM_GPU_FLAGS`.
121+
#
122+
get_torch_gpu_compiler_flags(VLLM_GPU_FLAGS ${VLLM_GPU_LANG})
123+
124+
#
125+
# Set nvcc parallelism.
126+
#
127+
if(NVCC_THREADS AND VLLM_GPU_LANG STREQUAL "CUDA")
128+
list(APPEND VLLM_GPU_FLAGS "--threads=${NVCC_THREADS}")
129+
endif()
130+
131+
#
132+
# Define extension targets
133+
#
134+
135+
#
136+
# _C extension
137+
#
138+
139+
set(VLLM_EXT_SRC
140+
"csrc/cache_kernels.cu"
141+
"csrc/attention/attention_kernels.cu"
142+
"csrc/pos_encoding_kernels.cu"
143+
"csrc/activation_kernels.cu"
144+
"csrc/layernorm_kernels.cu"
145+
"csrc/quantization/squeezellm/quant_cuda_kernel.cu"
146+
"csrc/quantization/gptq/q_gemm.cu"
147+
"csrc/cuda_utils_kernels.cu"
148+
"csrc/moe_align_block_size_kernels.cu"
149+
"csrc/pybind.cpp")
150+
151+
if(VLLM_GPU_LANG STREQUAL "CUDA")
152+
list(APPEND VLLM_EXT_SRC
153+
"csrc/quantization/awq/gemm_kernels.cu"
154+
"csrc/quantization/marlin/marlin_cuda_kernel.cu"
155+
"csrc/custom_all_reduce.cu")
156+
endif()
157+
158+
define_gpu_extension_target(
159+
_C
160+
DESTINATION vllm
161+
LANGUAGE ${VLLM_GPU_LANG}
162+
SOURCES ${VLLM_EXT_SRC}
163+
COMPILE_FLAGS ${VLLM_GPU_FLAGS}
164+
ARCHITECTURES ${VLLM_GPU_ARCHES}
165+
WITH_SOABI)
166+
167+
#
168+
# _moe_C extension
169+
#
170+
171+
set(VLLM_MOE_EXT_SRC
172+
"csrc/moe/moe_ops.cpp"
173+
"csrc/moe/topk_softmax_kernels.cu")
174+
175+
define_gpu_extension_target(
176+
_moe_C
177+
DESTINATION vllm
178+
LANGUAGE ${VLLM_GPU_LANG}
179+
SOURCES ${VLLM_MOE_EXT_SRC}
180+
COMPILE_FLAGS ${VLLM_GPU_FLAGS}
181+
ARCHITECTURES ${VLLM_GPU_ARCHES}
182+
WITH_SOABI)
183+
184+
#
185+
# _punica_C extension
186+
#
187+
188+
set(VLLM_PUNICA_EXT_SRC
189+
"csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu"
190+
"csrc/punica/bgmv/bgmv_bf16_bf16_fp16.cu"
191+
"csrc/punica/bgmv/bgmv_bf16_fp16_bf16.cu"
192+
"csrc/punica/bgmv/bgmv_bf16_fp16_fp16.cu"
193+
"csrc/punica/bgmv/bgmv_bf16_fp32_bf16.cu"
194+
"csrc/punica/bgmv/bgmv_bf16_fp32_fp16.cu"
195+
"csrc/punica/bgmv/bgmv_fp16_bf16_bf16.cu"
196+
"csrc/punica/bgmv/bgmv_fp16_bf16_fp16.cu"
197+
"csrc/punica/bgmv/bgmv_fp16_fp16_bf16.cu"
198+
"csrc/punica/bgmv/bgmv_fp16_fp16_fp16.cu"
199+
"csrc/punica/bgmv/bgmv_fp16_fp32_bf16.cu"
200+
"csrc/punica/bgmv/bgmv_fp16_fp32_fp16.cu"
201+
"csrc/punica/bgmv/bgmv_fp32_bf16_bf16.cu"
202+
"csrc/punica/bgmv/bgmv_fp32_bf16_fp16.cu"
203+
"csrc/punica/bgmv/bgmv_fp32_fp16_bf16.cu"
204+
"csrc/punica/bgmv/bgmv_fp32_fp16_fp16.cu"
205+
"csrc/punica/bgmv/bgmv_fp32_fp32_bf16.cu"
206+
"csrc/punica/bgmv/bgmv_fp32_fp32_fp16.cu"
207+
"csrc/punica/punica_ops.cc")
208+
209+
#
210+
# Copy GPU compilation flags+update for punica
211+
#
212+
set(VLLM_PUNICA_GPU_FLAGS ${VLLM_GPU_FLAGS})
213+
list(REMOVE_ITEM VLLM_PUNICA_GPU_FLAGS
214+
"-D__CUDA_NO_HALF_OPERATORS__"
215+
"-D__CUDA_NO_HALF_CONVERSIONS__"
216+
"-D__CUDA_NO_BFLOAT16_CONVERSIONS__"
217+
"-D__CUDA_NO_HALF2_OPERATORS__")
218+
219+
#
220+
# Filter out CUDA architectures < 8.0 for punica.
221+
#
222+
if (${VLLM_GPU_LANG} STREQUAL "CUDA")
223+
set(VLLM_PUNICA_GPU_ARCHES)
224+
foreach(ARCH ${VLLM_GPU_ARCHES})
225+
string_to_ver(CODE_VER ${ARCH})
226+
if (CODE_VER GREATER_EQUAL 8.0)
227+
list(APPEND VLLM_PUNICA_GPU_ARCHES ${ARCH})
228+
endif()
229+
endforeach()
230+
message(STATUS "Punica target arches: ${VLLM_PUNICA_GPU_ARCHES}")
231+
endif()
232+
233+
if (VLLM_PUNICA_GPU_ARCHES)
234+
define_gpu_extension_target(
235+
_punica_C
236+
DESTINATION vllm
237+
LANGUAGE ${VLLM_GPU_LANG}
238+
SOURCES ${VLLM_PUNICA_EXT_SRC}
239+
COMPILE_FLAGS ${VLLM_PUNICA_GPU_FLAGS}
240+
ARCHITECTURES ${VLLM_PUNICA_GPU_ARCHES}
241+
WITH_SOABI)
242+
else()
243+
message(WARNING "Unable to create _punica_C target because none of the "
244+
"requested architectures (${VLLM_GPU_ARCHES}) are supported, i.e. >= 8.0")
245+
endif()
246+
247+
#
248+
# Add the `default` target which detects which extensions should be
249+
# built based on platform/architecture. This is the same logic that
250+
# setup.py uses to select which extensions should be built and should
251+
# be kept in sync.
252+
#
253+
# The `default` target makes direct use of cmake easier since knowledge
254+
# of which extensions are supported has been factored in, e.g.
255+
#
256+
# mkdir build && cd build
257+
# cmake -G Ninja -DVLLM_PYTHON_EXECUTABLE=`which python3` -DCMAKE_LIBRARY_OUTPUT_DIRECTORY=../vllm ..
258+
# cmake --build . --target default
259+
#
260+
add_custom_target(default)
261+
262+
if(VLLM_GPU_LANG STREQUAL "CUDA" OR VLLM_GPU_LANG STREQUAL "HIP")
263+
message(STATUS "Enabling C extension.")
264+
add_dependencies(default _C)
265+
endif()
266+
267+
if(VLLM_GPU_LANG STREQUAL "CUDA")
268+
message(STATUS "Enabling moe extension.")
269+
add_dependencies(default _moe_C)
270+
271+
# Enable punica if -DVLLM_INSTALL_PUNICA_KERNELS=ON or
272+
# VLLM_INSTALL_PUNICA_KERNELS is set in the environment and
273+
# there are supported target arches.
274+
if (VLLM_PUNICA_GPU_ARCHES AND
275+
(ENV{VLLM_INSTALL_PUNICA_KERNELS} OR VLLM_INSTALL_PUNICA_KERNELS))
276+
message(STATUS "Enabling punica extension.")
277+
add_dependencies(default _punica_C)
278+
endif()
279+
endif()

Dockerfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ RUN --mount=type=cache,target=/root/.cache/pip \
3838
# copy input files
3939
COPY csrc csrc
4040
COPY setup.py setup.py
41+
COPY cmake cmake
42+
COPY CMakeLists.txt CMakeLists.txt
4143
COPY requirements.txt requirements.txt
4244
COPY pyproject.toml pyproject.toml
4345
COPY vllm/__init__.py vllm/__init__.py

MANIFEST.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
include LICENSE
22
include requirements.txt
3+
include CMakeLists.txt
34

5+
recursive-include cmake *
46
recursive-include csrc *

cmake/hipify.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#!/usr/bin/env python3
2+
3+
#
4+
# A command line tool for running pytorch's hipify preprocessor on CUDA
5+
# source files.
6+
#
7+
# See https://github.com/ROCm/hipify_torch
8+
# and <torch install dir>/utils/hipify/hipify_python.py
9+
#
10+
11+
import argparse
12+
import shutil
13+
import os
14+
15+
from torch.utils.hipify.hipify_python import hipify
16+
17+
if __name__ == '__main__':
18+
parser = argparse.ArgumentParser()
19+
20+
# Project directory where all the source + include files live.
21+
parser.add_argument(
22+
"-p",
23+
"--project_dir",
24+
help="The project directory.",
25+
)
26+
27+
# Directory where hipified files are written.
28+
parser.add_argument(
29+
"-o",
30+
"--output_dir",
31+
help="The output directory.",
32+
)
33+
34+
# Source files to convert.
35+
parser.add_argument("sources",
36+
help="Source files to hipify.",
37+
nargs="*",
38+
default=[])
39+
40+
args = parser.parse_args()
41+
42+
# Limit include scope to project_dir only
43+
includes = [os.path.join(args.project_dir, '*')]
44+
45+
# Get absolute path for all source files.
46+
extra_files = [os.path.abspath(s) for s in args.sources]
47+
48+
# Copy sources from project directory to output directory.
49+
# The directory might already exist to hold object files so we ignore that.
50+
shutil.copytree(args.project_dir, args.output_dir, dirs_exist_ok=True)
51+
52+
hipify_result = hipify(project_directory=args.project_dir,
53+
output_directory=args.output_dir,
54+
header_include_dirs=[],
55+
includes=includes,
56+
extra_files=extra_files,
57+
show_detailed=True,
58+
is_pytorch_extension=True,
59+
hipify_extra_files_only=True)
60+
61+
hipified_sources = []
62+
for source in args.sources:
63+
s_abs = os.path.abspath(source)
64+
hipified_s_abs = (hipify_result[s_abs].hipified_path if
65+
(s_abs in hipify_result
66+
and hipify_result[s_abs].hipified_path is not None)
67+
else s_abs)
68+
hipified_sources.append(hipified_s_abs)
69+
70+
assert (len(hipified_sources) == len(args.sources))
71+
72+
# Print hipified source files.
73+
print("\n".join(hipified_sources))

0 commit comments

Comments
 (0)