Skip to content

Commit 32f421b

Browse files
authored
Add CUDA compute capability compile guard (#636)
add compile guard
1 parent e11201a commit 32f421b

File tree

2 files changed

+8
-0
lines changed

2 files changed

+8
-0
lines changed

torchao/csrc/cuda/fp6_llm/fp6_linear.cu

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@
1414
//
1515
// This file is adapted from https://github.com/usyd-fsalab/fp6_llm/blob/5df6737cca32f604e957e3f63f03ccc2e4d1df0d/fp6_llm/csrc/fp6_linear.cu
1616

17+
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800 // at least Ampere
18+
1719
#include "kernel_matmul.cuh"
1820
#include "kernel_reduction.cuh"
1921

@@ -200,3 +202,5 @@ TORCH_LIBRARY_IMPL(torchao, CUDA, m) {
200202
}
201203

202204
} // namespace torchao
205+
206+
#endif

torchao/csrc/cuda/tensor_core_tiled_layout/tensor_core_tiled_layout.cu

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800 // at least Ampere
2+
13
#include <ATen/ATen.h>
24
#include <ATen/core/Tensor.h>
35
#include <ATen/cuda/CUDAContext.h>
@@ -310,3 +312,5 @@ TORCH_LIBRARY_IMPL(torchao, CUDA, m) {
310312
m.impl("torchao::dequantize_tensor_core_tiled_layout", &_dequantize_tensor_core_tiled_layout);
311313

312314
}
315+
316+
#endif

0 commit comments

Comments
 (0)