@@ -173,31 +173,15 @@ endif ()
173173
174174# FA3 requires CUDA 12.0 or later
175175if (FA3_ENABLED AND ${CMAKE_CUDA_COMPILER_VERSION} GREATER_EQUAL 12.0)
176- # BF16 source files (we only use paged, split, packgqa and sm80)
177- # we only use paged, split and packgqa since for paged_kv or varlen_q
178- # PackedGQA is the only one that is used
176+ # BF16 source files
179177 file (GLOB FA3_BF16_GEN_SRCS
180- "hopper/instantiations/flash_fwd_hdimall_bf16_*paged*_sm90.cu" )
181- file (GLOB FA3_BF16_GEN_SRCS_
182- "hopper/instantiations/flash_fwd_hdimall_bf16_*packgqa*_sm90.cu" )
183- list (APPEND FA3_BF16_GEN_SRCS ${FA3_BF16_GEN_SRCS_} )
184- file (GLOB FA3_BF16_GEN_SRCS_
185- "hopper/instantiations/flash_fwd_hdimall_bf16_*split*_sm90.cu" )
186- list (APPEND FA3_BF16_GEN_SRCS ${FA3_BF16_GEN_SRCS_} )
178+ "hopper/instantiations/flash_fwd_hdimall_bf16*_sm90.cu" )
187179 file (GLOB FA3_BF16_GEN_SRCS_
188180 "hopper/instantiations/flash_fwd_*_bf16_*_sm80.cu" )
189181 list (APPEND FA3_BF16_GEN_SRCS ${FA3_BF16_GEN_SRCS_} )
190- # FP16 source files (we only use paged, split and packgqa and sm80)
191- # we only use paged, split and packgqa since for paged_kv or varlen_q
192- # PackedGQA is the only one that is used
182+ # FP16 source files
193183 file (GLOB FA3_FP16_GEN_SRCS
194- "hopper/instantiations/flash_fwd_hdimall_fp16_*paged*_sm90.cu" )
195- file (GLOB FA3_FP16_GEN_SRCS_
196- "hopper/instantiations/flash_fwd_hdimall_fp16_*packgqa*_sm90.cu" )
197- list (APPEND FA3_FP16_GEN_SRCS ${FA3_FP16_GEN_SRCS_} )
198- file (GLOB FA3_FP16_GEN_SRCS_
199- "hopper/instantiations/flash_fwd_hdimall_fp16_*split*_sm90.cu" )
200- list (APPEND FA3_FP16_GEN_SRCS ${FA3_FP16_GEN_SRCS_} )
184+ "hopper/instantiations/flash_fwd_hdimall_fp16*_sm90.cu" )
201185 file (GLOB FA3_FP16_GEN_SRCS_
202186 "hopper/instantiations/flash_fwd_*_fp16_*_sm80.cu" )
203187 list (APPEND FA3_FP16_GEN_SRCS ${FA3_FP16_GEN_SRCS_} )
0 commit comments