diff --git a/backends/iluvatar_gpu/CMakeLists.txt b/backends/iluvatar_gpu/CMakeLists.txt index d79236cebf..b0c06e519d 100644 --- a/backends/iluvatar_gpu/CMakeLists.txt +++ b/backends/iluvatar_gpu/CMakeLists.txt @@ -291,6 +291,7 @@ file( ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fusion_group_kernel.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/masked_multihead_attention_kernel.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/qkv_unpack_mha_kernel.cu + ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_token_prune_kernel.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/skip_layernorm_kernel.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/affine_channel_grad_kernel.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/affine_channel_kernel.cu @@ -857,6 +858,7 @@ file( ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/distributed_fused_lamb_init_kernel.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_bias_act_kernel.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/qkv_unpack_mha_kernel.cu + ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_token_prune_kernel.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_kernel.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_grad_kernel.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/masked_multihead_attention_kernel.cu @@ -928,6 +930,7 @@ list( ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/fake_quantize_functor.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/masked_multihead_attention_kernel.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/qkv_unpack_mha_kernel.cu + ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_token_prune_kernel.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_grad_kernel.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/check_numerics_kernel.cu diff --git a/backends/iluvatar_gpu/kernels/cuda_kernels/cvm_grad_kernel_register.cu b/backends/iluvatar_gpu/kernels/cuda_kernels/cvm_grad_kernel_register.cu index 71d6076bcd..19ffddd241 100644 --- a/backends/iluvatar_gpu/kernels/cuda_kernels/cvm_grad_kernel_register.cu +++ b/backends/iluvatar_gpu/kernels/cuda_kernels/cvm_grad_kernel_register.cu @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/gpu/cvm_grad_kernel.cu" //NOLINT +#include "paddle/phi/kernels/gpu/cvm_grad_kernel.h" PD_CUSTOM_KERNEL_REGISTER( cvm_grad, iluvatar_gpu, ALL_LAYOUT, phi::CVMGradCUDAKernel, float, double) { diff --git a/backends/iluvatar_gpu/kernels/cuda_kernels/cvm_kernel_register.cu b/backends/iluvatar_gpu/kernels/cuda_kernels/cvm_kernel_register.cu index 1b71499137..5e1cc180c4 100644 --- a/backends/iluvatar_gpu/kernels/cuda_kernels/cvm_kernel_register.cu +++ b/backends/iluvatar_gpu/kernels/cuda_kernels/cvm_kernel_register.cu @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/gpu/cvm_kernel.cu" //NOLINT +#include "paddle/phi/kernels/gpu/cvm_kernel.h" PD_CUSTOM_KERNEL_REGISTER( cvm, iluvatar_gpu, ALL_LAYOUT, phi::CVMCUDAKernel, float, double) {} diff --git a/backends/iluvatar_gpu/kernels/cuda_kernels/fused_token_prune_kernel_register.cu b/backends/iluvatar_gpu/kernels/cuda_kernels/fused_token_prune_kernel_register.cu index fabb86ab37..f342c30e50 100644 --- a/backends/iluvatar_gpu/kernels/cuda_kernels/fused_token_prune_kernel_register.cu +++ b/backends/iluvatar_gpu/kernels/cuda_kernels/fused_token_prune_kernel_register.cu @@ -14,7 +14,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/fused_token_prune_utils.h" -#include "paddle/phi/kernels/gpu/fused_token_prune_kernel.cu" //NOLINT +#include "paddle/phi/kernels/gpu/fused_token_prune_kernel.h" PD_CUSTOM_KERNEL_REGISTER(fused_token_prune, iluvatar_gpu, diff --git a/backends/iluvatar_gpu/kernels/cuda_kernels/masked_multihead_attention_kernel_register.cu b/backends/iluvatar_gpu/kernels/cuda_kernels/masked_multihead_attention_kernel_register.cu new file mode 100644 index 0000000000..e35b541604 --- /dev/null +++ b/backends/iluvatar_gpu/kernels/cuda_kernels/masked_multihead_attention_kernel_register.cu @@ -0,0 +1,25 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/aligned_vector.h" +#include "paddle/phi/kernels/fusion/gpu/masked_multihead_attention_kernel.h" + +PD_CUSTOM_KERNEL_REGISTER(masked_multihead_attention, + iluvatar_gpu, + ALL_LAYOUT, + phi::fusion::MMHAKernel, + float, + phi::dtype::float16, + int32_t) {} diff --git a/backends/iluvatar_gpu/kernels/cuda_kernels/qkv_unpack_mha_kernel_register.cu b/backends/iluvatar_gpu/kernels/cuda_kernels/qkv_unpack_mha_kernel_register.cu new file mode 100644 index 0000000000..6c0775a1aa --- /dev/null +++ b/backends/iluvatar_gpu/kernels/cuda_kernels/qkv_unpack_mha_kernel_register.cu @@ -0,0 +1,24 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/aligned_vector.h" +#include "paddle/phi/kernels/fusion/gpu/qkv_unpack_mha_kernel.h" + +PD_CUSTOM_KERNEL_REGISTER(qkv_unpack_mha, + iluvatar_gpu, + ALL_LAYOUT, + phi::fusion::QKVMMHAKernel, + float, + phi::dtype::float16) {} diff --git a/backends/metax_gpu/CMakeLists.txt b/backends/metax_gpu/CMakeLists.txt index c6d1a96aea..5e4d85165f 100755 --- a/backends/metax_gpu/CMakeLists.txt +++ b/backends/metax_gpu/CMakeLists.txt @@ -612,6 +612,7 @@ file( ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/distributed_fused_lamb_init_kernel.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_bias_act_kernel.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/qkv_unpack_mha_kernel.cu + ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_token_prune_kernel.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_kernel.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_grad_kernel.cu ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/masked_multihead_attention_kernel.cu diff --git a/backends/metax_gpu/kernels/cuda_kernels/cvm_grad_kernel_register.cu b/backends/metax_gpu/kernels/cuda_kernels/cvm_grad_kernel_register.cu index e1e2b60db7..aaf3d67808 100644 --- a/backends/metax_gpu/kernels/cuda_kernels/cvm_grad_kernel_register.cu +++ b/backends/metax_gpu/kernels/cuda_kernels/cvm_grad_kernel_register.cu @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/gpu/cvm_grad_kernel.cu" //NOLINT +#include "paddle/phi/kernels/gpu/cvm_grad_kernel.h" PD_CUSTOM_KERNEL_REGISTER( cvm_grad, metax_gpu, ALL_LAYOUT, phi::CVMGradCUDAKernel, float, double) {} diff --git a/backends/metax_gpu/kernels/cuda_kernels/cvm_kernel_register.cu b/backends/metax_gpu/kernels/cuda_kernels/cvm_kernel_register.cu index 485fabef05..0776c8d087 100644 --- a/backends/metax_gpu/kernels/cuda_kernels/cvm_kernel_register.cu +++ b/backends/metax_gpu/kernels/cuda_kernels/cvm_kernel_register.cu @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/gpu/cvm_kernel.cu" //NOLINT +#include "paddle/phi/kernels/gpu/cvm_kernel.h" PD_CUSTOM_KERNEL_REGISTER( cvm, metax_gpu, ALL_LAYOUT, phi::CVMCUDAKernel, float, double) {} diff --git a/backends/metax_gpu/kernels/cuda_kernels/masked_multihead_attention_kernel_register.cu b/backends/metax_gpu/kernels/cuda_kernels/masked_multihead_attention_kernel_register.cu index 53ea098669..e8a49a8967 100644 --- a/backends/metax_gpu/kernels/cuda_kernels/masked_multihead_attention_kernel_register.cu +++ b/backends/metax_gpu/kernels/cuda_kernels/masked_multihead_attention_kernel_register.cu @@ -14,7 +14,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/aligned_vector.h" -#include "paddle/phi/kernels/fusion/gpu/masked_multihead_attention_kernel.cu" // NOLINT +#include "paddle/phi/kernels/fusion/gpu/masked_multihead_attention_kernel.h" PD_CUSTOM_KERNEL_REGISTER(masked_multihead_attention, metax_gpu, diff --git a/backends/metax_gpu/kernels/cuda_kernels/qkv_unpack_mha_kernel_register.cu b/backends/metax_gpu/kernels/cuda_kernels/qkv_unpack_mha_kernel_register.cu index bc6ab89326..845c71baf3 100644 --- a/backends/metax_gpu/kernels/cuda_kernels/qkv_unpack_mha_kernel_register.cu +++ b/backends/metax_gpu/kernels/cuda_kernels/qkv_unpack_mha_kernel_register.cu @@ -14,7 +14,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/aligned_vector.h" -#include "paddle/phi/kernels/fusion/gpu/qkv_unpack_mha_kernel.cu" //NOLINT +#include "paddle/phi/kernels/fusion/gpu/qkv_unpack_mha_kernel.h" PD_CUSTOM_KERNEL_REGISTER(qkv_unpack_mha, metax_gpu, diff --git a/backends/metax_gpu/kernels/fusion/fused_token_prune_kernel_register.cu b/backends/metax_gpu/kernels/fusion/fused_token_prune_kernel_register.cu index 42af265694..65183ffe42 100644 --- a/backends/metax_gpu/kernels/fusion/fused_token_prune_kernel_register.cu +++ b/backends/metax_gpu/kernels/fusion/fused_token_prune_kernel_register.cu @@ -14,7 +14,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/fused_token_prune_utils.h" -#include "paddle/phi/kernels/gpu/fused_token_prune_kernel.cu" //NOLINT +#include "paddle/phi/kernels/gpu/fused_token_prune_kernel.h" PD_CUSTOM_KERNEL_REGISTER(fused_token_prune, metax_gpu,