From d36aeb1a7daf920d7d63de4a40c60fd81310b120 Mon Sep 17 00:00:00 2001
From: Hashem Hashemi
Date: Sat, 22 Feb 2025 04:40:58 +0000
Subject: [PATCH 1/2] tuning adjustment for quantized skinny gemm.

---
 csrc/rocm/custom_kernels.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/csrc/rocm/custom_kernels.cu b/csrc/rocm/custom_kernels.cu
index 3533108b3316..997afe0f6eef 100644
--- a/csrc/rocm/custom_kernels.cu
+++ b/csrc/rocm/custom_kernels.cu
@@ -1715,7 +1715,7 @@ void wvSpltKQ_(void* in_a, void* in_b, void* out_c, void* scale_a,
 dim3 block(64, _WvPrGrp); \
 if ((K_in * N_in <= 32 * 1024) && (M_in % _YTILEs == 0)) { \
 int __wvPrGrp = mindiv(M_in, CuCount * _YTILEs, _WvPrGrp); \
- wvSpltKQ_hf_sml_<64, _YTILEs, _WvPrGrp, 16, _UNRLs, _N> \
+ wvSpltKQ_hf_sml_<64, _YTILEs, _WvPrGrp, 8, _UNRLs, _N> \
 <<>>(K_in, Kp_in, M_in, af4, bf4, c, s_a, \
 s_b, __wvPrGrp, Otp_in, CuCount); \
 } else { \

From 985e940bedd6828be2f7561be594966d3ee0a8d1 Mon Sep 17 00:00:00 2001
From: Hashem Hashemi
Date: Sat, 22 Feb 2025 04:48:32 +0000
Subject: [PATCH 2/2] lint fix

---
 csrc/rocm/custom_kernels.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/csrc/rocm/custom_kernels.cu b/csrc/rocm/custom_kernels.cu
index 997afe0f6eef..2d4a68fe3e7b 100644
--- a/csrc/rocm/custom_kernels.cu
+++ b/csrc/rocm/custom_kernels.cu
@@ -1715,7 +1715,7 @@ void wvSpltKQ_(void* in_a, void* in_b, void* out_c, void* scale_a,
 dim3 block(64, _WvPrGrp); \
 if ((K_in * N_in <= 32 * 1024) && (M_in % _YTILEs == 0)) { \
 int __wvPrGrp = mindiv(M_in, CuCount * _YTILEs, _WvPrGrp); \
- wvSpltKQ_hf_sml_<64, _YTILEs, _WvPrGrp, 8, _UNRLs, _N> \
+ wvSpltKQ_hf_sml_<64, _YTILEs, _WvPrGrp, 8, _UNRLs, _N> \
 <<>>(K_in, Kp_in, M_in, af4, bf4, c, s_a, \
 s_b, __wvPrGrp, Otp_in, CuCount); \
 } else { \