File tree Expand file tree Collapse file tree 1 file changed +5
-5
lines changed Expand file tree Collapse file tree 1 file changed +5
-5
lines changed Original file line number Diff line number Diff line change @@ -5319,13 +5319,13 @@ static __global__ void mul_mat_vec_q(
53195319 const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst,
53205320 const int ncols_x, const int nrows_x, const int nrows_y, const int nrows_dst) {
53215321
5322- #if __CUDA_ARCH__ < CC_RDNA2
5323- constexpr int nwarps = ncols_y <= 4 ? 4 : 2 ;
5324- constexpr int rows_per_cuda_block = ncols_y == 1 ? 1 : 2 ;
5325- #else
5322+ #if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) && (defined(RDNA2) || defined(RDNA3))
53265323 constexpr int nwarps = 1 ;
53275324 constexpr int rows_per_cuda_block = 1 ;
5328- #endif // __CUDA_ARCH__ < CC_RDNA2
5325+ #else
5326+ constexpr int nwarps = ncols_y <= 4 ? 4 : 2 ;
5327+ constexpr int rows_per_cuda_block = ncols_y == 1 ? 1 : 2 ;
5328+ #endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) && (defined(RDNA2) || defined(RDNA3))
53295329
53305330 constexpr int blocks_per_iter = vdr * nwarps*WARP_SIZE / qi;
53315331
You can’t perform that action at this time.
0 commit comments