1 file changed: +10 -3 lines changed
Original file line number Diff line number Diff line change
1010#include <hip/hip_runtime.h>
1111#include <hipblas/hipblas.h>
1212#include <hip/hip_fp16.h>
13+ #ifdef __HIP_PLATFORM_AMD__
14+ // for rocblas_initialize()
1315#include "rocblas/rocblas.h"
16+ #endif
1417#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F
1518#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F
1619#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
@@ -2746,10 +2749,14 @@ void ggml_init_cublas() {
27462749 static bool initialized = false ;
27472750
27482751 if (!initialized) {
2749- #ifdef GGML_USE_HIPBLAS
2750- rocblas_initialize ();
2751- hipDeviceSynchronize ();
2752+
2753+ #ifdef __HIP_PLATFORM_AMD__
2754+ // Workaround for a rocBLAS bug when using multiple graphics cards:
2755+ // https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346
2756+ rocblas_initialize ();
2757+ CUDA_CHECK (cudaDeviceSynchronize ());
27522758#endif
2759+
27532760 CUDA_CHECK (cudaGetDeviceCount (&g_device_count));
27542761 GGML_ASSERT (g_device_count <= GGML_CUDA_MAX_DEVICES);
27552762 int64_t total_vram = 0 ;
You can’t perform that action at this time.
0 commit comments