1 file changed: +9 -1 lines changed
Columns: original file line number | diff line number | diff line change
@@ -167,13 +167,21 @@ ifdef LLAMA_HIPBLAS
167167 ROCM_PATH ?= /opt/rocm
168168 CC := $(ROCM_PATH)/llvm/bin/clang
169169 CXX := $(ROCM_PATH)/llvm/bin/clang++
170- GPU_TARGETS = gfx900 gfx906 gfx908 gfx90a gfx1030
170+ GPU_TARGETS = gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030
171171 LLAMA_CUDA_DMMV_X ?= 64
172172 LLAMA_CUDA_DMMV_Y ?= 2
173173 CFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
174174 CXXFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
       # The library search path follows ROCM_PATH (it was hard-coded to /opt/rocm/lib),
       # so it agrees with the rpath on the same line when ROCM_PATH is overridden.
175175 LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64
176176 OBJS += ggml-cuda.o ggml_v2-cuda.o
177+ ifdef LLAMA_CUDA_DMMV_F16
     # When LLAMA_CUDA_DMMV_F16 is set (non-empty), append -DGGML_CUDA_DMMV_F16 to NVCCFLAGS.
     # NOTE(review): these knobs are appended to NVCCFLAGS, while the visible per-target
     # settings for ggml-cuda.o go through CXXFLAGS -- confirm the ggml-cuda.o recipe
     # actually passes NVCCFLAGS in this build configuration.
178+ NVCCFLAGS += -DGGML_CUDA_DMMV_F16
179+ endif # LLAMA_CUDA_DMMV_F16
180+ ifdef LLAMA_CUDA_KQUANTS_ITER
     # K_QUANTS_PER_ITERATION takes the user-supplied LLAMA_CUDA_KQUANTS_ITER value.
181+ NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
182+ else # LLAMA_CUDA_KQUANTS_ITER unset or empty: fall back to 2
183+ NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
184+ endif # LLAMA_CUDA_KQUANTS_ITER
# Target-specific CXXFLAGS for ggml-cuda.o: one --offload-arch flag per entry in
# GPU_TARGETS, plus the DMMV X/Y values. Variable references must not contain a
# space before the closing paren: make would look up a different (undefined)
# variable name and expand it to nothing.
177185 ggml-cuda.o : CXXFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS))
178186 ggml-cuda.o : CXXFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
179187 ggml-cuda.o : CXXFLAGS += -DGGML_CUDA_DMMV_Y=$(LLAMA_CUDA_DMMV_Y)
You can’t perform that action at this time.
0 commit comments