# Top-level alias goals. None of them has a recipe; each only pulls in the
# targets listed as its prerequisites.
#   default - every koboldcpp library flavour
#   tools   - the model quantizer executables
#   dev     - only the OpenBLAS library
#   dev2    - only the CLBlast library
default: koboldcpp koboldcpp_failsafe koboldcpp_openblas koboldcpp_noavx2 koboldcpp_clblast koboldcpp_cublas
tools: quantize_gpt2 quantize_gptj quantize_llama quantize_neox quantize_mpt
dev: koboldcpp_openblas
dev2: koboldcpp_clblast

# These names never correspond to files, so keep a stray file called
# "default" or "tools" from masking them.
.PHONY: default tools dev dev2
4040
# keep standard at C11 and C++11
# Both flag sets optimise with -Ofast. Rules that hand CXXFLAGS to a compiler
# that does not accept -Ofast must rewrite it themselves (the CUDA object
# rules substitute -O3).
CFLAGS = -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c11 -fPIC -DGGML_USE_K_QUANTS
CXXFLAGS = -I. -I./examples -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c++11 -fPIC -DGGML_USE_K_QUANTS
LDFLAGS =
4545
4646# these are used on windows, to build some libraries with extra old device compatibility
@@ -163,20 +163,34 @@ else ifdef LLAMA_CUDA_DMMV_Y
163163else
164164 NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
165165endif # LLAMA_CUDA_MMV_Y
# Optional CUDA tuning knobs. Each one is read from the make command line or
# the environment and forwarded to nvcc as a preprocessor define or option.

# LLAMA_CUDA_F16 defines GGML_CUDA_F16.
ifdef LLAMA_CUDA_F16
  NVCCFLAGS += -DGGML_CUDA_F16
endif # LLAMA_CUDA_F16
# LLAMA_CUDA_DMMV_F16 is still accepted and maps to the same define.
ifdef LLAMA_CUDA_DMMV_F16
  NVCCFLAGS += -DGGML_CUDA_F16
endif # LLAMA_CUDA_DMMV_F16
# K_QUANTS_PER_ITERATION: caller-supplied value, otherwise 2.
ifdef LLAMA_CUDA_KQUANTS_ITER
  NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
else
  NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
endif
# GGML_CUDA_MMQ_Y: caller-supplied value, otherwise 64.
ifdef LLAMA_CUDA_MMQ_Y
  NVCCFLAGS += -DGGML_CUDA_MMQ_Y=$(LLAMA_CUDA_MMQ_Y)
else
  NVCCFLAGS += -DGGML_CUDA_MMQ_Y=64
endif # LLAMA_CUDA_MMQ_Y
# Disabled: GGML_CUDA_CUBLAS is intentionally not forwarded.
# ifdef LLAMA_CUDA_CUBLAS
#   NVCCFLAGS += -DGGML_CUDA_CUBLAS
# endif # LLAMA_CUDA_CUBLAS
# LLAMA_CUDA_CCBIN is passed to nvcc as its -ccbin argument.
ifdef LLAMA_CUDA_CCBIN
  NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
endif
# CUDA objects, compiled with $(NVCC). CXXFLAGS carries -Ofast, so it is
# rewritten to -O3 before being handed to nvcc.
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
	$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
	$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
ggml_v2-cuda-legacy.o: otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h
	$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
180194endif # LLAMA_CUBLAS
181195
182196ifdef LLAMA_HIPBLAS
@@ -249,7 +263,7 @@ CXXV := $(shell $(CXX) --version | head -n 1)
# Link command for each library flavour. All start empty; the platform
# sections that follow assign the ones that can be built, and the matching
# koboldcpp_* rule uses the variable as its whole recipe.
DEFAULT_BUILD =
FAILSAFE_BUILD =
OPENBLAS_BUILD =
NOAVX2_BUILD =
CLBLAST_BUILD =
CUBLAS_BUILD =
HIPBLAS_BUILD =
@@ -258,7 +272,7 @@ ifeq ($(OS),Windows_NT)
# Windows link commands: every flavour is linked into <target>.dll.
# These must stay recursively expanded (=) so that $^ and $@ are resolved
# inside the rule that uses the variable, not here.
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
# The noavx2 flavour no longer links the bundled OpenBLAS import library.
NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS)
263277
264278 ifdef LLAMA_CUBLAS
# Non-Windows link commands: flavours are linked into <target>.so.
FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
ifdef LLAMA_OPENBLAS
  OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
  # NOTE(review): the noavx2 objects are compiled without $(OPENBLAS_FLAGS),
  # yet this link line still needs LLAMA_OPENBLAS and links -lopenblas.
  # Confirm whether that is intended.
  NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
endif
277291 ifdef LLAMA_CLBLAST
278292 ifeq ($(UNAME_S),Darwin)
@@ -327,8 +341,8 @@ ggml_openblas.o: ggml.c ggml.h
327341 $(CC ) $(CFLAGS ) $(FULLCFLAGS ) $(OPENBLAS_FLAGS ) -c $< -o $@
# Further builds of ggml.c. Only the extra flag set differs between them.
ggml_failsafe.o: ggml.c ggml.h
	$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@
# noavx2 build: $(SIMPLECFLAGS) only, no BLAS flags.
ggml_noavx2.o: ggml.c ggml.h
	$(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@
ggml_clblast.o: ggml.c ggml.h
	$(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
334348ggml_cublas.o : ggml.c ggml.h
@@ -342,15 +356,19 @@ k_quants_noavx2.o: k_quants.c k_quants.h ggml.h ggml-cuda.h
# k_quants.c built with $(NONECFLAGS) for the failsafe library.
k_quants_failsafe.o: k_quants.c k_quants.h ggml.h ggml-cuda.h
	$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@

# there's no intrinsics or special gpu ops used here, so we can have a universal object
# (built with plain $(CFLAGS) and shared by every library flavour)
ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
	$(CC) $(CFLAGS) -c $< -o $@
362+
# version 2 libs
# otherarch/ggml_v2.c is compiled once per library flavour. The flag sets
# mirror the ones used for the ggml.c objects of the same flavour.
ggml_v2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
	$(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@
ggml_v2_openblas.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
	$(CC) $(CFLAGS) $(FULLCFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
ggml_v2_failsafe.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
	$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@
# noavx2 build: $(SIMPLECFLAGS) only, no BLAS flags.
ggml_v2_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
	$(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@
ggml_v2_clblast.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
	$(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
356374ggml_v2_cublas.o : otherarch/ggml_v2.c otherarch/ggml_v2.h
@@ -371,10 +389,12 @@ ggml_v2-opencl-legacy.o: otherarch/ggml_v2-opencl-legacy.c otherarch/ggml_v2-ope
371389 $(CC ) $(CFLAGS ) -c $< -o $@
372390
# intermediate objects
# C++ objects compiled with plain $(CXXFLAGS). Only the first prerequisite
# ($<) is compiled; the headers are listed so that editing one triggers a
# rebuild.
llama.o: llama.cpp ggml.h ggml-alloc.h ggml-cuda.h ggml-metal.h llama.h llama-util.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
common.o: examples/common.cpp examples/common.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
console.o: examples/console.cpp examples/console.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
grammar-parser.o: examples/grammar-parser.cpp examples/grammar-parser.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
380400expose.o : expose.cpp expose.h
@@ -392,37 +412,37 @@ gpttype_adapter_cublas.o: $(GPTTYPE_ADAPTER)
392412 $(CXX ) $(CXXFLAGS ) $(CUBLAS_FLAGS ) $(HIPFLAGS ) -c $< -o $@
393413
# Remove every object file, tool executable and koboldcpp library that this
# Makefile can produce. "*.o" must stay a single glob: written as "* .o" it
# would delete every file in the directory.
.PHONY: clean
clean:
	rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_cublas.dll koboldcpp.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_cublas.so
396416
# Command-line example program. build-info.h is a prerequisite so that it
# exists before compiling, but headers are filtered out of the compile line.
main: examples/main/main.cpp build-info.h ggml.o k_quants.o ggml-alloc.o llama.o common.o console.o grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
	@echo
	@echo ' ==== Run ./main -h for help. ===='
	@echo
402422
# generated libraries
# Each flavour lists the objects that go into it; the recipe is the link
# command held in the matching *_BUILD variable, which expands $^ and $@
# here. Every flavour links the shared ggml-alloc.o object.
koboldcpp: ggml.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o k_quants.o ggml-alloc.o $(OBJS)
	$(DEFAULT_BUILD)
koboldcpp_openblas: ggml_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o common.o gpttype_adapter.o k_quants.o ggml-alloc.o $(OBJS)
	$(OPENBLAS_BUILD)
koboldcpp_failsafe: ggml_failsafe.o ggml_v2_failsafe.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_failsafe.o ggml-alloc.o $(OBJS)
	$(FAILSAFE_BUILD)
# noavx2 reuses the failsafe ggml_v1 and gpttype_adapter objects.
koboldcpp_noavx2: ggml_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_noavx2.o ggml-alloc.o $(OBJS)
	$(NOAVX2_BUILD)
koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o k_quants.o ggml-alloc.o $(OBJS)
	$(CLBLAST_BUILD)
# The cuBLAS and hipBLAS builds share this target and object list.
koboldcpp_cublas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o ggml-alloc.o $(CUBLAS_OBJS) $(HIP_OBJS) $(OBJS)
	$(CUBLAS_BUILD) $(HIPBLAS_BUILD)
416436
# Quantizer executables. Each one compiles its tool source(s) together with
# the shared ggml / llama / k_quants / ggml-alloc objects in a single
# compile-and-link step.
quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o k_quants.o ggml-alloc.o
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
quantize_gptj: ggml.o llama.o k_quants.o ggml-alloc.o otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
quantize_gpt2: ggml.o llama.o k_quants.o ggml-alloc.o otherarch/tools/gpt2_quantize.cpp otherarch/tools/common-ggml.cpp
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
quantize_neox: ggml.o llama.o k_quants.o ggml-alloc.o otherarch/tools/neox_quantize.cpp otherarch/tools/common-ggml.cpp
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
quantize_mpt: ggml.o llama.o k_quants.o ggml-alloc.o otherarch/tools/mpt_quantize.cpp otherarch/tools/common-ggml.cpp
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
427447
428448
0 commit comments