# Aggregate targets: `default` depends on every koboldcpp library variant,
# `tools` on the quantize_* utilities; `dev` and `dev2` each build one variant.
1
- default : koboldcpp koboldcpp_failsafe koboldcpp_openblas koboldcpp_openblas_noavx2 koboldcpp_clblast koboldcpp_cublas
1
+ default : koboldcpp koboldcpp_failsafe koboldcpp_openblas koboldcpp_noavx2 koboldcpp_clblast koboldcpp_cublas
2
2
tools : quantize_gpt2 quantize_gptj quantize_llama quantize_neox quantize_mpt
3
3
dev : koboldcpp_openblas
4
4
dev2 : koboldcpp_clblast
40
40
41
41
# keep standard at C11 and C++11
42
42
CFLAGS = -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c11 -fPIC -DGGML_USE_K_QUANTS
# This change switches the C++ optimisation level from -O3 to -Ofast, matching CFLAGS.
43
- CXXFLAGS = -I. -I./examples -I./include -I./include/CL -I./otherarch -I./otherarch/tools -O3 -DNDEBUG -std=c++11 -fPIC -DGGML_USE_K_QUANTS
43
+ CXXFLAGS = -I. -I./examples -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c++11 -fPIC -DGGML_USE_K_QUANTS
44
44
LDFLAGS =
45
45
46
46
# these are used on windows, to build some libraries with extra old device compatibility
@@ -163,20 +163,34 @@ else ifdef LLAMA_CUDA_DMMV_Y
163
163
else
164
164
NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
165
165
endif # LLAMA_CUDA_MMV_Y
# Optional nvcc switches; each one appends to NVCCFLAGS when its variable is set:
#   LLAMA_CUDA_F16          -> -DGGML_CUDA_F16
#   LLAMA_CUDA_DMMV_F16     -> -DGGML_CUDA_F16 (previously -DGGML_CUDA_DMMV_F16)
#   LLAMA_CUDA_KQUANTS_ITER -> -DK_QUANTS_PER_ITERATION=<value>, 2 when unset
#   LLAMA_CUDA_MMQ_Y        -> -DGGML_CUDA_MMQ_Y=<value>, 64 when unset
#   LLAMA_CUDA_CCBIN        -> -ccbin <value>
166
+ ifdef LLAMA_CUDA_F16
167
+ NVCCFLAGS += -DGGML_CUDA_F16
168
+ endif # LLAMA_CUDA_F16
166
169
ifdef LLAMA_CUDA_DMMV_F16
167
- NVCCFLAGS += -DGGML_CUDA_DMMV_F16
170
+ NVCCFLAGS += -DGGML_CUDA_F16
168
171
endif # LLAMA_CUDA_DMMV_F16
169
172
ifdef LLAMA_CUDA_KQUANTS_ITER
170
173
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
171
174
else
172
175
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
173
176
endif
177
+ ifdef LLAMA_CUDA_MMQ_Y
178
+ NVCCFLAGS += -DGGML_CUDA_MMQ_Y=$(LLAMA_CUDA_MMQ_Y)
179
+ else
180
+ NVCCFLAGS += -DGGML_CUDA_MMQ_Y=64
181
+ endif # LLAMA_CUDA_MMQ_Y
182
+ # ifdef LLAMA_CUDA_CUBLAS
183
+ # NVCCFLAGS += -DGGML_CUDA_CUBLAS
184
+ # endif # LLAMA_CUDA_CUBLAS
185
+ ifdef LLAMA_CUDA_CCBIN
186
+ NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
187
+ endif
# CUDA objects are compiled with nvcc. The updated recipes pass the host
# CXXFLAGS with -Ofast rewritten to -O3 (presumably because nvcc does not
# accept -Ofast -- confirm).
174
188
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
175
- $(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
189
+ $(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
176
190
ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
177
- $(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
191
+ $(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
178
192
ggml_v2-cuda-legacy.o: otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h
179
- $(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
193
+ $(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
180
194
endif # LLAMA_CUBLAS
181
195
182
196
ifdef LLAMA_HIPBLAS
@@ -249,7 +263,7 @@ CXXV := $(shell $(CXX) --version | head -n 1)
# Link commands, one per library variant. They start out empty here; the
# OS-specific sections further down assign the ones that apply, and the
# library rules expand them as their recipe.
249
263
DEFAULT_BUILD =
250
264
FAILSAFE_BUILD =
251
265
OPENBLAS_BUILD =
252
- OPENBLAS_NOAVX2_BUILD =
266
+ NOAVX2_BUILD =
253
267
CLBLAST_BUILD =
254
268
CUBLAS_BUILD =
255
269
HIPBLAS_BUILD =
@@ -258,7 +272,7 @@ ifeq ($(OS),Windows_NT)
# Windows: every variant is linked into <target>.dll.
# NOTE(review): "$@.dll" was restored from a scrape artifact ("[email protected]");
# confirm against the upstream Makefile.
258
272
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
259
273
FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
260
274
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
261
- OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
275
+ NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
262
276
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS)
263
277
278
ifdef LLAMA_CUBLAS
# Non-Windows: variants are linked into <target>.so.
# NOTE(review): "$@.so" was restored from a scrape artifact ("[email protected]");
# confirm against the upstream Makefile.
272
286
FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
273
287
ifdef LLAMA_OPENBLAS
274
288
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
275
- OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
289
+ NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
276
290
endif
277
291
ifdef LLAMA_CLBLAST
278
292
ifeq ($(UNAME_S),Darwin)
@@ -327,8 +341,8 @@ ggml_openblas.o: ggml.c ggml.h
# ggml.c is compiled once per library variant; only the flag set differs.
# The renamed noavx2 object no longer receives $(OPENBLAS_FLAGS).
327
341
$(CC) $(CFLAGS) $(FULLCFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
328
342
ggml_failsafe.o: ggml.c ggml.h
329
343
$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@
330
- ggml_openblas_noavx2.o: ggml.c ggml.h
331
- $(CC) $(CFLAGS) $(SIMPLECFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
344
+ ggml_noavx2.o: ggml.c ggml.h
345
+ $(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@
332
346
ggml_clblast.o: ggml.c ggml.h
333
347
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
334
348
ggml_cublas.o : ggml.c ggml.h
@@ -342,15 +356,19 @@ k_quants_noavx2.o: k_quants.c k_quants.h ggml.h ggml-cuda.h
# k_quants / ggml-alloc / ggml_v2 objects. ggml-alloc.o is built once with
# plain $(CFLAGS); ggml_v2.c is built once per variant, and the renamed
# noavx2 object no longer receives $(OPENBLAS_FLAGS).
342
356
k_quants_failsafe.o: k_quants.c k_quants.h ggml.h ggml-cuda.h
343
357
$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@
344
358
359
+ # there's no intrinsics or special gpu ops used here, so we can have a universal object
360
+ ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
361
+ $(CC) $(CFLAGS) -c $< -o $@
362
+
345
363
# version 2 libs
346
364
ggml_v2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
347
365
$(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@
348
366
ggml_v2_openblas.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
349
367
$(CC) $(CFLAGS) $(FULLCFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
350
368
ggml_v2_failsafe.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
351
369
$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@
352
- ggml_v2_openblas_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
353
- $(CC) $(CFLAGS) $(SIMPLECFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
370
+ ggml_v2_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
371
+ $(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@
354
372
ggml_v2_clblast.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
355
373
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
356
374
ggml_v2_cublas.o : otherarch/ggml_v2.c otherarch/ggml_v2.h
@@ -371,10 +389,12 @@ ggml_v2-opencl-legacy.o: otherarch/ggml_v2-opencl-legacy.c otherarch/ggml_v2-ope
# Shared C++ objects, each compiled with plain $(CXXFLAGS). The change adds
# ggml-alloc.h and ggml-metal.h to llama.o's prerequisites and introduces
# console.o.
371
389
$(CC) $(CFLAGS) -c $< -o $@
372
390
373
391
# intermediate objects
374
- llama.o: llama.cpp ggml.h ggml-cuda.h llama.h llama-util.h
392
+ llama.o: llama.cpp ggml.h ggml-alloc.h ggml-cuda.h ggml-metal.h llama.h llama-util.h
375
393
$(CXX) $(CXXFLAGS) -c $< -o $@
376
394
common.o: examples/common.cpp examples/common.h
377
395
$(CXX) $(CXXFLAGS) -c $< -o $@
396
+ console.o: examples/console.cpp examples/console.h
397
+ $(CXX) $(CXXFLAGS) -c $< -o $@
378
398
grammar-parser.o: examples/grammar-parser.cpp examples/grammar-parser.h
379
399
$(CXX) $(CXXFLAGS) -c $< -o $@
380
400
expose.o : expose.cpp expose.h
@@ -392,37 +412,37 @@ gpttype_adapter_cublas.o: $(GPTTYPE_ADAPTER)
# `clean` removes object files plus the listed executables and libraries.
# `main` links the example program; headers are filtered out of $^ so that
# only sources and objects reach the compiler command line.
392
412
$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
393
413
394
414
clean:
395
- rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll koboldcpp_cublas.dll koboldcpp.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_openblas_noavx2.so koboldcpp_clblast.so koboldcpp_cublas.so
415
+ rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_cublas.dll koboldcpp.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_cublas.so
396
416
397
- main: examples/main/main.cpp build-info.h ggml.o k_quants.o llama.o common.o grammar-parser.o $(OBJS)
417
+ main: examples/main/main.cpp build-info.h ggml.o k_quants.o ggml-alloc.o llama.o common.o console.o grammar-parser.o $(OBJS)
398
418
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
399
419
@echo
400
420
@echo ' ==== Run ./main -h for help. ===='
401
421
@echo
402
422
# Each library variant lists its object set and runs the matching *_BUILD
# link command as its recipe. The change adds ggml-alloc.o to every variant
# and renames koboldcpp_openblas_noavx2 to koboldcpp_noavx2.
403
423
# generated libraries
404
- koboldcpp: ggml.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o k_quants.o $(OBJS)
424
+ koboldcpp: ggml.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o k_quants.o ggml-alloc.o $(OBJS)
405
425
$(DEFAULT_BUILD)
406
- koboldcpp_openblas: ggml_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o common.o gpttype_adapter.o k_quants.o $(OBJS)
426
+ koboldcpp_openblas: ggml_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o common.o gpttype_adapter.o k_quants.o ggml-alloc.o $(OBJS)
407
427
$(OPENBLAS_BUILD)
408
- koboldcpp_failsafe: ggml_failsafe.o ggml_v2_failsafe.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_failsafe.o $(OBJS)
428
+ koboldcpp_failsafe: ggml_failsafe.o ggml_v2_failsafe.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_failsafe.o ggml-alloc.o $(OBJS)
409
429
$(FAILSAFE_BUILD)
410
- koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_v2_openblas_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_noavx2.o $(OBJS)
411
- $(OPENBLAS_NOAVX2_BUILD)
412
- koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o k_quants.o $(OBJS)
430
+ koboldcpp_noavx2: ggml_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_noavx2.o ggml-alloc.o $(OBJS)
431
+ $(NOAVX2_BUILD)
432
+ koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o k_quants.o ggml-alloc.o $(OBJS)
413
433
$(CLBLAST_BUILD)
414
- koboldcpp_cublas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o $(CUBLAS_OBJS) $(HIP_OBJS) $(OBJS)
434
+ koboldcpp_cublas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o ggml-alloc.o $(CUBLAS_OBJS) $(HIP_OBJS) $(OBJS)
415
435
$(CUBLAS_BUILD) $(HIPBLAS_BUILD)
416
436
# Quantize utilities: each links its sources and objects ($^) into an
# executable named after the target. The change adds ggml-alloc.o to every
# tool's prerequisites.
417
- quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o k_quants.o
437
+ quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o k_quants.o ggml-alloc.o
418
438
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
419
- quantize_gptj: ggml.o llama.o k_quants.o otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp
439
+ quantize_gptj: ggml.o llama.o k_quants.o ggml-alloc.o otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp
420
440
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
421
- quantize_gpt2: ggml.o llama.o k_quants.o otherarch/tools/gpt2_quantize.cpp otherarch/tools/common-ggml.cpp
441
+ quantize_gpt2: ggml.o llama.o k_quants.o ggml-alloc.o otherarch/tools/gpt2_quantize.cpp otherarch/tools/common-ggml.cpp
422
442
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
423
- quantize_neox: ggml.o llama.o k_quants.o otherarch/tools/neox_quantize.cpp otherarch/tools/common-ggml.cpp
443
+ quantize_neox: ggml.o llama.o k_quants.o ggml-alloc.o otherarch/tools/neox_quantize.cpp otherarch/tools/common-ggml.cpp
424
444
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
425
- quantize_mpt: ggml.o llama.o k_quants.o otherarch/tools/mpt_quantize.cpp otherarch/tools/common-ggml.cpp
445
+ quantize_mpt: ggml.o llama.o k_quants.o ggml-alloc.o otherarch/tools/mpt_quantize.cpp otherarch/tools/common-ggml.cpp
426
446
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
427
447
428
448
0 commit comments