Skip to content

Commit 062561d

Browse files
committed
2 parents 45916f9 + 40e07a6 commit 062561d

File tree

20 files changed

+324
-285
lines changed

20 files changed

+324
-285
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ models-mnt
4545
/main
4646
/metal
4747
/perplexity
48+
/q8dot
4849
/quantize
4950
/quantize-stats
5051
/result

CMakeLists.txt

+26-25
Original file line numberDiff line numberDiff line change
@@ -420,37 +420,38 @@ endif()
420420

421421
if (LLAMA_ALL_WARNINGS)
422422
if (NOT MSVC)
423-
set(c_flags
424-
-Wall
425-
-Wextra
426-
-Wpedantic
427-
-Wcast-qual
428-
-Wdouble-promotion
429-
-Wshadow
430-
-Wstrict-prototypes
431-
-Wpointer-arith
432-
-Wmissing-prototypes
433-
-Werror=implicit-int
434-
-Wno-unused-function
435-
)
436-
set(cxx_flags
437-
-Wall
438-
-Wextra
439-
-Wpedantic
440-
-Wcast-qual
441-
-Wmissing-declarations
442-
-Wno-unused-function
443-
-Wno-multichar
444-
)
445-
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
446-
# g++ only
447-
set(cxx_flags ${cxx_flags} -Wno-format-truncation -Wno-array-bounds)
423+
set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
424+
set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int
425+
-Werror=implicit-function-declaration)
426+
set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
427+
428+
if (CMAKE_C_COMPILER_ID MATCHES "Clang")
429+
set(warning_flags ${warning_flags} -Wunreachable-code-break -Wunreachable-code-return)
430+
set(cxx_flags ${cxx_flags} -Wmissing-prototypes -Wextra-semi)
431+
432+
if (
433+
(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 3.8.0) OR
434+
(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 7.3.0)
435+
)
436+
set(c_flags ${c_flags} -Wdouble-promotion)
437+
endif()
438+
elseif (CMAKE_C_COMPILER_ID STREQUAL "GNU")
439+
set(c_flags ${c_flags} -Wdouble-promotion)
440+
set(cxx_flags ${cxx_flags} -Wno-array-bounds)
441+
442+
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.1.0)
443+
set(cxx_flags ${cxx_flags} -Wno-format-truncation)
444+
endif()
445+
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.1.0)
446+
set(cxx_flags ${cxx_flags} -Wextra-semi)
447+
endif()
448448
endif()
449449
else()
450450
# todo : msvc
451451
endif()
452452

453453
add_compile_options(
454+
${warning_flags}
454455
"$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
455456
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
456457
)

Makefile

+52-21
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Define the default target now so that it is always the first target
2-
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative parallel finetune export-lora tests/test-c.o
2+
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative benchmark-matmult parallel finetune export-lora tests/test-c.o
33

44
# Binaries only useful for tests
55
TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama
@@ -19,6 +19,20 @@ ifndef UNAME_M
1919
UNAME_M := $(shell uname -m)
2020
endif
2121

22+
ifeq '' '$(findstring clang,$(shell $(CC) --version))'
23+
CC_IS_GCC=1
24+
CC_VER := $(shell $(CC) -dumpfullversion -dumpversion | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
25+
else
26+
CC_IS_CLANG=1
27+
ifeq '' '$(findstring Apple LLVM,$(shell $(CC) --version))'
28+
CC_IS_LLVM_CLANG=1
29+
else
30+
CC_IS_APPLE_CLANG=1
31+
endif
32+
CC_VER := $(shell $(CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \
33+
| awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
34+
endif
35+
2236
# Mac OS + Arm can report x86_64
2337
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
2438
ifeq ($(UNAME_S),Darwin)
@@ -87,9 +101,6 @@ CC := riscv64-unknown-linux-gnu-gcc
87101
CXX := riscv64-unknown-linux-gnu-g++
88102
endif
89103

90-
CCV := $(shell $(CC) --version | head -n 1)
91-
CXXV := $(shell $(CXX) --version | head -n 1)
92-
93104
#
94105
# Compile flags
95106
#
@@ -173,20 +184,33 @@ ifdef LLAMA_DISABLE_LOGS
173184
endif # LLAMA_DISABLE_LOGS
174185

175186
# warnings
176-
MK_CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
177-
-Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
178-
MK_CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wmissing-declarations -Wno-unused-function -Wno-multichar
179-
180-
# TODO(cebtenzzre): remove this once PR #2632 gets merged
181-
TTFS_CXXFLAGS = $(CXXFLAGS) -Wno-missing-declarations
182-
183-
ifneq '' '$(findstring clang,$(shell $(CXX) --version))'
184-
# clang++ only
185-
MK_CXXFLAGS += -Wmissing-prototypes
186-
TTFS_CXXFLAGS += -Wno-missing-prototypes
187+
WARN_FLAGS = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
188+
MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \
189+
-Werror=implicit-function-declaration
190+
MK_CXXFLAGS += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn
191+
192+
ifeq ($(CC_IS_CLANG), 1)
193+
# clang options
194+
MK_CFLAGS += -Wunreachable-code-break -Wunreachable-code-return
195+
MK_HOST_CXXFLAGS += -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi
196+
197+
ifneq '' '$(and $(CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 030800)))'
198+
MK_CFLAGS += -Wdouble-promotion
199+
endif
200+
ifneq '' '$(and $(CC_IS_APPLE_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 070300)))'
201+
MK_CFLAGS += -Wdouble-promotion
202+
endif
187203
else
188-
# g++ only
189-
MK_CXXFLAGS += -Wno-format-truncation -Wno-array-bounds
204+
# gcc options
205+
MK_CFLAGS += -Wdouble-promotion
206+
MK_HOST_CXXFLAGS += -Wno-array-bounds
207+
208+
ifeq ($(shell expr $(CC_VER) \>= 070100), 1)
209+
MK_HOST_CXXFLAGS += -Wno-format-truncation
210+
endif
211+
ifeq ($(shell expr $(CC_VER) \>= 080100), 1)
212+
MK_HOST_CXXFLAGS += -Wextra-semi
213+
endif
190214
endif
191215

192216
# OS specific
@@ -382,7 +406,7 @@ ifdef LLAMA_CUDA_CCBIN
382406
NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
383407
endif
384408
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
385-
$(NVCC) $(NVCCFLAGS) -Wno-pedantic -c $< -o $@
409+
$(NVCC) $(NVCCFLAGS) -c $< -o $@
386410
endif # LLAMA_CUBLAS
387411

388412
ifdef LLAMA_CLBLAST
@@ -472,8 +496,8 @@ $(info I CFLAGS: $(CFLAGS))
472496
$(info I CXXFLAGS: $(CXXFLAGS))
473497
$(info I NVCCFLAGS: $(NVCCFLAGS))
474498
$(info I LDFLAGS: $(LDFLAGS))
475-
$(info I CC: $(CCV))
476-
$(info I CXX: $(CXXV))
499+
$(info I CC: $(shell $(CC) --version | head -n 1))
500+
$(info I CXX: $(shell $(CXX) --version | head -n 1))
477501
$(info )
478502

479503
#
@@ -554,7 +578,7 @@ gguf: examples/gguf/gguf.cpp ggml.o llama.o $(OBJS)
554578
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
555579

556580
train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o common.o train.o $(OBJS)
557-
$(CXX) $(TTFS_CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
581+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
558582

559583
convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
560584
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
@@ -601,11 +625,18 @@ tests: $(TEST_TARGETS)
601625

602626
benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o $(OBJS)
603627
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
628+
629+
run-benchmark-matmult: benchmark-matmult
604630
./$@
605631

632+
.PHONY: run-benchmark-matmult
633+
606634
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
607635
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
608636

637+
q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
638+
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
639+
609640
tests/test-llama-grammar: tests/test-llama-grammar.cpp build-info.h ggml.o common.o grammar-parser.o $(OBJS)
610641
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
611642

Package.swift

+3-3
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ let platforms: [SupportedPlatform]? = [
1010
.tvOS(.v14)
1111
]
1212
let exclude: [String] = []
13-
let additionalSources: [String] = ["ggml-metal.m"]
13+
let additionalSources: [String] = ["ggml-metal.m", "ggml-metal.metal"]
1414
let additionalSettings: [CSetting] = [
1515
.unsafeFlags(["-fno-objc-arc"]),
1616
.define("GGML_SWIFT"),
@@ -44,8 +44,8 @@ let package = Package(
4444
cSettings: [
4545
.unsafeFlags(["-Wno-shorten-64-to-32"]),
4646
.define("GGML_USE_K_QUANTS"),
47-
.define("GGML_USE_ACCELERATE")
48-
.define("ACCELERATE_NEW_LAPACK")
47+
.define("GGML_USE_ACCELERATE"),
48+
.define("ACCELERATE_NEW_LAPACK"),
4949
.define("ACCELERATE_LAPACK_ILP64")
5050
] + additionalSettings,
5151
linkerSettings: [

README.md

+6-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
1111

1212
### Hot topics
1313

14-
- Parallel decoding + continuous batching support incoming: [#3228](https://github.com/ggerganov/llama.cpp/pull/3228) \
14+
- ‼️ Breaking change: `rope_freq_base` and `rope_freq_scale` must be set to zero to use the model default values: [#3401](https://github.com/ggerganov/llama.cpp/pull/3401)
15+
- Parallel decoding + continuous batching support added: [#3228](https://github.com/ggerganov/llama.cpp/pull/3228) \
1516
**Devs should become familiar with the new API**
1617
- Local Falcon 180B inference on Mac Studio
1718

@@ -92,7 +93,8 @@ as the main playground for developing new features for the [ggml](https://github
9293
- [X] [WizardLM](https://github.com/nlpxucan/WizardLM)
9394
- [X] [Baichuan-7B](https://huggingface.co/baichuan-inc/baichuan-7B) and its derivations (such as [baichuan-7b-sft](https://huggingface.co/hiyouga/baichuan-7b-sft))
9495
- [X] [Aquila-7B](https://huggingface.co/BAAI/Aquila-7B) / [AquilaChat-7B](https://huggingface.co/BAAI/AquilaChat-7B)
95-
- [X] Mistral AI v0.1
96+
- [X] [Starcoder models](https://github.com/ggerganov/llama.cpp/pull/3187)
97+
- [X] [Mistral AI v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
9698

9799
**Bindings:**
98100

@@ -662,6 +664,8 @@ PROMPT_TEMPLATE=./prompts/chat-with-bob.txt PROMPT_CACHE_FILE=bob.prompt.bin \
662664
663665
The `grammars/` folder contains a handful of sample grammars. To write your own, check out the [GBNF Guide](./grammars/README.md).
664666
667+
For authoring more complex JSON grammars, you can also check out https://grammar.intrinsiclabs.ai/, a browser app that lets you write TypeScript interfaces which it compiles to GBNF grammars that you can save for local use. Note that the app is built and maintained by members of the community, please file any issues or FRs on [its repo](https://github.com/intrinsiclabsai/gbnfgen) and not this one.
668+
665669
### Instruction mode with Alpaca
666670
667671
1. First, download the `ggml` Alpaca model into the `./models` folder

common/common.cpp

+1-2
Original file line numberDiff line numberDiff line change
@@ -755,10 +755,9 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
755755
case 7: return "He";
756756
case 8: return "She";
757757
case 9: return "They";
758-
default: return "To";
759758
}
760759

761-
return "The";
760+
GGML_UNREACHABLE();
762761
}
763762

764763
//

0 commit comments

Comments
 (0)