Skip to content

Commit 9871266

Browse files
committed
Merge branch 'master' of github.com:ggerganov/llama.cpp into grammar-example
* 'master' of github.com:ggerganov/llama.cpp: (34 commits)
  examples: support LLaVA v1.5 (multimodal model) (ggml-org#3436)
  docs : fix typo GOMP_CPU_AFFINITY (ggml-org#3597)
  cmake : fix add_compile_options on macOS
  typo : it is `--n-gpu-layers` not `--gpu-layers` (ggml-org#3592)
  ci : check if there is enough VRAM (ggml-org#3596)
  server : add completion mode (no chat) (ggml-org#3582)
  prompts : add mnemonics.txt
  server : fix kv cache management (ggml-org#3588)
  main : fix session loading bug (ggml-org#3400)
  server : add parameter -tb N, --threads-batch N (ggml-org#3584)
  common : fix mirostat state when using multiple sequences (ggml-org#3543)
  batched : add bench tool (ggml-org#3545)
  examples : add batched.swift + improve CI for swift (ggml-org#3562)
  Add MPT model to supported models in README.md (ggml-org#3574)
  Minor improvements in GPT2 tokenizer (ggml-org#3567)
  readme : add bloom (ggml-org#3570)
  llm : add bloom models (ggml-org#3553)
  swift : improvements and fixes (ggml-org#3564)
  llm : add MPT support (ggml-org#3417)
  infill. : fix tokenization (ggml-org#3508)
  ...
2 parents f7b9bf1 + 370359e commit 9871266

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

70 files changed

+16788
-2888
lines changed

Diff for: .github/workflows/build.yml

+7-2
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ on:
1010
push:
1111
branches:
1212
- master
13-
paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift']
13+
paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m']
1414
pull_request:
1515
types: [opened, synchronize, reopened]
16-
paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift']
16+
paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m']
1717

1818
env:
1919
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
@@ -276,6 +276,11 @@ jobs:
276276
run: |
277277
xcodebuild -scheme llama -destination "${{ matrix.destination }}"
278278
279+
- name: Build Swift Example
280+
id: make_build_swift_example
281+
run: |
282+
make swift
283+
279284
windows-latest-cmake:
280285
runs-on: windows-latest
281286

Diff for: .github/workflows/gguf-publish.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,9 @@ jobs:
3636
poetry install
3737
3838
- name: Build package
39-
run: poetry build
39+
run: cd gguf-py && poetry build
4040
- name: Publish package
4141
uses: pypa/gh-action-pypi-publish@release/v1
4242
with:
4343
password: ${{ secrets.PYPI_API_TOKEN }}
44+
packages-dir: gguf-py/dist

Diff for: .github/workflows/zig-build.yml

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
name: Zig CI
2+
3+
on:
4+
pull_request:
5+
push:
6+
branches:
7+
- master
8+
9+
jobs:
10+
build:
11+
strategy:
12+
fail-fast: false
13+
matrix:
14+
runs-on: [ubuntu-latest, macos-latest, windows-latest]
15+
runs-on: ${{ matrix.runs-on }}
16+
steps:
17+
- uses: actions/checkout@v3
18+
with:
19+
submodules: recursive
20+
fetch-depth: 0
21+
- uses: goto-bus-stop/setup-zig@v2
22+
with:
23+
version: 0.11.0
24+
- name: Build Summary
25+
run: zig build --summary all -freference-trace

Diff for: .gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ models-mnt
4444
/infill
4545
/libllama.so
4646
/llama-bench
47+
/llava
4748
/main
4849
/metal
4950
/perplexity
@@ -55,6 +56,7 @@ models-mnt
5556
/server
5657
/simple
5758
/batched
59+
/batched-bench
5860
/export-lora
5961
/finetune
6062
/speculative

Diff for: CMakeLists.txt

+5-3
Original file line numberDiff line numberDiff line change
@@ -422,8 +422,7 @@ endif()
422422
if (LLAMA_ALL_WARNINGS)
423423
if (NOT MSVC)
424424
set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
425-
set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int
426-
-Werror=implicit-function-declaration)
425+
set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration)
427426
set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
428427
set(host_cxx_flags "")
429428

@@ -455,7 +454,8 @@ if (LLAMA_ALL_WARNINGS)
455454
set(c_flags ${c_flags} ${warning_flags})
456455
set(cxx_flags ${cxx_flags} ${warning_flags})
457456
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
458-
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags} ${host_cxx_flags}>")
457+
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
458+
"$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags}>")
459459

460460
endif()
461461

@@ -663,6 +663,8 @@ add_library(ggml OBJECT
663663
ggml.h
664664
ggml-alloc.c
665665
ggml-alloc.h
666+
ggml-backend.c
667+
ggml-backend.h
666668
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
667669
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
668670
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}

Diff for: Makefile

+80-36
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
11
# Define the default target now so that it is always the first target
2-
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative infill benchmark-matmult parallel finetune export-lora tests/test-c.o
2+
BUILD_TARGETS = \
3+
main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
4+
simple batched batched-bench save-load-state server embd-input-test gguf llama-bench llava baby-llama beam-search \
5+
speculative infill benchmark-matmult parallel finetune export-lora tests/test-c.o
36

47
# Binaries only useful for tests
5-
TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe
8+
TEST_TARGETS = \
9+
tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
10+
tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
11+
tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe
612

713
# Code coverage output files
814
COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -172,6 +178,24 @@ else
172178
MK_CPPFLAGS += -DNDEBUG
173179
endif
174180

181+
ifdef LLAMA_SANITIZE_THREAD
182+
MK_CFLAGS += -fsanitize=thread -g
183+
MK_CXXFLAGS += -fsanitize=thread -g
184+
MK_LDFLAGS += -fsanitize=thread -g
185+
endif
186+
187+
ifdef LLAMA_SANITIZE_ADDRESS
188+
MK_CFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
189+
MK_CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
190+
MK_LDFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
191+
endif
192+
193+
ifdef LLAMA_SANITIZE_UNDEFINED
194+
MK_CFLAGS += -fsanitize=undefined -g
195+
MK_CXXFLAGS += -fsanitize=undefined -g
196+
MK_LDFLAGS += -fsanitize=undefined -g
197+
endif
198+
175199
ifdef LLAMA_SERVER_VERBOSE
176200
MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
177201
endif
@@ -512,12 +536,21 @@ ggml.o: ggml.c ggml.h ggml-cuda.h
512536
ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
513537
$(CC) $(CFLAGS) -c $< -o $@
514538

515-
OBJS += ggml-alloc.o
539+
ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
540+
$(CC) $(CFLAGS) -c $< -o $@
541+
542+
OBJS += ggml-alloc.o ggml-backend.o
516543

517-
llama.o: llama.cpp ggml.h ggml-alloc.h ggml-cuda.h ggml-metal.h llama.h
544+
llama.o: llama.cpp ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
518545
$(CXX) $(CXXFLAGS) -c $< -o $@
519546

520-
common.o: common/common.cpp common/common.h build-info.h common/log.h
547+
COMMON_H_DEPS = common/common.h common/sampling.h build-info.h common/log.h
548+
COMMON_DEPS = $(COMMON_H_DEPS) common.o sampling.o
549+
550+
common.o: common/common.cpp $(COMMON_H_DEPS)
551+
$(CXX) $(CXXFLAGS) -c $< -o $@
552+
553+
sampling.o: common/sampling.cpp $(COMMON_H_DEPS)
521554
$(CXX) $(CXXFLAGS) -c $< -o $@
522555

523556
console.o: common/console.cpp common/console.h
@@ -539,19 +572,22 @@ clean:
539572
# Examples
540573
#
541574

542-
main: examples/main/main.cpp build-info.h ggml.o llama.o common.o console.o grammar-parser.o $(OBJS)
575+
main: examples/main/main.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
543576
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
544577
@echo
545578
@echo '==== Run ./main -h for help. ===='
546579
@echo
547580

548-
infill: examples/infill/infill.cpp build-info.h ggml.o llama.o common.o console.o grammar-parser.o $(OBJS)
581+
infill: examples/infill/infill.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
582+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
583+
584+
simple: examples/simple/simple.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
549585
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
550586

551-
simple: examples/simple/simple.cpp build-info.h ggml.o llama.o common.o $(OBJS)
587+
batched: examples/batched/batched.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
552588
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
553589

554-
batched: examples/batched/batched.cpp build-info.h ggml.o llama.o common.o $(OBJS)
590+
batched-bench: examples/batched-bench/batched-bench.cpp build-info.h ggml.o llama.o common.o $(OBJS)
555591
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
556592

557593
quantize: examples/quantize/quantize.cpp build-info.h ggml.o llama.o $(OBJS)
@@ -560,60 +596,68 @@ quantize: examples/quantize/quantize.cpp build-info.h ggml.
560596
quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.h ggml.o llama.o $(OBJS)
561597
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
562598

563-
perplexity: examples/perplexity/perplexity.cpp build-info.h ggml.o llama.o common.o $(OBJS)
599+
perplexity: examples/perplexity/perplexity.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
564600
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
565601

566-
embedding: examples/embedding/embedding.cpp build-info.h ggml.o llama.o common.o $(OBJS)
602+
embedding: examples/embedding/embedding.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
567603
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
568604

569-
save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o $(OBJS)
605+
save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
570606
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
571607

572-
server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp build-info.h ggml.o llama.o common.o grammar-parser.o $(OBJS)
608+
server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp build-info.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
573609
$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS) $(LWINSOCK2)
574610

575-
$(LIB_PRE)embdinput$(DSO_EXT): examples/embd-input/embd-input.h examples/embd-input/embd-input-lib.cpp build-info.h ggml.o llama.o common.o $(OBJS)
611+
$(LIB_PRE)embdinput$(DSO_EXT): examples/embd-input/embd-input.h examples/embd-input/embd-input-lib.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
576612
$(CXX) --shared $(CXXFLAGS) $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS)
577613

578614

579-
embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-test.cpp build-info.h ggml.o llama.o common.o $(OBJS)
615+
embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-test.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
580616
$(CXX) $(CXXFLAGS) $(filter-out %$(DSO_EXT),$(filter-out %.h,$(filter-out %.hpp,$^))) -o $@ $(LDFLAGS) -L. -lembdinput
581617

582618
gguf: examples/gguf/gguf.cpp ggml.o llama.o $(OBJS)
583619
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
584620

585-
train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o common.o train.o $(OBJS)
621+
train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
586622
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
587623

588624
convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
589625
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
590626

591-
llama-bench: examples/llama-bench/llama-bench.cpp build-info.h ggml.o llama.o common.o $(OBJS)
627+
llama-bench: examples/llama-bench/llama-bench.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
592628
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
593629

594-
baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o common.o train.o $(OBJS)
630+
llava: examples/llava/llava.cpp examples/llava/llava-utils.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
631+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
632+
633+
baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
595634
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
596635

597-
beam-search: examples/beam-search/beam-search.cpp build-info.h ggml.o llama.o common.o $(OBJS)
636+
beam-search: examples/beam-search/beam-search.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
598637
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
599638

600-
finetune: examples/finetune/finetune.cpp build-info.h ggml.o llama.o common.o train.o $(OBJS)
639+
finetune: examples/finetune/finetune.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
601640
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
602641

603-
export-lora: examples/export-lora/export-lora.cpp build-info.h ggml.o llama.o common.o $(OBJS)
642+
export-lora: examples/export-lora/export-lora.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
604643
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
605644

606-
speculative: examples/speculative/speculative.cpp build-info.h ggml.o llama.o common.o grammar-parser.o $(OBJS)
645+
speculative: examples/speculative/speculative.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
607646
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
608647

609-
parallel: examples/parallel/parallel.cpp build-info.h ggml.o llama.o common.o $(OBJS)
648+
parallel: examples/parallel/parallel.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
610649
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
611650

612651
ifdef LLAMA_METAL
613652
metal: examples/metal/metal.cpp ggml.o $(OBJS)
614653
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
615654
endif
616655

656+
ifeq ($(UNAME_S),Darwin)
657+
swift: examples/batched.swift
658+
(cd examples/batched.swift; make build)
659+
endif
660+
617661
build-info.h: $(wildcard .git/index) scripts/build-info.sh
618662
@sh scripts/build-info.sh $(CC) > $@.tmp
619663
@if ! cmp -s $@.tmp $@; then \
@@ -634,48 +678,48 @@ benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o
634678
run-benchmark-matmult: benchmark-matmult
635679
./$@
636680

637-
.PHONY: run-benchmark-matmult
681+
.PHONY: run-benchmark-matmult swift
638682

639683
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
640684
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
641685

642686
q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
643687
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
644688

645-
tests/test-llama-grammar: tests/test-llama-grammar.cpp build-info.h ggml.o common.o grammar-parser.o $(OBJS)
689+
tests/test-llama-grammar: tests/test-llama-grammar.cpp build-info.h ggml.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
646690
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
647691

648-
tests/test-grammar-parser: tests/test-grammar-parser.cpp build-info.h ggml.o llama.o common.o grammar-parser.o $(OBJS)
692+
tests/test-grammar-parser: tests/test-grammar-parser.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
649693
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
650694

651-
tests/test-double-float: tests/test-double-float.cpp build-info.h ggml.o llama.o common.o $(OBJS)
695+
tests/test-double-float: tests/test-double-float.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
652696
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
653697

654-
tests/test-grad0: tests/test-grad0.cpp build-info.h ggml.o llama.o common.o $(OBJS)
698+
tests/test-grad0: tests/test-grad0.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
655699
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
656700

657-
tests/test-opt: tests/test-opt.cpp build-info.h ggml.o llama.o common.o $(OBJS)
701+
tests/test-opt: tests/test-opt.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
658702
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
659703

660-
tests/test-quantize-fns: tests/test-quantize-fns.cpp build-info.h ggml.o llama.o common.o $(OBJS)
704+
tests/test-quantize-fns: tests/test-quantize-fns.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
661705
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
662706

663-
tests/test-quantize-perf: tests/test-quantize-perf.cpp build-info.h ggml.o llama.o common.o $(OBJS)
707+
tests/test-quantize-perf: tests/test-quantize-perf.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
664708
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
665709

666-
tests/test-sampling: tests/test-sampling.cpp build-info.h ggml.o llama.o common.o $(OBJS)
710+
tests/test-sampling: tests/test-sampling.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
667711
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
668712

669-
tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp build-info.h ggml.o llama.o common.o $(OBJS)
713+
tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
670714
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
671715

672-
tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp build-info.h ggml.o llama.o common.o $(OBJS)
716+
tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
673717
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
674718

675-
tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp build-info.h ggml.o llama.o common.o $(OBJS)
719+
tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
676720
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
677721

678-
tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp build-info.h ggml.o llama.o common.o $(OBJS)
722+
tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
679723
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
680724

681725
tests/test-c.o: tests/test-c.c llama.h

Diff for: Package.swift

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
// swift-tools-version:5.3
1+
// swift-tools-version:5.5
22

33
import PackageDescription
44

55
#if arch(arm) || arch(arm64)
66
let platforms: [SupportedPlatform]? = [
7-
.macOS(.v11),
7+
.macOS(.v12),
88
.iOS(.v14),
99
.watchOS(.v4),
1010
.tvOS(.v14)
@@ -41,12 +41,13 @@ let package = Package(
4141
"ggml.c",
4242
"llama.cpp",
4343
"ggml-alloc.c",
44+
"ggml-backend.c",
4445
"k_quants.c",
4546
] + additionalSources,
4647
resources: resources,
4748
publicHeadersPath: "spm-headers",
4849
cSettings: [
49-
.unsafeFlags(["-Wno-shorten-64-to-32"]),
50+
.unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
5051
.define("GGML_USE_K_QUANTS"),
5152
.define("GGML_USE_ACCELERATE")
5253
// NOTE: NEW_LAPACK will require iOS version 16.4+

Diff for: README.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ as the main playground for developing new features for the [ggml](https://github
9696
- [X] [Starcoder models](https://github.com/ggerganov/llama.cpp/pull/3187)
9797
- [X] [Mistral AI v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
9898
- [X] [Refact](https://huggingface.co/smallcloudai/Refact-1_6B-fim)
99+
- [X] [Bloom](https://github.com/ggerganov/llama.cpp/pull/3553)
100+
- [X] [MPT](https://github.com/ggerganov/llama.cpp/pull/3417)
99101

100102
**Bindings:**
101103

@@ -277,7 +279,7 @@ In order to build llama.cpp you have three different options.
277279
On MacOS, Metal is enabled by default. Using Metal makes the computation run on the GPU.
278280
To disable the Metal build at compile time use the `LLAMA_NO_METAL=1` flag or the `LLAMA_METAL=OFF` cmake option.
279281

280-
When built with Metal support, you can explicitly disable GPU inference with the `--gpu-layers|-ngl 0` command-line
282+
When built with Metal support, you can explicitly disable GPU inference with the `--n-gpu-layers|-ngl 0` command-line
281283
argument.
282284

283285
### MPI Build

0 commit comments

Comments
 (0)