Commit 3416c98

Merge remote-tracking branch 'upstream/concedo'
2 parents: 5eb17f0 + 4c4e435

File tree

113 files changed: 36218 additions & 8344 deletions

.devops/lamma-cpp-clblast.srpm.spec

Lines changed: 58 additions & 0 deletions

@@ -0,0 +1,58 @@
+# SRPM for building from source and packaging an RPM for RPM-based distros.
+# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
+# Built and maintained by John Boero - [email protected]
+# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
+
+# Notes for llama.cpp:
+# 1. Tags are currently based on hash - which will not sort asciibetically.
+#    We need to declare standard versioning if people want to sort latest releases.
+# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
+# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
+#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
+# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
+#    It is up to the user to install the correct vendor-specific support.
+
+Name:           llama.cpp-clblast
+Version:        master
+Release:        1%{?dist}
+Summary:        OpenCL Inference of LLaMA model in pure C/C++
+License:        MIT
+Source0:        https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
+BuildRequires:  coreutils make gcc-c++ git mesa-libOpenCL-devel
+URL:            https://github.com/ggerganov/llama.cpp
+
+%define debug_package %{nil}
+%define source_date_epoch_from_changelog 0
+
+%description
+OpenCL inference for Meta's Llama 2 models using default options.
+
+%prep
+%setup -n llama.cpp-master
+
+%build
+make -j LLAMA_CLBLAST=1
+
+%install
+mkdir -p %{buildroot}%{_bindir}/
+cp -p main %{buildroot}%{_bindir}/llamacppclblast
+cp -p server %{buildroot}%{_bindir}/llamacppclblastserver
+cp -p simple %{buildroot}%{_bindir}/llamacppclblastsimple
+
+%clean
+rm -rf %{buildroot}
+rm -rf %{_builddir}/*
+
+%files
+%{_bindir}/llamacppclblast
+%{_bindir}/llamacppclblastserver
+%{_bindir}/llamacppclblastsimple
+
+%pre
+
+%post
+
+%preun
+%postun
+
+%changelog
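Note 4 in the spec leaves the OpenCL runtime to the user. A hedged sketch of the extra packages a Fedora user would typically install before building this CLBlast variant; mesa-libOpenCL-devel comes from the spec's BuildRequires, but the other package names are assumptions that vary by distro:

$ # ICD loader, headers, and the CLBlast library itself (Fedora names, assumed)
$ sudo dnf install -y ocl-icd opencl-headers clblast clblast-devel
$ # plus a vendor-specific ICD implementation, e.g. Mesa's:
$ sudo dnf install -y mesa-libOpenCL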

.devops/lamma-cpp-cublas.srpm.spec

Lines changed: 59 additions & 0 deletions

@@ -0,0 +1,59 @@
+# SRPM for building from source and packaging an RPM for RPM-based distros.
+# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
+# Built and maintained by John Boero - [email protected]
+# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
+
+# Notes for llama.cpp:
+# 1. Tags are currently based on hash - which will not sort asciibetically.
+#    We need to declare standard versioning if people want to sort latest releases.
+# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
+# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
+#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
+# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
+#    It is up to the user to install the correct vendor-specific support.
+
+Name:           llama.cpp-cublas
+Version:        master
+Release:        1%{?dist}
+Summary:        CUDA Inference of LLaMA model in pure C/C++
+License:        MIT
+Source0:        https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
+BuildRequires:  coreutils make gcc-c++ git cuda-toolkit
+Requires:       cuda-toolkit
+URL:            https://github.com/ggerganov/llama.cpp
+
+%define debug_package %{nil}
+%define source_date_epoch_from_changelog 0
+
+%description
+CUDA inference for Meta's Llama 2 models using default options.
+
+%prep
+%setup -n llama.cpp-master
+
+%build
+make -j LLAMA_CUBLAS=1
+
+%install
+mkdir -p %{buildroot}%{_bindir}/
+cp -p main %{buildroot}%{_bindir}/llamacppcublas
+cp -p server %{buildroot}%{_bindir}/llamacppcublasserver
+cp -p simple %{buildroot}%{_bindir}/llamacppcublassimple
+
+%clean
+rm -rf %{buildroot}
+rm -rf %{_builddir}/*
+
+%files
+%{_bindir}/llamacppcublas
+%{_bindir}/llamacppcublasserver
+%{_bindir}/llamacppcublassimple
+
+%pre
+
+%post
+
+%preun
+%postun
+
+%changelog
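Per note 3 in the spec, the cuda-toolkit dependency resolves only once NVIDIA's developer repository is enabled. A minimal sketch for Fedora 37, using the repo URL from the spec's own comment; dnf config-manager ships in dnf-plugins-core:

$ sudo dnf install -y dnf-plugins-core
$ sudo dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
$ sudo dnf install -y cuda-toolkit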

.devops/llama-cpp.srpm.spec

Lines changed: 58 additions & 0 deletions

@@ -0,0 +1,58 @@
+# SRPM for building from source and packaging an RPM for RPM-based distros.
+# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
+# Built and maintained by John Boero - [email protected]
+# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
+
+# Notes for llama.cpp:
+# 1. Tags are currently based on hash - which will not sort asciibetically.
+#    We need to declare standard versioning if people want to sort latest releases.
+# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
+# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
+#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
+# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
+#    It is up to the user to install the correct vendor-specific support.
+
+Name:           llama.cpp
+Version:        master
+Release:        1%{?dist}
+Summary:        CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
+License:        MIT
+Source0:        https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
+BuildRequires:  coreutils make gcc-c++ git
+URL:            https://github.com/ggerganov/llama.cpp
+
+%define debug_package %{nil}
+%define source_date_epoch_from_changelog 0
+
+%description
+CPU inference for Meta's Llama 2 models using default options.
+
+%prep
+%autosetup
+
+%build
+make -j
+
+%install
+mkdir -p %{buildroot}%{_bindir}/
+cp -p main %{buildroot}%{_bindir}/llamacpp
+cp -p server %{buildroot}%{_bindir}/llamacppserver
+cp -p simple %{buildroot}%{_bindir}/llamacppsimple
+
+%clean
+rm -rf %{buildroot}
+rm -rf %{_builddir}/*
+
+%files
+%{_bindir}/llamacpp
+%{_bindir}/llamacppserver
+%{_bindir}/llamacppsimple
+
+%pre
+
+%post
+
+%preun
+%postun
+
+%changelog
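None of the three specs shows the build invocation itself. A minimal sketch of building one of them with rpmbuild, assuming the standard ~/rpmbuild tree and the rpmdevtools helpers (rpmdev-setuptree, spectool), neither of which this commit provides:

$ sudo dnf install -y rpm-build rpmdevtools
$ rpmdev-setuptree                                      # creates ~/rpmbuild/{SPECS,SOURCES,...}
$ cp .devops/llama-cpp.srpm.spec ~/rpmbuild/SPECS/
$ spectool -g -R ~/rpmbuild/SPECS/llama-cpp.srpm.spec   # fetches Source0 into ~/rpmbuild/SOURCES
$ rpmbuild -ba ~/rpmbuild/SPECS/llama-cpp.srpm.spec     # builds both the source and binary RPMs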

.gitignore

Lines changed: 21 additions & 15 deletions

@@ -1,6 +1,6 @@
 *.o
 *.a
-*.so
+*.bin
 .DS_Store
 .build/
 .cache/
@@ -26,14 +26,18 @@ models-mnt
 /perplexity
 /embedding
 /train-text-from-scratch
+/convert-llama2c-to-ggml
 /simple
 /benchmark-matmult
 /vdot
 /server
 /Pipfile
 /embd-input-test
+/gguf
+/gguf-llama-simple
 /libllama.so
-
+/llama-bench
+build-info.h
 arm_neon.h
 compile_commands.json
 CMakeSettings.json
@@ -57,6 +61,7 @@ poetry.lock
 poetry.toml

 # Test binaries
+tests/test-grammar-parser
 tests/test-double-float
 tests/test-grad0
 tests/test-opt
@@ -65,16 +70,17 @@ tests/test-quantize-perf
 tests/test-sampling
 tests/test-tokenizer-0

-koboldcpp.so
-koboldcpp_failsafe.so
-koboldcpp_openblas.so
-koboldcpp_noavx2.so
-koboldcpp_clblast.so
-koboldcpp.dll
-koboldcpp_failsafe.dll
-koboldcpp_openblas.dll
-koboldcpp_noavx2.dll
-koboldcpp_clblast.dll
-koboldcpp_cublas.dll
-cublas64_11.dll
-cublasLt64_11.dll
+/koboldcpp_default.so
+/koboldcpp_failsafe.so
+/koboldcpp_openblas.so
+/koboldcpp_noavx2.so
+/koboldcpp_clblast.so
+/koboldcpp_cublas.so
+/koboldcpp_default.dll
+/koboldcpp_failsafe.dll
+/koboldcpp_openblas.dll
+/koboldcpp_noavx2.dll
+/koboldcpp_clblast.dll
+/koboldcpp_cublas.dll
+/cublas64_11.dll
+/cublasLt64_11.dll
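Besides renaming koboldcpp.so/.dll to koboldcpp_default.*, the new entries gain a leading slash, which anchors each pattern to the repository root instead of matching the file name in any subdirectory. git can report which rule fires for a given path; a quick check sketch:

$ # -v prints the .gitignore source line responsible for ignoring a path
$ git check-ignore -v koboldcpp_default.so
$ # expected output is roughly: .gitignore:73:/koboldcpp_default.so    koboldcpp_default.so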

CMakeLists.txt

Lines changed: 6 additions & 6 deletions

@@ -376,24 +376,24 @@ target_link_libraries(ggml_v2 PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
 set_target_properties(ggml_v2 PROPERTIES POSITION_INDEPENDENT_CODE ON)

 add_library(common2
-            examples/common.cpp
-            examples/common.h)
-target_include_directories(common2 PUBLIC . ./otherarch ./otherarch/tools ./examples)
+            common/common.cpp
+            common/common.h)
+target_include_directories(common2 PUBLIC . ./otherarch ./otherarch/tools ./examples ./common)
 target_compile_features(common2 PUBLIC cxx_std_11) # don't bump
 target_link_libraries(common2 PRIVATE ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(common2 PROPERTIES POSITION_INDEPENDENT_CODE ON)

 add_library(gpttype_adapter
             gpttype_adapter.cpp)
-target_include_directories(gpttype_adapter PUBLIC . ./otherarch ./otherarch/tools ./examples)
+target_include_directories(gpttype_adapter PUBLIC . ./otherarch ./otherarch/tools ./examples ./common)
 target_compile_features(gpttype_adapter PUBLIC cxx_std_11) # don't bump
 target_link_libraries(gpttype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)


 set(TARGET koboldcpp_cublas)
 add_library(${TARGET} SHARED expose.cpp expose.h)
-target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./examples)
+target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./examples ./common)
 target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
 set_target_properties(${TARGET} PROPERTIES PREFIX "")
 set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas")
@@ -403,10 +403,10 @@ target_compile_features(${TARGET} PRIVATE cxx_std_11)


 if (MAKE_MISC_FILES)
+add_subdirectory(common)
 add_library(llama
             llama.cpp
             llama.h
-            llama-util.h
             )
 target_include_directories(llama PUBLIC .)
 target_compile_features(llama PUBLIC cxx_std_11) # don't bump
MIT_LICENSE_GGML_LLAMACPP_ONLY

Lines changed: 1 addition & 1 deletion

@@ -23,4 +23,4 @@ SOFTWARE.
 ===================================

 Note that the above license applies ONLY to the GGML library and llama.cpp by ggerganov which are licensed under the MIT License
-Kobold Lite by Concedo and the provided python ctypes bindings in koboldcpp.dll are licensed under the AGPL v3.0 License
+Kobold Lite by Concedo and the provided python ctypes bindings in koboldcpp dlls are licensed under the AGPL v3.0 License

Makefile

Lines changed: 12 additions & 9 deletions

@@ -1,4 +1,4 @@
-default: koboldcpp koboldcpp_failsafe koboldcpp_openblas koboldcpp_noavx2 koboldcpp_clblast koboldcpp_cublas
+default: koboldcpp_default koboldcpp_failsafe koboldcpp_openblas koboldcpp_noavx2 koboldcpp_clblast koboldcpp_cublas
 tools: quantize_gpt2 quantize_gptj quantize_llama quantize_neox quantize_mpt
 dev: koboldcpp_openblas
 dev2: koboldcpp_clblast
@@ -40,7 +40,7 @@ endif

 # keep standard at C11 and C++11
 CFLAGS = -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c11 -fPIC -DGGML_USE_K_QUANTS
-CXXFLAGS = -I. -I./examples -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c++11 -fPIC -DGGML_USE_K_QUANTS
+CXXFLAGS = -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c++11 -fPIC -DGGML_USE_K_QUANTS
 LDFLAGS =

 # these are used on windows, to build some libraries with extra old device compatibility
@@ -393,19 +393,19 @@ ggml_v2-opencl-legacy.o: otherarch/ggml_v2-opencl-legacy.c otherarch/ggml_v2-ope
 	$(CC) $(CFLAGS) -c $< -o $@

 # intermediate objects
-llama.o: llama.cpp ggml.h ggml-alloc.h ggml-cuda.h ggml-metal.h llama.h llama-util.h
+llama.o: llama.cpp ggml.h ggml-alloc.h ggml-cuda.h ggml-metal.h llama.h otherarch/llama-util.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
-common.o: examples/common.cpp examples/common.h
+common.o: common/common.cpp common/common.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
-console.o: examples/console.cpp examples/console.h
+console.o: common/console.cpp common/console.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
-grammar-parser.o: examples/grammar-parser.cpp examples/grammar-parser.h
+grammar-parser.o: common/grammar-parser.cpp common/grammar-parser.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 expose.o: expose.cpp expose.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@

 # idiotic "for easier compilation"
-GPTTYPE_ADAPTER = gpttype_adapter.cpp otherarch/llama_v2.cpp llama.cpp otherarch/utils.cpp otherarch/gptj_v1.cpp otherarch/gptj_v2.cpp otherarch/gptj_v3.cpp otherarch/gpt2_v1.cpp otherarch/gpt2_v2.cpp otherarch/gpt2_v3.cpp otherarch/rwkv_v2.cpp otherarch/rwkv_v3.cpp otherarch/neox_v2.cpp otherarch/neox_v3.cpp otherarch/mpt_v3.cpp ggml.h ggml-cuda.h llama.h llama-util.h
+GPTTYPE_ADAPTER = gpttype_adapter.cpp otherarch/llama_v2.cpp otherarch/llama_v3.cpp llama.cpp otherarch/utils.cpp otherarch/gptj_v1.cpp otherarch/gptj_v2.cpp otherarch/gptj_v3.cpp otherarch/gpt2_v1.cpp otherarch/gpt2_v2.cpp otherarch/gpt2_v3.cpp otherarch/rwkv_v2.cpp otherarch/rwkv_v3.cpp otherarch/neox_v2.cpp otherarch/neox_v3.cpp otherarch/mpt_v3.cpp ggml.h ggml-cuda.h llama.h otherarch/llama-util.h
 gpttype_adapter_failsafe.o: $(GPTTYPE_ADAPTER)
 	$(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) -c $< -o $@
 gpttype_adapter.o: $(GPTTYPE_ADAPTER)
@@ -416,16 +416,19 @@ gpttype_adapter_cublas.o: $(GPTTYPE_ADAPTER)
 	$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@

 clean:
-	rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_cublas.dll koboldcpp.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_cublas.so
+	rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf gguf.exe main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_cublas.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_cublas.so

 main: examples/main/main.cpp build-info.h ggml.o k_quants.o ggml-alloc.o llama.o common.o console.o grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 	@echo
 	@echo '==== Run ./main -h for help. ===='
 	@echo

+gguf: examples/gguf/gguf.cpp build-info.h ggml.o llama.o $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
 #generated libraries
-koboldcpp: ggml.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o k_quants.o ggml-alloc.o $(OBJS)
+koboldcpp_default: ggml.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o k_quants.o ggml-alloc.o $(OBJS)
 	$(DEFAULT_BUILD)
 koboldcpp_openblas: ggml_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o common.o gpttype_adapter.o k_quants.o ggml-alloc.o $(OBJS)
 	$(OPENBLAS_BUILD)