Skip to content

Commit 33091a9

Browse files
committed
Merge 'origin/master' into hipblas
2 parents 9fdaa1d + 2d43387 commit 33091a9

37 files changed

+5867
-253
lines changed

.devops/full.Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ ARG UBUNTU_VERSION=22.04
33
FROM ubuntu:$UBUNTU_VERSION as build
44

55
RUN apt-get update && \
6-
apt-get install -y build-essential python3 python3-pip
6+
apt-get install -y build-essential python3 python3-pip git
77

88
COPY requirements.txt requirements.txt
99

.devops/main.Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ ARG UBUNTU_VERSION=22.04
33
FROM ubuntu:$UBUNTU_VERSION as build
44

55
RUN apt-get update && \
6-
apt-get install -y build-essential
6+
apt-get install -y build-essential git
77

88
WORKDIR /app
99

.devops/tools.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ shift
1111
arg2="$@"
1212

1313
if [[ $arg1 == '--convert' || $arg1 == '-c' ]]; then
14-
python3 ./convert-pth-to-ggml.py $arg2
14+
python3 ./convert.py $arg2
1515
elif [[ $arg1 == '--quantize' || $arg1 == '-q' ]]; then
1616
./quantize $arg2
1717
elif [[ $arg1 == '--run' || $arg1 == '-r' ]]; then
@@ -32,7 +32,7 @@ else
3232
echo " --run (-r): Run a model previously converted into ggml"
3333
echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
3434
echo " --convert (-c): Convert a llama model into ggml"
35-
echo " ex: \"/models/7B/\" 1"
35+
echo " ex: --outtype f16 \"/models/7B/\" "
3636
echo " --quantize (-q): Optimize with quantization process ggml"
3737
echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
3838
echo " --all-in-one (-a): Execute --convert & --quantize"

.github/workflows/build.yml

+8-8
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ on:
1010
push:
1111
branches:
1212
- master
13-
paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.c', '**/*.cpp']
13+
paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp']
1414
pull_request:
1515
types: [opened, synchronize, reopened]
16-
paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.c', '**/*.cpp']
16+
paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp']
1717

1818
env:
1919
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
@@ -157,15 +157,15 @@ jobs:
157157
matrix:
158158
include:
159159
- build: 'avx2'
160-
defines: ''
160+
defines: '-DLLAMA_BUILD_SERVER=ON'
161161
- build: 'avx'
162-
defines: '-DLLAMA_AVX2=OFF'
162+
defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF'
163163
- build: 'avx512'
164-
defines: '-DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
164+
defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
165165
- build: 'clblast'
166-
defines: '-DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
166+
defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
167167
- build: 'openblas'
168-
defines: '-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
168+
defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
169169

170170
steps:
171171
- name: Clone
@@ -292,7 +292,7 @@ jobs:
292292
run: |
293293
mkdir build
294294
cd build
295-
cmake .. -DLLAMA_CUBLAS=ON
295+
cmake .. -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON
296296
cmake --build . --config Release
297297
298298
- name: Get commit hash

.github/workflows/tidy-post.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name: clang-tidy review post comments
22

33
on:
4-
workflow_run:
4+
workflow_dispatch:
55
workflows: ["clang-tidy-review"]
66
types:
77
- completed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
.envrc
88
.swiftpm
99
.venv
10+
.clang-tidy
1011
.vs/
1112
.vscode/
1213

@@ -17,6 +18,7 @@ build-release/
1718
build-static/
1819
build-cublas/
1920
build-opencl/
21+
build-metal/
2022
build-no-accel/
2123
build-sanitize-addr/
2224
build-sanitize-thread/
@@ -33,6 +35,7 @@ models/*
3335
/benchmark-matmult
3436
/vdot
3537
/Pipfile
38+
/libllama.so
3639

3740
build-info.h
3841
arm_neon.h

CMakeLists.txt

+50-16
Original file line numberDiff line numberDiff line change
@@ -64,14 +64,15 @@ if (NOT MSVC)
6464
endif()
6565

6666
# 3rd party libs
67-
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
68-
option(LLAMA_BLAS "llama: use BLAS" OFF)
67+
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
68+
option(LLAMA_BLAS "llama: use BLAS" OFF)
6969
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
70-
option(LLAMA_CUBLAS "llama: use cuBLAS" OFF)
71-
set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
72-
set(LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels")
73-
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
74-
option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)
70+
option(LLAMA_CUBLAS "llama: use cuBLAS" OFF)
71+
set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
72+
set(LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels")
73+
option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)
74+
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
75+
option(LLAMA_METAL "llama: use Metal" OFF)
7576

7677
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
7778
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
@@ -184,7 +185,7 @@ if (LLAMA_CUBLAS)
184185

185186
enable_language(CUDA)
186187

187-
set(GGML_CUDA_SOURCES ggml-cuda.cu ggml-cuda.h)
188+
set(GGML_SOURCES_CUDA ggml-cuda.cu ggml-cuda.h)
188189

189190
add_compile_definitions(GGML_USE_CUBLAS)
190191
add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
@@ -201,12 +202,37 @@ if (LLAMA_CUBLAS)
201202
endif()
202203
endif()
203204

205+
if (LLAMA_METAL)
206+
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
207+
find_library(METAL_FRAMEWORK Metal REQUIRED)
208+
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
209+
find_library(METALPERFORMANCE_FRAMEWORK MetalPerformanceShaders REQUIRED)
210+
211+
set(GGML_SOURCES_METAL ggml-metal.m ggml-metal.h)
212+
213+
add_compile_definitions(GGML_USE_METAL)
214+
add_compile_definitions(GGML_METAL_NDEBUG)
215+
216+
# get full path to the file
217+
#add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
218+
219+
# copy ggml-metal.metal to bin directory
220+
configure_file(ggml-metal.metal bin/ggml-metal.metal COPYONLY)
221+
222+
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
223+
${FOUNDATION_LIBRARY}
224+
${METAL_FRAMEWORK}
225+
${METALKIT_FRAMEWORK}
226+
${METALPERFORMANCE_FRAMEWORK}
227+
)
228+
endif()
229+
204230
if (LLAMA_CLBLAST)
205231
find_package(CLBlast)
206232
if (CLBlast_FOUND)
207233
message(STATUS "CLBlast found")
208234

209-
set(GGML_OPENCL_SOURCES ggml-opencl.cpp ggml-opencl.h)
235+
set(GGML_SOURCES_OPENCL ggml-opencl.cpp ggml-opencl.h)
210236

211237
add_compile_definitions(GGML_USE_CLBLAST)
212238

@@ -402,8 +428,12 @@ endif()
402428
add_library(ggml OBJECT
403429
ggml.c
404430
ggml.h
405-
${GGML_CUDA_SOURCES}
406-
${GGML_OPENCL_SOURCES})
431+
ggml-quants-k.h
432+
ggml-quants-k.c
433+
${GGML_SOURCES_CUDA}
434+
${GGML_SOURCES_OPENCL}
435+
${GGML_SOURCES_METAL}
436+
)
407437

408438
target_include_directories(ggml PUBLIC .)
409439
target_compile_features(ggml PUBLIC c_std_11) # don't bump
@@ -416,21 +446,25 @@ endif()
416446
add_library(llama
417447
llama.cpp
418448
llama.h
419-
llama-util.h)
449+
llama-util.h
450+
)
420451

421452
target_include_directories(llama PUBLIC .)
422453
target_compile_features(llama PUBLIC cxx_std_11) # don't bump
423-
target_link_libraries(llama PRIVATE ggml ${LLAMA_EXTRA_LIBS})
454+
target_link_libraries(llama PRIVATE
455+
ggml
456+
${LLAMA_EXTRA_LIBS}
457+
)
424458

425459
if (BUILD_SHARED_LIBS)
426460
set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON)
427461
target_compile_definitions(llama PRIVATE LLAMA_SHARED LLAMA_BUILD)
428462
endif()
429463

430-
if (GGML_CUDA_SOURCES)
464+
if (GGML_SOURCES_CUDA)
431465
message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
432-
set_property(TARGET ggml PROPERTY CUDA_ARCHITECTURES OFF)
433-
set_property(TARGET ggml PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
466+
set_property(TARGET ggml PROPERTY CUDA_ARCHITECTURES OFF)
467+
set_property(TARGET ggml PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
434468
set_property(TARGET llama PROPERTY CUDA_ARCHITECTURES OFF)
435469
endif()
436470

0 commit comments

Comments
 (0)