From 09262784340c165afac4dfbae85c94dc65c5795f Mon Sep 17 00:00:00 2001 From: zenix Date: Wed, 17 May 2023 19:33:57 +0900 Subject: [PATCH 1/5] feature: add blis support --- BLIS.md | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++ CMakeLists.txt | 19 ++++++++++++++ Makefile | 4 +++ README.md | 5 ++++ 4 files changed, 95 insertions(+) create mode 100644 BLIS.md diff --git a/BLIS.md b/BLIS.md new file mode 100644 index 0000000000000..1d64158bbeef0 --- /dev/null +++ b/BLIS.md @@ -0,0 +1,67 @@ +BLIS Installation Manual +------------------------ + +BLIS is a portable software framework for high-performance BLAS-like dense linear algebra libraries. It has received awards and recognition, including the 2023 James H. Wilkinson Prize for Numerical Software and the 2020 SIAM Activity Group on Supercomputing Best Paper Prize. BLIS provides a new BLAS-like API and a compatibility layer for traditional BLAS routine calls. It offers features such as object-based API, typed API, BLAS and CBLAS compatibility layers. + +Project URL: https://github.com/flame/blis + +### Prepare: + +Compile BLIS: + +```bash +git clone https://github.com/flame/blis +cd blis +./configure --enable-cblas -t openmp,pthreads auto +# will install to /usr/local/ by default. +make -j +``` + +Install BLIS: + +```bash +sudo make install +``` + +We recommend using openmp since it's easier to modify the cores being used. + +### llama.cpp compilation + +Makefile: + +```bash +make LLAMA_BLIS=1 -j +# make LLAMA_BLIS=1 benchmark-matmult +``` + +CMake: + +```bash +mkdir build +cd build +cmake -DLLAMA_BLIS=ON .. +make -j +``` + +### llama.cpp execution + +According to the BLIS documentation, we could set the following +environment variables to modify the behavior of openmp: + +``` +export GOMP_CPU_AFFINITY="0-19" +export BLIS_NUM_THREADS=14 +``` + +And then run the binaries as normal. + + +### Intel specific issue + +Some might get the error message saying that `libimf.so` cannot be found. 
+Please follow this [stackoverflow page](https://stackoverflow.com/questions/70687930/intel-oneapi-2022-libimf-so-no-such-file-or-directory-during-openmpi-compila). + +### Reference: + +1. https://github.com/flame/blis#getting-started +2. https://github.com/flame/blis/blob/master/docs/Multithreading.md diff --git a/CMakeLists.txt b/CMakeLists.txt index 48e3238dfa52e..53b417cbaef61 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,6 +66,7 @@ endif() # 3rd party libs option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) option(LLAMA_OPENBLAS "llama: use OpenBLAS" OFF) +option(LLAMA_BLIS "llama: use blis" OFF) option(LLAMA_CUBLAS "llama: use cuBLAS" OFF) option(LLAMA_CLBLAST "llama: use CLBlast" OFF) @@ -178,6 +179,24 @@ if (LLAMA_OPENBLAS) endif() endif() +if (LLAMA_BLIS) + add_compile_definitions(GGML_USE_BLIS) + # we don't directly call BLIS apis, use cblas wrapper instead + add_compile_definitions(GGML_USE_OPENBLAS) + set(BLIS_INCLUDE_SEARCH_PATHS + /usr/include + /usr/include/blis + /usr/local/include + /usr/local/include/blis + $ENV{BLIS_HOME} + $ENV{BLIS_HOME}/include + ) + find_path(BLIS_INC NAMES blis.h PATHS ${BLIS_INCLUDE_SEARCH_PATHS}) + add_compile_definitions(BLIS_ENABLE_CBLAS) + add_link_options(-lblis) + add_compile_options(-I${BLIS_INC}) +endif() + if (LLAMA_CUBLAS) cmake_minimum_required(VERSION 3.17) diff --git a/Makefile b/Makefile index f9ec8797a40dc..62615028aec15 100644 --- a/Makefile +++ b/Makefile @@ -122,6 +122,10 @@ ifdef LLAMA_OPENBLAS LDFLAGS += -lopenblas endif endif +ifdef LLAMA_BLIS + CFLAGS += -DGGML_USE_OPENBLAS -DGGML_USE_BLIS -I/usr/local/include/blis -I/usr/include/blis + LDFLAGS += -lblis -L/usr/local/lib +endif ifdef LLAMA_CUBLAS CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include diff --git a/README.md b/README.md index 
762f4aa0349e5..25dcb84602789 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,7 @@ The main goal of `llama.cpp` is to run the LLaMA model using 4-bit integer quant - Runs on the CPU - OpenBLAS support - cuBLAS and CLBlast support +- BLIS support (cblas wrapper) The original implementation of `llama.cpp` was [hacked in an evening](https://github.com/ggerganov/llama.cpp/issues/33#issuecomment-1465108022). Since then, the project has improved significantly thanks to many contributions. This project is for educational purposes and serves @@ -278,6 +279,10 @@ Building the program with BLAS support may lead to some performance improvements cmake --build . --config Release ``` +- BLIS + + Check [BLIS.md](BLIS.md) for more information. + - cuBLAS This provides BLAS acceleration using the CUDA cores of your Nvidia GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager or from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads). From ee72eafdb9205420b6cff9879357d3c8ebd26a6c Mon Sep 17 00:00:00 2001 From: zenix Date: Fri, 19 May 2023 16:04:30 +0900 Subject: [PATCH 2/5] feature: allow all BLA_VENDOR to be assigned in cmake arguments. align with whisper.cpp pr 927 --- BLIS.md | 2 +- CMakeLists.txt | 55 +++++++++++--------------------------------------- Makefile | 2 +- README.md | 16 ++++++++++++--- 4 files changed, 27 insertions(+), 48 deletions(-) diff --git a/BLIS.md b/BLIS.md index 1d64158bbeef0..9b3c3060515db 100644 --- a/BLIS.md +++ b/BLIS.md @@ -39,7 +39,7 @@ CMake: ```bash mkdir build cd build -cmake -DLLAMA_BLIS=ON .. +cmake -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=FLAME .. 
make -j ``` diff --git a/CMakeLists.txt b/CMakeLists.txt index 53b417cbaef61..ebe04657b10ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.12) # Don't bump this version for no reason +cmake_minimum_required(VERSION 3.25) # Don't bump this version for no reason project("llama.cpp" C CXX) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) @@ -65,8 +65,8 @@ endif() # 3rd party libs option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) -option(LLAMA_OPENBLAS "llama: use OpenBLAS" OFF) -option(LLAMA_BLIS "llama: use blis" OFF) +option(LLAMA_BLAS "llama: use BLAS" OFF) +set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLA_VENDOR from https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors") option(LLAMA_CUBLAS "llama: use cuBLAS" OFF) option(LLAMA_CLBLAST "llama: use CLBlast" OFF) @@ -146,57 +146,26 @@ if (APPLE AND LLAMA_ACCELERATE) endif() endif() -if (LLAMA_OPENBLAS) +if (LLAMA_BLAS) if (LLAMA_STATIC) set(BLA_STATIC ON) endif() - - set(BLA_VENDOR OpenBLAS) + set(BLA_SIZEOF_INTEGRER 8) + set(BLA_VENDOR ${LLAMA_BLAS_VENDOR}) find_package(BLAS) if (BLAS_FOUND) - message(STATUS "OpenBLAS found") + message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}") add_compile_definitions(GGML_USE_OPENBLAS) - add_link_options(${BLAS_LIBRARIES}) - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} openblas) - - # find header file - set(OPENBLAS_INCLUDE_SEARCH_PATHS - /usr/include - /usr/include/openblas - /usr/include/openblas-base - /usr/local/include - /usr/local/include/openblas - /usr/local/include/openblas-base - /opt/OpenBLAS/include - $ENV{OpenBLAS_HOME} - $ENV{OpenBLAS_HOME}/include - ) - find_path(OPENBLAS_INC NAMES cblas.h PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS}) - add_compile_options(-I${OPENBLAS_INC}) + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES}) + + message("${BLAS_LIBRARIES}") + include_directories(${BLAS_INCLUDE_DIRS}) else() - message(WARNING "OpenBLAS not found") + message(WARNING "BLAS not found, 
please refer to https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors to set correct LLAMA_BLAS_VENDOR") endif() endif() -if (LLAMA_BLIS) - add_compile_definitions(GGML_USE_BLIS) - # we don't directly call BLIS apis, use cblas wrapper instead - add_compile_definitions(GGML_USE_OPENBLAS) - set(BLIS_INCLUDE_SEARCH_PATHS - /usr/include - /usr/include/blis - /usr/local/include - /usr/local/include/blis - $ENV{BLIS_HOME} - $ENV{BLIS_HOME}/include - ) - find_path(BLIS_INC NAMES blis.h PATHS ${BLIS_INCLUDE_SEARCH_PATHS}) - add_compile_definitions(BLIS_ENABLE_CBLAS) - add_link_options(-lblis) - add_compile_options(-I${BLIS_INC}) -endif() - if (LLAMA_CUBLAS) cmake_minimum_required(VERSION 3.17) diff --git a/Makefile b/Makefile index 62615028aec15..cefa0b4a5dc6f 100644 --- a/Makefile +++ b/Makefile @@ -123,7 +123,7 @@ ifdef LLAMA_OPENBLAS endif endif ifdef LLAMA_BLIS - CFLAGS += -DGGML_USE_OPENBLAS -DGGML_USE_BLIS -I/usr/local/include/blis -I/usr/include/blis + CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis LDFLAGS += -lblis -L/usr/local/lib endif ifdef LLAMA_CUBLAS diff --git a/README.md b/README.md index 25dcb84602789..102cde43fb457 100644 --- a/README.md +++ b/README.md @@ -56,9 +56,8 @@ The main goal of `llama.cpp` is to run the LLaMA model using 4-bit integer quant - Mixed F16 / F32 precision - 4-bit, 5-bit and 8-bit integer quantization support - Runs on the CPU -- OpenBLAS support +- Supports OpenBLAS/Apple BLAS/ARM Performance Lib/ATLAS/BLIS/Intel MKL/NVHPC/ACML/SCSL/SGIMATH and [more](https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors) in BLAS - cuBLAS and CLBlast support -- BLIS support (cblas wrapper) The original implementation of `llama.cpp` was [hacked in an evening](https://github.com/ggerganov/llama.cpp/issues/33#issuecomment-1465108022). Since then, the project has improved significantly thanks to many contributions. 
This project is for educational purposes and serves @@ -275,7 +274,7 @@ Building the program with BLAS support may lead to some performance improvements ```bash mkdir build cd build - cmake .. -DLLAMA_OPENBLAS=ON + cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS cmake --build . --config Release ``` @@ -283,6 +282,17 @@ Building the program with BLAS support may lead to some performance improvements Check [BLIS.md](BLIS.md) for more information. +- Intel MKL + + By default, `LLAMA_BLAS_VENDOR` is set to `Generic`, so if you have already sourced the Intel environment script and pass `-DLLAMA_BLAS=ON` to cmake, the MKL version of BLAS will automatically be selected. You may also specify it by: + + ```bash + mkdir build + cd build + cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx + cmake --build . --config Release + ``` + - cuBLAS This provides BLAS acceleration using the CUDA cores of your Nvidia GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager or from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads). 
From f6b7767f3f63830ac2fe5911c165963d494d170c Mon Sep 17 00:00:00 2001 From: zenix Date: Sat, 20 May 2023 10:09:47 +0900 Subject: [PATCH 3/5] fix: version detection for BLA_SIZEOF_INTEGER, recover min version of cmake --- CMakeLists.txt | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ebe04657b10ec..b7925b6f17792 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.25) # Don't bump this version for no reason +cmake_minimum_required(VERSION 3.12) # Don't bump this version for no reason project("llama.cpp" C CXX) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) @@ -150,19 +150,24 @@ if (LLAMA_BLAS) if (LLAMA_STATIC) set(BLA_STATIC ON) endif() - set(BLA_SIZEOF_INTEGRER 8) + if (${CMAKE_VERSION} VERSION_GREATER_EQUAL 3.22) + set(BLA_SIZEOF_INTEGRER 8) + endif() set(BLA_VENDOR ${LLAMA_BLAS_VENDOR}) find_package(BLAS) if (BLAS_FOUND) message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}") + add_compile_options(${BLAS_LINKER_FLAGS}) add_compile_definitions(GGML_USE_OPENBLAS) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES}) message("${BLAS_LIBRARIES}") include_directories(${BLAS_INCLUDE_DIRS}) else() - message(WARNING "BLAS not found, please refer to https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors to set correct LLAMA_BLAS_VENDOR") + message(WARNING "BLAS not found, please refer to " + "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" + " to set correct LLAMA_BLAS_VENDOR") endif() endif() From 46f01a2855a25e017b55106658bb06246a6846ee Mon Sep 17 00:00:00 2001 From: Zenix Date: Sat, 20 May 2023 17:27:56 +0900 Subject: [PATCH 4/5] Fix typo in INTEGER Co-authored-by: Georgi Gerganov --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b7925b6f17792..0876ab90a2208 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -151,7 +151,7 @@ if 
(LLAMA_BLAS) set(BLA_STATIC ON) endif() if (${CMAKE_VERSION} VERSION_GREATER_EQUAL 3.22) - set(BLA_SIZEOF_INTEGRER 8) + set(BLA_SIZEOF_INTEGER 8) endif() set(BLA_VENDOR ${LLAMA_BLAS_VENDOR}) find_package(BLAS) From 6b5a4ab9578acf63d4965907262c37b64444cf14 Mon Sep 17 00:00:00 2001 From: zenix Date: Sat, 20 May 2023 22:03:33 +0900 Subject: [PATCH 5/5] Fix: blas changes on ci --- .github/workflows/build.yml | 2 +- CMakeLists.txt | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a5938bf93684f..49b478d99ab9d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -165,7 +165,7 @@ jobs: - build: 'clblast' defines: '-DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"' - build: 'openblas' - defines: '-DLLAMA_OPENBLAS=ON -DBLAS_LIBRARIES="/LIBPATH:$env:RUNNER_TEMP/openblas/lib" -DOPENBLAS_INC="$env:RUNNER_TEMP/openblas/include"' + defines: '-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include"' steps: - name: Clone diff --git a/CMakeLists.txt b/CMakeLists.txt index 0876ab90a2208..1c9afed6dc0a8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -158,16 +158,16 @@ if (LLAMA_BLAS) if (BLAS_FOUND) message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}") - add_compile_options(${BLAS_LINKER_FLAGS}) + add_compile_options(${BLAS_LINKER_FLAGS}) add_compile_definitions(GGML_USE_OPENBLAS) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES}) - message("${BLAS_LIBRARIES}") + message("${BLAS_LIBRARIES} ${BLAS_INCLUDE_DIRS}") include_directories(${BLAS_INCLUDE_DIRS}) else() message(WARNING "BLAS not found, please refer to " - "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" - " to set correct LLAMA_BLAS_VENDOR") + "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" + " to set correct LLAMA_BLAS_VENDOR") endif() endif()