Skip to content

Commit fbf1dde

Browse files
cebtenzzre, niansa, manyoso, apage43, ToKiNoBug
authored
Nomic Vulkan backend (ggml-org#4456)
Signed-off-by: Jared Van Bortel <[email protected]>
Co-authored-by: niansa <[email protected]>
Co-authored-by: Adam Treat <[email protected]>
Co-authored-by: Aaron Miller <[email protected]>
Co-authored-by: ToKiNoBug <[email protected]>
Co-authored-by: Georgi Gerganov <[email protected]>
Co-authored-by: slaren <[email protected]>
1 parent 2aed77e commit fbf1dde

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+4271
-19
lines changed

.ecrc

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
{
2+
"Exclude": ["^\\.gitmodules$"],
23
"Disable": {
34
"IndentSize": true
45
}

.github/workflows/build.yml

+20-1
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ jobs:
337337
OPENCL_VERSION: 2023.04.17
338338
CLBLAST_VERSION: 1.6.0
339339
SDE_VERSION: 9.33.0-2024-01-07
340+
VULKAN_VERSION: 1.3.261.1
340341

341342
strategy:
342343
matrix:
@@ -353,6 +354,8 @@ jobs:
353354
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
354355
- build: 'openblas'
355356
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
357+
- build: 'kompute'
358+
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
356359

357360
steps:
358361
- name: Clone
@@ -361,6 +364,12 @@ jobs:
361364
with:
362365
fetch-depth: 0
363366

367+
- name: Clone Kompute submodule
368+
id: clone_kompute
369+
if: ${{ matrix.build == 'kompute' }}
370+
run: |
371+
git submodule update --init kompute
372+
364373
- name: Download OpenCL SDK
365374
id: get_opencl
366375
if: ${{ matrix.build == 'clblast' }}
@@ -395,6 +404,15 @@ jobs:
395404
$lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
396405
& $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
397406
407+
- name: Install Vulkan SDK
408+
id: get_vulkan
409+
if: ${{ matrix.build == 'kompute' }}
410+
run: |
411+
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
412+
& "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
413+
Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
414+
Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
415+
398416
- name: Build
399417
id: cmake_build
400418
run: |
@@ -432,7 +450,8 @@ jobs:
432450
433451
- name: Test
434452
id: cmake_test
435-
if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # not all machines have native AVX-512
453+
# not all machines have native AVX-512
454+
if: ${{ matrix.build != 'clblast' && matrix.build != 'kompute' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }}
436455
run: |
437456
cd build
438457
ctest -L main -C Release --verbose --timeout 900

.gitmodules

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[submodule "kompute"]
2+
path = kompute
3+
url = https://github.com/nomic-ai/kompute.git

CMakeLists.txt

+163-8
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ option(LLAMA_VULKAN "llama: use Vulkan"
103103
option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT})
104104
option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF)
105105
option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF)
106+
option(LLAMA_KOMPUTE "llama: use Kompute" OFF)
106107
option(LLAMA_MPI "llama: use MPI" OFF)
107108
option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
108109
option(LLAMA_SYCL "llama: use SYCL" OFF)
@@ -484,7 +485,6 @@ if (LLAMA_HIPBLAS)
484485
endif()
485486
endif()
486487

487-
488488
if (LLAMA_SYCL)
489489
if ( NOT DEFINED ENV{ONEAPI_ROOT})
490490
message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
@@ -510,6 +510,160 @@ if (LLAMA_SYCL)
510510
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
511511
endif()
512512

513+
if (LLAMA_KOMPUTE)
514+
add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)
515+
find_package(Vulkan COMPONENTS glslc REQUIRED)
516+
find_program(glslc_executable NAMES glslc HINTS Vulkan::glslc)
517+
if (NOT glslc_executable)
518+
message(FATAL_ERROR "glslc not found")
519+
endif()
520+
521+
# compile_shader(SOURCES <shader>...)
#
# Registers build rules that turn each listed shader source into an
# embeddable C++ header. Every <shader> is a path relative to
# CMAKE_CURRENT_SOURCE_DIR. Two rules are added per shader:
#   1. glslc compiles <shader> to <file-name>.spv (target env: Vulkan 1.2).
#   2. The .spv file is wrapped into shader<file-name minus ".comp.spv">.h,
#      containing an include guard, the kp::shader_data namespaces and the
#      output of `xxd -i` for the .spv file.
# Example: kompute-shaders/op_scale.comp -> op_scale.comp.spv
#          -> shaderop_scale.h (guard macro SHADEROP_SCALE_H).
# Uses the variable glslc_executable, which must be set by the caller's scope.
function(compile_shader)
522+
# Keyword signature: no flags, no single-value keywords, one multi-value
# keyword (SOURCES). Parsed values land in compile_shader_SOURCES.
set(options)
523+
set(oneValueArgs)
524+
set(multiValueArgs SOURCES)
525+
cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
526+
foreach(source ${compile_shader_SOURCES})
527+
# Strip the directory part; the .spv output is named after the file only.
get_filename_component(filename ${source} NAME)
528+
set(spv_file ${filename}.spv)
529+
# Rule 1: GLSL -> SPIR-V. In addition to the shader itself, every rule
# depends on the same four .comp files listed below (presumably because the
# shaders include them - confirm), so touching any of them recompiles all
# shaders passed to this function.
add_custom_command(
530+
OUTPUT ${spv_file}
531+
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source}
532+
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/common.comp
533+
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp
534+
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp
535+
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp
536+
COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source}
537+
COMMENT "Compiling ${source} to ${spv_file}"
538+
)
539+
540+
# Derive the header file name and its include-guard macro:
#   "shader" + <name>.comp.spv  ->  shader<name>.h  ->  SHADER<NAME>_H
get_filename_component(RAW_FILE_NAME ${spv_file} NAME)
541+
set(FILE_NAME "shader${RAW_FILE_NAME}")
542+
string(REPLACE ".comp.spv" ".h" HEADER_FILE ${FILE_NAME})
543+
string(TOUPPER ${HEADER_FILE} HEADER_FILE_DEFINE)
544+
string(REPLACE "." "_" HEADER_FILE_DEFINE "${HEADER_FILE_DEFINE}")
545+
set(OUTPUT_HEADER_FILE "${HEADER_FILE}")
546+
# Printed once per shader while CMake configures, not while building.
message(STATUS "${HEADER_FILE} generating ${HEADER_FILE_DEFINE}")
547+
# Rule 2: SPIR-V -> header. The two branches are identical except for the
# location of the xxd executable: Visual Studio generators use a
# per-configuration directory (bin/$<CONFIG>/xxd), all others use bin/xxd.
# NOTE(review): `xxd` in DEPENDS is presumably an executable target provided
# by the kompute subproject - confirm.
# NOTE(review): the header is assembled with `>` / `>>` redirection, which
# relies on the build tool running each COMMAND through a shell; VERBATIM is
# not set on these commands.
if(CMAKE_GENERATOR MATCHES "Visual Studio")
548+
add_custom_command(
549+
OUTPUT ${OUTPUT_HEADER_FILE}
550+
COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
551+
COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
552+
COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
553+
COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
554+
COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
555+
COMMAND ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
556+
COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
557+
COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
558+
DEPENDS ${spv_file} xxd
559+
COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd"
560+
)
561+
else()
562+
add_custom_command(
563+
OUTPUT ${OUTPUT_HEADER_FILE}
564+
COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
565+
COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
566+
COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
567+
COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
568+
COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
569+
COMMAND ${CMAKE_BINARY_DIR}/bin/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
570+
COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
571+
COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
572+
DEPENDS ${spv_file} xxd
573+
COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/xxd"
574+
)
575+
endif()
576+
endforeach()
577+
endfunction()
578+
579+
if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt")
580+
message(STATUS "Kompute found")
581+
set(KOMPUTE_OPT_LOG_LEVEL Error CACHE STRING "Kompute log level")
582+
add_subdirectory(kompute)
583+
584+
# Compile our shaders
585+
compile_shader(SOURCES
586+
kompute-shaders/op_scale.comp
587+
kompute-shaders/op_scale_8.comp
588+
kompute-shaders/op_add.comp
589+
kompute-shaders/op_addrow.comp
590+
kompute-shaders/op_mul.comp
591+
kompute-shaders/op_silu.comp
592+
kompute-shaders/op_relu.comp
593+
kompute-shaders/op_gelu.comp
594+
kompute-shaders/op_softmax.comp
595+
kompute-shaders/op_norm.comp
596+
kompute-shaders/op_rmsnorm.comp
597+
kompute-shaders/op_diagmask.comp
598+
kompute-shaders/op_mul_mat_mat_f32.comp
599+
kompute-shaders/op_mul_mat_f16.comp
600+
kompute-shaders/op_mul_mat_q8_0.comp
601+
kompute-shaders/op_mul_mat_q4_0.comp
602+
kompute-shaders/op_mul_mat_q4_1.comp
603+
kompute-shaders/op_mul_mat_q6_k.comp
604+
kompute-shaders/op_getrows_f16.comp
605+
kompute-shaders/op_getrows_q4_0.comp
606+
kompute-shaders/op_getrows_q4_1.comp
607+
kompute-shaders/op_getrows_q6_k.comp
608+
kompute-shaders/op_rope_f16.comp
609+
kompute-shaders/op_rope_f32.comp
610+
kompute-shaders/op_cpy_f16_f16.comp
611+
kompute-shaders/op_cpy_f16_f32.comp
612+
kompute-shaders/op_cpy_f32_f16.comp
613+
kompute-shaders/op_cpy_f32_f32.comp
614+
)
615+
616+
# Create a custom target for our generated shaders
617+
add_custom_target(generated_shaders DEPENDS
618+
shaderop_scale.h
619+
shaderop_scale_8.h
620+
shaderop_add.h
621+
shaderop_addrow.h
622+
shaderop_mul.h
623+
shaderop_silu.h
624+
shaderop_relu.h
625+
shaderop_gelu.h
626+
shaderop_softmax.h
627+
shaderop_norm.h
628+
shaderop_rmsnorm.h
629+
shaderop_diagmask.h
630+
shaderop_mul_mat_mat_f32.h
631+
shaderop_mul_mat_f16.h
632+
shaderop_mul_mat_q8_0.h
633+
shaderop_mul_mat_q4_0.h
634+
shaderop_mul_mat_q4_1.h
635+
shaderop_mul_mat_q6_k.h
636+
shaderop_getrows_f16.h
637+
shaderop_getrows_q4_0.h
638+
shaderop_getrows_q4_1.h
639+
shaderop_getrows_q6_k.h
640+
shaderop_rope_f16.h
641+
shaderop_rope_f32.h
642+
shaderop_cpy_f16_f16.h
643+
shaderop_cpy_f16_f32.h
644+
shaderop_cpy_f32_f16.h
645+
shaderop_cpy_f32_f32.h
646+
)
647+
648+
# Create a custom command that depends on the generated_shaders
649+
add_custom_command(
650+
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
651+
COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
652+
DEPENDS generated_shaders
653+
COMMENT "Ensuring shaders are generated before compiling ggml-kompute.cpp"
654+
)
655+
656+
# Add the stamp to the main sources to ensure dependency tracking
657+
set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
658+
set(GGML_HEADERS_KOMPUTE ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
659+
add_compile_definitions(GGML_USE_KOMPUTE)
660+
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute)
661+
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR})
662+
else()
663+
message(WARNING "Kompute not found")
664+
endif()
665+
endif()
666+
513667
function(get_flags CCID CCVER)
514668
set(C_FLAGS "")
515669
set(CXX_FLAGS "")
@@ -852,13 +1006,14 @@ add_library(ggml OBJECT
8521006
ggml-backend.h
8531007
ggml-quants.c
8541008
ggml-quants.h
855-
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
856-
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
857-
${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
858-
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
859-
${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
860-
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
861-
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
1009+
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
1010+
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
1011+
${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
1012+
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
1013+
${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
1014+
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
1015+
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
1016+
${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
8621017
)
8631018

8641019
target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})

ggml-backend.c

+5
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,11 @@ GGML_CALL static void ggml_backend_registry_init(void) {
373373
extern GGML_CALL int ggml_backend_vk_reg_devices(void);
374374
ggml_backend_vk_reg_devices();
375375
#endif
376+
377+
#ifdef GGML_USE_KOMPUTE
378+
extern GGML_CALL void ggml_backend_kompute_reg_devices(void);
379+
ggml_backend_kompute_reg_devices();
380+
#endif
376381
}
377382

378383
GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {

0 commit comments

Comments
 (0)