Skip to content

Commit 9bd980e

Browse files
committed
ggml-qnn: pr to upstream
1 parent 4638373 commit 9bd980e

10 files changed

+1583
-1625
lines changed

CMakeLists.txt

-1
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@ include(CheckIncludeFileCXX)
66
set(CMAKE_WARN_UNUSED_CLI YES)
77

88
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
9-
set(CMAKE_VERBOSE_MAKEFILE on)
109

1110
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
1211
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)

cmake/aarch64-w64-mingw32.cmake

-18
This file was deleted.

cmake/arm64-windows-cygwin.cmake

-16
This file was deleted.

cmake/arm64-windows-llvm.cmake

+2-2
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,8 @@ set( CMAKE_CXX_COMPILER clang++ )
99
set( CMAKE_C_COMPILER_TARGET ${target} )
1010
set( CMAKE_CXX_COMPILER_TARGET ${target} )
1111

12-
#set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
13-
#set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" )
12+
set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
13+
set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" )
1414

1515
set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
1616
set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )

ggml/src/ggml-qnn/CMakeLists.txt

+1-2
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,12 @@
11
message(STATUS "Using QNN backend")
2+
message("CMAKE_SYSTEM_NAME : ${CMAKE_SYSTEM_NAME}")
23

34
if(CMAKE_SYSTEM_NAME STREQUAL "Android")
45
find_library(LOG_LIB log)
56
set(QNN_LINK_LIBRARIES ${LOG_LIB})
67
set(QNN_DEFAULT_LIB_SEARCH_PATH "/data/local/tmp/" CACHE STRING "customized library search path for QNN backend")
78
elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows")
89
set(QNN_DEFAULT_LIB_SEARCH_PATH "C:\\" CACHE STRING "customized library search path for QNN backend")
9-
elseif(CMAKE_SYSTEM_NAME STREQUAL "CYGWIN")
10-
set(QNN_DEFAULT_LIB_SEARCH_PATH "/cygdrive/c/qairt/2.31.0.250130/" CACHE STRING "customized library search path for QNN backend")
1110
else()
1211
message(FATAL_ERROR "QNN now only available on Android and Windows(Windows on ARM)")
1312
endif()

ggml/src/ggml-qnn/ggml-qnn.cpp

+1,518-1,088
Large diffs are not rendered by default.

scripts/build-run-android.sh

+53-17
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,9 @@ GGUF_MODEL_NAME=/sdcard/qwen1_5-1_8b-chat-q4_0.gguf
1414
#https://www.qualcomm.com/developer/software/qualcomm-ai-engine-direct-sdk
1515
#https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools
1616
QNN_SDK_URL=https://www.qualcomm.com/developer/software/qualcomm-ai-engine-direct-sdk
17-
QNN_SDK_PATH=/opt/qcom/aistack/qairt/2.31.0.250130/
17+
QNN_SDK_INSTALL_PATH=/opt/qcom/aistack/qairt/
18+
QNN_SDK_VERSION=2.32.0.250228
19+
QNN_SDK_PATH=${QNN_SDK_INSTALL_PATH}/${QNN_SDK_VERSION}
1820

1921
#default is QNN NPU
2022
qnnbackend=2
@@ -32,11 +34,35 @@ function show_pwd()
3234
}
3335

3436

35-
function check_qnn_sdk()
37+
function check_and_download_qnn_sdk()
3638
{
39+
is_qnn_sdk_exist=1
40+
3741
if [ ! -d ${QNN_SDK_PATH} ]; then
38-
echo -e "QNN_SDK_PATH ${QNN_SDK_PATH} not exist, pls check or download it from ${QNN_SDK_URL}...\n"
39-
exit 1
42+
echo -e "QNN_SDK_PATH ${QNN_SDK_PATH} not exist, download it from ${QNN_SDK_URL}...\n"
43+
is_qnn_sdk_exist=0
44+
fi
45+
46+
if [ ! -f ${QNN_SDK_PATH}/sdk.yaml ]; then
47+
is_qnn_sdk_exist=0
48+
fi
49+
50+
if [ ${is_qnn_sdk_exist} -eq 0 ]; then
51+
echo "sudo mkdir -p ${QNN_SDK_INSTALL_PATH}"
52+
sudo mkdir -p ${QNN_SDK_INSTALL_PATH}
53+
if [ ! -f v${QNN_SDK_VERSION}.zip ]; then
54+
wget --no-config --quiet --show-progress -O v${QNN_SDK_VERSION}.zip https://softwarecenter.qualcomm.com/api/download/software/sdks/Qualcomm_AI_Runtime_Community/All/${QNN_SDK_VERSION}/v${QNN_SDK_VERSION}.zip
55+
fi
56+
unzip v${QNN_SDK_VERSION}.zip
57+
if [ $? -ne 0 ]; then
58+
printf "failed to download Qualcomm QNN SDK to %s \n" "${QNN_SDK_PATH}"
59+
exit 1
60+
fi
61+
sudo mv qairt/${QNN_SDK_VERSION} ${QNN_SDK_INSTALL_PATH}/
62+
printf "Qualcomm QNN SDK saved to ${QNN_SDK_PATH} \n\n"
63+
sudo rm -rf qairt
64+
else
65+
printf "Qualcomm QNN SDK already exist:${QNN_SDK_PATH} \n\n"
4066
fi
4167
}
4268

@@ -75,7 +101,7 @@ function check_and_download_ndk()
75101

76102
function build_arm64
77103
{
78-
cmake -H. -B./out/android -DCMAKE_BUILD_TYPE=Release -DGGML_USE_QNN=ON -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=latest -DCMAKE_C_FLAGS=-march=armv8.7-a -DGGML_QNN=ON -DGGML_QNN_SDK_PATH=${QNN_SDK_PATH}
104+
cmake -H. -B./out/android -DCMAKE_BUILD_TYPE=Release -DGGML_OPENMP=OFF -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=latest -DCMAKE_C_FLAGS=-march=armv8.7-a -DGGML_QNN=ON -DGGML_QNN_SDK_PATH=${QNN_SDK_PATH}
79105
cd out/android
80106
make -j16
81107
show_pwd
@@ -97,11 +123,14 @@ function check_qnn_libs()
97123
{
98124
#reuse the cached qnn libs on Android phone
99125
adb shell ls ${REMOTE_PATH}/libQnnCpu.so
126+
adb shell ls ${REMOTE_PATH}/libQnnGpu.so
127+
adb shell ls ${REMOTE_PATH}/libQnnHtp.so
100128
if [ $? -eq 0 ]; then
101129
printf "QNN libs already exist on Android phone\n"
102130
else
103131
update_qnn_libs
104132
fi
133+
update_qnn_cfg
105134
}
106135

107136

@@ -119,11 +148,17 @@ function update_qnn_libs()
119148
}
120149

121150

151+
function update_qnn_cfg()
152+
{
153+
adb push ./scripts/ggml-qnn.cfg ${REMOTE_PATH}/
154+
}
155+
156+
122157
function build_ggml_qnn()
123158
{
124159
show_pwd
125160
check_and_download_ndk
126-
check_qnn_sdk
161+
check_and_download_qnn_sdk
127162
dump_vars
128163
remove_temp_dir
129164
build_arm64
@@ -140,21 +175,20 @@ function prepare_run_on_phone()
140175

141176
check_qnn_libs
142177

143-
if [ -f ./out/android/bin/libggml-qnn.so ]; then
178+
if [ -f ./out/android/bin/libggml-cpu.so ]; then
144179
adb push ./out/android/bin/*.so ${REMOTE_PATH}/
145180
fi
146181
adb push ./out/android/bin/${program} ${REMOTE_PATH}/
147182
adb shell chmod +x ${REMOTE_PATH}/${program}
148183
}
149184

150-
151185
function run_llamacli()
152186
{
153187
prepare_run_on_phone llama-cli
154188

155189
adb shell "cd ${REMOTE_PATH} \
156190
&& export LD_LIBRARY_PATH=${REMOTE_PATH} \
157-
&& ${REMOTE_PATH}/llama-cli -mg ${qnnbackend} -no-cnv -m ${GGUF_MODEL_NAME} -p \"introduce the movie Once Upon a Time in America briefly.\n\""
191+
&& ${REMOTE_PATH}/llama-cli -mg ${qnnbackend} -ngl 99 -no-cnv -m ${GGUF_MODEL_NAME} -p \"introduce the movie Once Upon a Time in America briefly.\n\""
158192

159193
}
160194

@@ -213,7 +247,6 @@ function run_test-op()
213247

214248
}
215249

216-
217250
function print_oplist()
218251
{
219252
oplist="DUP
@@ -302,7 +335,7 @@ function show_usage()
302335
echo " $0 build"
303336
echo " $0 updateqnnlib"
304337
echo " $0 run_testops"
305-
echo " $0 run_testop [ADD/MUL/MUL_MAT/...(op from print_oplist)] [0 (QNN_CPU) / 1 (QNN_GPU) / 2 (QNN_NPU)]"
338+
echo " $0 run_testop [ADD/MUL/MUL_MAT......(op from print_oplist)] [0 (QNN_CPU) / 1 (QNN_GPU) / 2 (QNN_NPU)]"
306339
echo " $0 run_llamacli 0 (QNN_CPU) / 1 (QNN_GPU) / 2 (QNN_NPU) / 3 (ggml)"
307340
echo " $0 run_llamabench 0 (QNN_CPU) / 1 (QNN_GPU) / 2 (QNN_NPU) / 3 (ggml)"
308341

@@ -312,7 +345,8 @@ function show_usage()
312345

313346
show_pwd
314347

315-
check_qnn_sdk
348+
check_and_download_ndk
349+
check_and_download_qnn_sdk
316350

317351
if [ $# == 0 ]; then
318352
show_usage
@@ -343,20 +377,22 @@ elif [ $# == 1 ]; then
343377
fi
344378
elif [ $# == 2 ]; then
345379
qnnbackend=$2
380+
if [ ${qnnbackend} -gt 3 ]; then
381+
show_usage
382+
exit 1
383+
fi
384+
346385
if [ "$1" == "run_llamacli" ]; then
347386
run_llamacli
348387
exit 0
349388
elif [ "$1" == "run_llamabench" ]; then
350389
run_llamabench
351390
exit 0
352-
exit 0
353-
else
354-
show_usage
355-
exit 1
356391
fi
357392
elif [ $# == 3 ]; then
358-
#opname can be found via print_oplist:
359393
opname=$2
394+
#TODO: check opname in oplist
395+
#opname can be found via print_oplist:
360396

361397
qnnbackend=$3
362398
if [ ${qnnbackend} -gt 3 ]; then

scripts/ggml-qnn.cfg

+9
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
[general]
2+
# enable/disable QNN's internal log
3+
print_qnn_internal_log = 0
4+
# 0: general approach,similar to ggml-sycl or ggml-cann
5+
# 1: mapping entire ggml cgraph to QNN graph
6+
inference_approach = 0
7+
8+
[npu]
9+
npu_inference_datatype = "fp16"

tests/CMakeLists.txt

-1
Original file line number | Diff line number | Diff line change
@@ -137,7 +137,6 @@ llama_target_and_test(test-chat-template.cpp)
137137
# llama_target_and_test(test-opt.cpp) # SLOW
138138
llama_target_and_test(test-gguf.cpp)
139139
llama_target_and_test(test-backend-ops.cpp)
140-
llama_target_and_test(ggml-qnn-ut.cpp)
141140

142141
llama_target_and_test(test-model-load-cancel.cpp LABEL "model")
143142
llama_target_and_test(test-autorelease.cpp LABEL "model")

0 commit comments

Comments
 (0)