1
+ /*
2
+ * Copyright (c) 2025 The ggml authors
3
+ *
4
+ * Qualcomm Hexagon SDK and reference tech guides can be found at:
5
+ * https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools
6
+ *
7
+ * This self-contained, single-source file is an implementation of ggml-dsp:
8
+ * - a customized tiny ggml running on Qualcomm Hexagon cDSP
9
+ * - ported from the original ggml
10
+ *
11
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
12
+ * of this software and associated documentation files (the "Software"), to
13
+ * deal in the Software without restriction, including without limitation the
14
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
15
+ * sell copies of the Software, and to permit persons to whom the Software is
16
+ * furnished to do so, subject to the following conditions:
17
+ *
18
+ * The above copyright notice and this permission notice shall be included in
19
+ * all copies or substantial portions of the Software.
20
+ *
21
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
27
+ * IN THE SOFTWARE.
28
+ */
1
29
#include "ggml-dsp.h"
2
30
3
- // =================================================================================================
4
- // tiny ggml-dsp, ported from original ggml
5
- // =================================================================================================
6
- static int32 g_thread_counts = 1 ;
7
-
8
31
void ggmlhexagon_log_internal (int level , const char * file , const char * func , int line , const char * format , ...) {
9
32
#if !GGMLHEXAGON_DEBUG
10
33
return ;
@@ -30,7 +53,7 @@ void ggmlhexagon_dump_tensor_elements(const ggml_tensor * tensor) {
30
53
char tmpbuf [GGMLHEXAGON_LOGBUF_LEN ];
31
54
size_t buflen = 0 ;
32
55
if (tensor -> type == GGML_TYPE_F32 ) {
33
- memset (tmpbuf , 0 , GGMLHEXAGON_LOG_LEVEL_DEBUG );
56
+ memset (tmpbuf , 0 , GGMLHEXAGON_LOGBUF_LEN );
34
57
for (int h = 0 ; h < tensor -> ne [3 ]; h ++ ) {
35
58
for (int i = 0 ; i < tensor -> ne [2 ]; i ++ ) {
36
59
for (int j = 0 ; j < tensor -> ne [1 ]; j ++ ) {
@@ -173,116 +196,3 @@ int64_t ggml_time_ms(void) {
173
196
// Returns the value reported by hexagon_perf_get_time_us() unchanged.
// NOTE(review): presumably a timestamp in microseconds, going by the names -- confirm against the helper's definition.
int64_t ggml_time_us (void ) {
174
197
return hexagon_perf_get_time_us ();
175
198
}
176
-
177
- int ggmlop_get_thread_counts (void ) {
178
- return g_thread_counts ;
179
- }
180
-
181
- // =================================================================================================
182
- // implementation of ggml-hexagon kernel skel function
183
- // =================================================================================================
184
- int ggmlop_dsp_open (const char * uri , remote_handle64 * handle ) {
185
- void * tptr = NULL ;
186
- GGMLHEXAGON_LOG_DEBUG ("uri %s" , uri );
187
- tptr = (void * )malloc (1 );
188
- * handle = (remote_handle64 )tptr ;
189
- assert (* handle );
190
-
191
- GGMLHEXAGON_LOG_DEBUG ("api_version = 0x%x" , qurt_api_version ());
192
- GGMLHEXAGON_LOG_DEBUG ("hvx units = 0x%d" , qurt_hvx_get_units ());
193
- qurt_arch_version_t vers ;
194
- qurt_sysenv_get_arch_version (& vers );
195
- GGMLHEXAGON_LOG_DEBUG ("arch_version=0x%x" , vers .arch_version );
196
- qurt_sysenv_app_heap_t aheap ;
197
- qurt_sysenv_get_app_heap (& aheap );
198
- GGMLHEXAGON_LOG_DEBUG ("aheap.heap_base=0x%x, aheap.heap_limit=0x%x" , aheap .heap_base , aheap .heap_limit );
199
- qurt_sysenv_max_hthreads_t mhwt ;
200
- qurt_sysenv_get_max_hw_threads (& mhwt );
201
- GGMLHEXAGON_LOG_DEBUG ("max hardware threads counts=%d" , mhwt .max_hthreads );
202
- g_thread_counts = mhwt .max_hthreads ;
203
-
204
- return 0 ;
205
- }
206
-
207
- int ggmlop_dsp_close (remote_handle64 handle ) {
208
- if (handle )
209
- free ((void * )handle );
210
-
211
- return 0 ;
212
- }
213
-
214
- AEEResult ggmlop_dsp_setclocks (remote_handle64 handle , int32 power_level , int32 latency , int32 dcvs_enabled , int32 thread_counts ) {
215
- GGMLHEXAGON_LOG_DEBUG ("enter %s" , __func__ );
216
- HAP_power_request_t request ;
217
- memset (& request , 0 , sizeof (HAP_power_request_t ));
218
- request .type = HAP_power_set_apptype ;
219
- request .apptype = HAP_POWER_COMPUTE_CLIENT_CLASS ;
220
-
221
- GGMLHEXAGON_LOG_DEBUG ("user specified thread_counts %d" , thread_counts );
222
- if (thread_counts > 1 )
223
- g_thread_counts = (thread_counts > g_thread_counts ) ? g_thread_counts : thread_counts ;
224
- else
225
- g_thread_counts = 1 ;
226
- GGMLHEXAGON_LOG_DEBUG ("real thread_counts %d" , g_thread_counts );
227
-
228
- void * ggmop_ctx = (void * )(handle );
229
- int retval = HAP_power_set (ggmop_ctx , & request );
230
- if (retval ) {
231
- GGMLHEXAGON_LOG_DEBUG ("failed first power vote" );
232
- return AEE_EFAILED ;
233
- }
234
-
235
- //configure clocks & DCVS mode
236
- memset (& request , 0 , sizeof (HAP_power_request_t ));
237
- request .type = HAP_power_set_DCVS_v2 ;
238
- request .dcvs_v2 .dcvs_enable = TRUE;
239
- request .dcvs_v2 .dcvs_params .target_corner = (HAP_dcvs_voltage_corner_t )power_level ;
240
- if (dcvs_enabled ) {
241
- request .dcvs_v2 .dcvs_params .min_corner = HAP_DCVS_VCORNER_DISABLE ;
242
- request .dcvs_v2 .dcvs_params .max_corner = HAP_DCVS_VCORNER_DISABLE ;
243
- } else {
244
- request .dcvs_v2 .dcvs_params .min_corner = request .dcvs_v2 .dcvs_params .target_corner ;
245
- request .dcvs_v2 .dcvs_params .max_corner = request .dcvs_v2 .dcvs_params .target_corner ;
246
- }
247
- request .dcvs_v2 .dcvs_option = HAP_DCVS_V2_PERFORMANCE_MODE ;
248
- request .dcvs_v2 .set_dcvs_params = TRUE;
249
- request .dcvs_v2 .set_latency = TRUE;
250
- request .dcvs_v2 .latency = latency ;
251
- retval = HAP_power_set (ggmop_ctx , & request );
252
- if (retval ) {
253
- GGMLHEXAGON_LOG_DEBUG ("failed to vote for performance mode" );
254
- return AEE_EFAILED ;
255
- }
256
-
257
- memset (& request , 0 , sizeof (HAP_power_request_t ));
258
- request .type = HAP_power_set_HVX ;
259
- request .hvx .power_up = TRUE;
260
- retval = HAP_power_set (ggmop_ctx , & request );
261
- if (retval ) {
262
- GGMLHEXAGON_LOG_DEBUG ("failed to vote for HVX power" );
263
- return AEE_EFAILED ;
264
- }
265
- GGMLHEXAGON_LOG_DEBUG ("leave %s" , __func__ );
266
- return AEE_SUCCESS ;
267
- }
268
-
269
- // =================================================================================================
270
- // implementation of ggml-hexagon kernel, it's better to put every hexagon-kernel to a single file
271
- // =================================================================================================
272
- int ggmlop_dsp_softmax (remote_handle64 h , const dsptensor * src0 , const dsptensor * src1 , dsptensor * dst ) {
273
- GGMLHEXAGON_LOG_DEBUG ("enter %s" , __func__ );
274
- GGMLHEXAGON_LOG_DEBUG ("leave %s" , __func__ );
275
- return 0 ;
276
- }
277
-
278
- int ggmlop_dsp_rmsnorm (remote_handle64 h , const dsptensor * src0 , const dsptensor * src1 , dsptensor * dst ) {
279
- GGMLHEXAGON_LOG_DEBUG ("enter %s" , __func__ );
280
- GGMLHEXAGON_LOG_DEBUG ("leave %s" , __func__ );
281
- return 0 ;
282
- }
283
-
284
- int ggmlop_dsp_pool2d (remote_handle64 h , const dsptensor * src0 , const dsptensor * src1 , dsptensor * dst ) {
285
- GGMLHEXAGON_LOG_DEBUG ("enter %s" , __func__ );
286
- GGMLHEXAGON_LOG_DEBUG ("leave %s" , __func__ );
287
- return 0 ;
288
- }
0 commit comments