
Commit 3202361

ggml, ci : Windows ARM runner and build fixes (ggml-org#5979)
* windows arm ci
* fix `error C2078: too many initializers` with ggml_vld1q_u32 macro for MSVC ARM64
* fix `warning C4146: unary minus operator applied to unsigned type, result still unsigned`
* fix `error C2065: '__fp16': undeclared identifier`
1 parent 332bdfd commit 3202361

File tree: 5 files changed (+21, -15 lines)

.github/workflows/build.yml (+3, -1)

@@ -425,6 +425,8 @@ jobs:
             defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'vulkan'
             defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
+          - build: 'arm64'
+            defines: '-A ARM64 -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
 
     steps:
       - name: Clone
@@ -520,7 +522,7 @@ jobs:
       - name: Test
         id: cmake_test
        # not all machines have native AVX-512
-        if: ${{ matrix.build != 'clblast' && matrix.build != 'kompute' && matrix.build != 'vulkan' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }}
+        if: ${{ matrix.build != 'arm64' && matrix.build != 'clblast' && matrix.build != 'kompute' && matrix.build != 'vulkan' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }}
        run: |
          cd build
          ctest -L main -C Release --verbose --timeout 900

ggml-impl.h (+6, -2)

@@ -53,26 +53,30 @@ extern "C" {
 //
 #include <arm_neon.h>
 
+typedef __fp16 ggml_fp16_internal_t;
+
 #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
 #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
 
 #define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
 
 static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
-    __fp16 tmp;
+    ggml_fp16_internal_t tmp;
     memcpy(&tmp, &h, sizeof(ggml_fp16_t));
     return (float)tmp;
 }
 
 static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
     ggml_fp16_t res;
-    __fp16 tmp = f;
+    ggml_fp16_internal_t tmp = f;
     memcpy(&res, &tmp, sizeof(ggml_fp16_t));
     return res;
 }
 
 #else
 
+typedef uint16_t ggml_fp16_internal_t;
+
 #ifdef __wasm_simd128__
 #include <wasm_simd128.h>
 #else
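
The typedef pair above is what resolves `error C2065: '__fp16': undeclared identifier`: GCC and Clang expose ARM's half-precision type as __fp16, but MSVC targeting ARM64 does not, so the non-NEON branch falls back to a same-size uint16_t. A minimal sketch of the pattern (guards and names here are illustrative, not ggml's exact ones):

    // Illustrative only: ggml's real guards and names differ.
    #include <stdint.h>
    #include <string.h>

    #if defined(__GNUC__) && defined(__ARM_NEON)
    typedef __fp16   half_internal_t;  // native half float: (float)x converts the value
    #else
    typedef uint16_t half_internal_t;  // same 16-bit size, raw bits only
    #endif

    // Move 16 stored bits into the internal type; memcpy avoids aliasing UB.
    static inline half_internal_t half_load_bits(uint16_t h) {
        half_internal_t tmp;
        memcpy(&tmp, &h, sizeof(tmp));
        return tmp;
    }

Because both branches define a 16-bit type, the memcpy-based bit copies compile either way; only the __fp16 branch performs a true value conversion when cast to float.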

ggml-quants.c (+8, -8)

@@ -9374,15 +9374,15 @@ void ggml_vec_dot_iq3_s_q8_K (int n, float * restrict s, size_t bs, const void *
 
         const uint8x16_t idx_l = vld1q_u8(qs); qs += 16;
         idx.vec_index = vorrq_u16(vmovl_u8(vget_low_u8 (idx_l)), vandq_u16(vshlq_u16(vdupq_n_u16(qh[ib32+0]), hshift), m256));
-        const uint32x4_t aux32x4_0 = {iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]],
-                                      iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]};
-        const uint32x4_t aux32x4_1 = {iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]],
-                                      iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]};
+        const uint32x4_t aux32x4_0 = ggml_vld1q_u32(iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]],
+                                                    iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]);
+        const uint32x4_t aux32x4_1 = ggml_vld1q_u32(iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]],
+                                                    iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]);
         idx.vec_index = vorrq_u16(vmovl_u8(vget_high_u8(idx_l)), vandq_u16(vshlq_u16(vdupq_n_u16(qh[ib32+1]), hshift), m256));
-        const uint32x4_t aux32x4_2 = {iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]],
-                                      iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]};
-        const uint32x4_t aux32x4_3 = {iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]],
-                                      iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]};
+        const uint32x4_t aux32x4_2 = ggml_vld1q_u32(iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]],
+                                                    iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]);
+        const uint32x4_t aux32x4_3 = ggml_vld1q_u32(iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]],
+                                                    iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]);
 
 
         vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[0] | (signs[1] << 16)));
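
These hunks fix `error C2078: too many initializers`: MSVC's arm_neon.h defines uint32x4_t as a union rather than a compiler-native vector type, so brace-initializing it from four scalars fails. The ggml_vld1q_u32 macro is defined elsewhere in ggml (its definition is not part of this diff); a hedged, function-style equivalent of the idea:

    // Hypothetical stand-in for ggml_vld1q_u32 (the real macro is not shown
    // here): vld1q_u32 is a standard NEON intrinsic that every compiler
    // provides, so loading through a plain array sidesteps C2078.
    #include <arm_neon.h>
    #include <stdint.h>

    static inline uint32x4_t u32x4_from_lanes(uint32_t a, uint32_t b,
                                              uint32_t c, uint32_t d) {
        const uint32_t lanes[4] = { a, b, c, d };  // array brace-init is always legal
        return vld1q_u32(lanes);                   // one 128-bit load
    }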

ggml.c (+2, -2)

@@ -857,7 +857,7 @@ inline static float vaddvq_f32(float32x4_t v) {
 #define GGML_F16x8              float16x8_t
 #define GGML_F16x8_ZERO         vdupq_n_f16(0.0f)
 #define GGML_F16x8_SET1(x)      vdupq_n_f16(x)
-#define GGML_F16x8_LOAD(x)      vld1q_f16((const __fp16 *)(x))
+#define GGML_F16x8_LOAD(x)      vld1q_f16((const ggml_fp16_internal_t *)(x))
 #define GGML_F16x8_STORE        vst1q_f16
 #define GGML_F16x8_FMA(a, b, c) vfmaq_f16(a, b, c)
 #define GGML_F16x8_ADD          vaddq_f16
@@ -900,7 +900,7 @@ inline static float vaddvq_f32(float32x4_t v) {
 #define GGML_F32Cx4              float32x4_t
 #define GGML_F32Cx4_ZERO         vdupq_n_f32(0.0f)
 #define GGML_F32Cx4_SET1(x)      vdupq_n_f32(x)
-#define GGML_F32Cx4_LOAD(x)      vcvt_f32_f16(vld1_f16((const __fp16 *)(x)))
+#define GGML_F32Cx4_LOAD(x)      vcvt_f32_f16(vld1_f16((const ggml_fp16_internal_t *)(x)))
 #define GGML_F32Cx4_STORE(x, y)  vst1_f16(x, vcvt_f16_f32(y))
 #define GGML_F32Cx4_FMA(a, b, c) vfmaq_f32(a, b, c)
 #define GGML_F32Cx4_ADD          vaddq_f32
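
Same `__fp16` issue as in ggml-impl.h: these load macros cast their source pointer to `const __fp16 *`, which is not a declared type under MSVC, so the cast now goes through ggml_fp16_internal_t instead. A sketch of the pattern, assuming a GCC/Clang AArch64 target where float16_t and __fp16 are compatible:

    // Illustrative wrapper (assumes a GCC/Clang NEON target): fp16 data is
    // stored as raw 16-bit words and reinterpreted, not converted, on load.
    #include <arm_neon.h>
    #include <stdint.h>

    typedef __fp16 fp16_internal_t;  // mirrors the ggml-impl.h typedef above

    static inline float16x8_t load_eight_halves(const uint16_t * src) {
        return vld1q_f16((const fp16_internal_t *) src);  // bit reinterpretation
    }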

llama.cpp (+2, -2)

@@ -13980,7 +13980,7 @@ int32_t llama_token_to_piece(const struct llama_model * model, llama_token token
     } else if (llama_is_user_defined_token(model->vocab, token)) {
         std::string result = model->vocab.id_to_token[token].text;
         if (length < (int) result.length()) {
-            return -result.length();
+            return -(int) result.length();
         }
         memcpy(buf, result.c_str(), result.length());
         return result.length();
@@ -14015,7 +14015,7 @@ int32_t llama_token_to_piece(const struct llama_model * model, llama_token token
     } else if (llama_is_user_defined_token(model->vocab, token)) {
         std::string result = model->vocab.id_to_token[token].text;
         if (length < (int) result.length()) {
-            return -result.length();
+            return -(int) result.length();
         }
         memcpy(buf, result.c_str(), result.length());
         return result.length();
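
The added cast fixes `warning C4146`: std::string::length() returns an unsigned size_t, and unary minus on an unsigned value wraps modulo 2^N instead of producing a negative number, so the negation has to happen in signed arithmetic. A small standalone demonstration:

    // Demonstrates the C4146 hazard; values are illustrative.
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        size_t len = 5;                    // like result.length()
        size_t wrapped = (size_t)0 - len;  // what -len computes: SIZE_MAX - 4
        int32_t fixed = -(int32_t) len;    // cast first, then negate: -5
        printf("wrapped = %zu, fixed = %d\n", wrapped, fixed);
        return 0;
    }

On common two's-complement platforms the old code often still returned -5 after narrowing to int32_t, but the warning is right that the negation itself never yields a negative value; the explicit cast makes the intent well-defined.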
