|
39 | 39 | #include <unistd.h>
|
40 | 40 | #endif
|
41 | 41 |
|
42 |
| -#if defined(__ARM_FEATURE_SVE) |
43 |
| -int ggml_sve_cnt_b = 0; |
44 |
| -#endif |
45 | 42 | #if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
|
46 | 43 | #undef GGML_USE_LLAMAFILE
|
47 | 44 | #endif
|
@@ -455,6 +452,15 @@ static ggml_fp16_t ggml_table_gelu_quick_f16[1 << 16];
|
455 | 452 | // precomputed f32 table for f16 (256 KB) (ggml-impl.h)
|
456 | 453 | float ggml_table_f32_f16[1 << 16];
|
457 | 454 |
|
#if defined(__ARM_ARCH)
// Cached ARM CPU capabilities, shared by the ggml_cpu_has_*() queries.
// The three has_* flags start at -1 and sve_cnt starts at 0; the fields are
// overwritten by ggml_init_arm_arch_features().
struct ggml_arm_arch_features_type {
    int has_neon; // Advanced SIMD (NEON) flag
    int has_i8mm; // int8 matrix-multiply (I8MM) flag
    int has_sve;  // SVE flag
    int sve_cnt;  // SVE vector length value; 0 when not set
} ggml_arm_arch_features = {
    .has_neon = -1,
    .has_i8mm = -1,
    .has_sve  = -1,
    .sve_cnt  = 0,
};
#endif
| 463 | + |
458 | 464 | GGML_CALL const char * ggml_status_to_string(enum ggml_status status) {
|
459 | 465 | switch (status) {
|
460 | 466 | case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
|
@@ -3673,6 +3679,66 @@ static inline int ggml_up(int n, int m) {
|
3673 | 3679 |
|
3674 | 3680 | ////////////////////////////////////////////////////////////////////////////////
|
3675 | 3681 |
|
#if defined(__ARM_ARCH)

#if defined(__linux__) && defined(__aarch64__)
#include <sys/auxv.h>
#elif defined(__APPLE__)
#include <sys/sysctl.h>
#endif

// Fill in ggml_arm_arch_features (has_neon, has_i8mm, has_sve, sve_cnt).
//
// - Linux/aarch64: reads the AT_HWCAP / AT_HWCAP2 auxiliary vector entries.
// - Apple:         reads the hw.optional.* sysctl values; SVE is reported as absent.
// - anything else: no run-time detection, the compile-time feature macros are used.
static void ggml_init_arm_arch_features(void) {
#if defined(__linux__) && defined(__aarch64__)
    // getauxval() returns unsigned long; keep the full width instead of
    // truncating the capability words to 32 bits
    const unsigned long hwcap  = getauxval(AT_HWCAP);
    const unsigned long hwcap2 = getauxval(AT_HWCAP2);

    ggml_arm_arch_features.has_neon = !!(hwcap  & HWCAP_ASIMD);
    ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
    ggml_arm_arch_features.has_sve  = !!(hwcap  & HWCAP_SVE);

    ggml_arm_arch_features.sve_cnt = 0;
#if defined(__ARM_FEATURE_SVE)
    if (ggml_arm_arch_features.has_sve) {
        // prctl(PR_SVE_GET_VL) returns -1 on failure; masking that value would
        // store 0xFFFF as the vector length, so only accept non-negative results
        const int sve_vl = prctl(PR_SVE_GET_VL);
        if (sve_vl >= 0) {
            ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & sve_vl;
        }
    }
#endif
#elif defined(__APPLE__)
    int    value = 0;
    size_t size  = sizeof(value);
    if (sysctlbyname("hw.optional.AdvSIMD", &value, &size, NULL, 0) != 0) {
        value = 0;
    }
    ggml_arm_arch_features.has_neon = (value != 0);

    // the length argument is in/out and the buffer still holds the previous
    // answer: reset both before issuing the second query
    value = 0;
    size  = sizeof(value);
    if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &value, &size, NULL, 0) != 0) {
        value = 0;
    }
    ggml_arm_arch_features.has_i8mm = (value != 0);

    ggml_arm_arch_features.has_sve = 0;
    ggml_arm_arch_features.sve_cnt = 0;
#else
// Run-time CPU feature detection not implemented for this platform, fallback to compile time
#if defined(__ARM_NEON)
    ggml_arm_arch_features.has_neon = 1;
#else
    ggml_arm_arch_features.has_neon = 0;
#endif

#if defined(__ARM_FEATURE_MATMUL_INT8)
    ggml_arm_arch_features.has_i8mm = 1;
#else
    ggml_arm_arch_features.has_i8mm = 0;
#endif

#if defined(__ARM_FEATURE_SVE)
    ggml_arm_arch_features.has_sve = 1;
    ggml_arm_arch_features.sve_cnt = 16;
#else
    ggml_arm_arch_features.has_sve = 0;
    ggml_arm_arch_features.sve_cnt = 0;
#endif
#endif
}
#endif
| 3741 | + |
3676 | 3742 | struct ggml_context * ggml_init(struct ggml_init_params params) {
|
3677 | 3743 | // make this function thread safe
|
3678 | 3744 | ggml_critical_section_start();
|
@@ -3723,6 +3789,10 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
3723 | 3789 | GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
|
3724 | 3790 | }
|
3725 | 3791 |
|
| 3792 | +#if defined(__ARM_ARCH) |
| 3793 | + ggml_init_arm_arch_features(); |
| 3794 | +#endif |
| 3795 | + |
3726 | 3796 | is_first_call = false;
|
3727 | 3797 | }
|
3728 | 3798 |
|
@@ -3771,12 +3841,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
3771 | 3841 |
|
3772 | 3842 | GGML_ASSERT_ALIGNED(ctx->mem_buffer);
|
3773 | 3843 |
|
3774 |
| -#if defined(__ARM_FEATURE_SVE) |
3775 |
| - if (!ggml_sve_cnt_b) { |
3776 |
| - ggml_sve_cnt_b = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL); |
3777 |
| - } |
3778 |
| -#endif |
3779 |
| - |
3780 | 3844 | GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
|
3781 | 3845 |
|
3782 | 3846 | ggml_critical_section_end();
|
@@ -23578,16 +23642,16 @@ int ggml_cpu_has_fma(void) {
|
23578 | 23642 | }
|
23579 | 23643 |
|
// Returns non-zero when NEON is available, 0 otherwise.
// On ARM builds the answer comes from ggml_arm_arch_features.has_neon.
int ggml_cpu_has_neon(void) {
#if defined(__ARM_ARCH)
    // has_neon keeps its -1 initializer until ggml_init_arm_arch_features() has
    // run; returning that sentinel would read as "true" to callers, so only
    // trust the field once it holds a real 0/1 answer
    if (ggml_arm_arch_features.has_neon >= 0) {
        return ggml_arm_arch_features.has_neon;
    }
    // not probed yet: fall back to what the compiler was allowed to target
#if defined(__ARM_NEON)
    return 1;
#else
    return 0;
#endif
#else
    return 0;
#endif
}
|
23587 | 23651 |
|
23588 | 23652 | int ggml_cpu_has_sve(void) {
|
23589 |
| -#if defined(__ARM_FEATURE_SVE) |
23590 |
| - return 1; |
| 23653 | +#if defined(__ARM_ARCH) |
| 23654 | + return ggml_arm_arch_features.has_sve; |
23591 | 23655 | #else
|
23592 | 23656 | return 0;
|
23593 | 23657 | #endif
|
@@ -23734,11 +23798,18 @@ int ggml_cpu_has_vsx(void) {
|
23734 | 23798 | }
|
23735 | 23799 |
|
// Returns non-zero when the int8 matrix-multiply extension (I8MM) is
// available, 0 otherwise.
// On ARM builds the answer comes from ggml_arm_arch_features.has_i8mm.
int ggml_cpu_has_matmul_int8(void) {
#if defined(__ARM_ARCH)
    // has_i8mm keeps its -1 initializer until ggml_init_arm_arch_features() has
    // run; returning that sentinel would read as "true" to callers, so only
    // trust the field once it holds a real 0/1 answer
    if (ggml_arm_arch_features.has_i8mm >= 0) {
        return ggml_arm_arch_features.has_i8mm;
    }
    // not probed yet: fall back to what the compiler was allowed to target
#if defined(__ARM_FEATURE_MATMUL_INT8)
    return 1;
#else
    return 0;
#endif
#else
    return 0;
#endif
}
|
23743 | 23807 |
|
// Returns the cached ggml_arm_arch_features.sve_cnt value on ARM builds,
// and 0 on every other architecture.
int ggml_cpu_get_sve_cnt(void) {
    int sve_cnt = 0;
#if defined(__ARM_ARCH)
    sve_cnt = ggml_arm_arch_features.sve_cnt;
#endif
    return sve_cnt;
}
23744 | 23815 | ////////////////////////////////////////////////////////////////////////////////
|
0 commit comments