Skip to content

Commit 33c1bfb

Browse files
Better vectorization and crc64. Cleaned up cmake and added better runtime cpu detection (#1083)
Co-authored-by: Alfred G <[email protected]> Co-authored-by: Alfred Gedeon <[email protected]>
1 parent 2fd6652 commit 33c1bfb

File tree

10 files changed

+178
-34
lines changed

10 files changed

+178
-34
lines changed

CMakeLists.txt

+9-5
Original file line numberDiff line numberDiff line change
@@ -152,13 +152,17 @@ if (USE_CPU_EXTENSIONS)
152152
)
153153
endif()
154154
elseif (AWS_ARCH_ARM64 OR AWS_ARCH_ARM32)
155-
if (MSVC)
155+
if (WINDOWS)
156156
file(GLOB AWS_COMMON_ARCH_SRC
157-
"source/arch/arm/msvc/*.c"
157+
"source/arch/arm/windows/*.c"
158158
)
159-
elseif (AWS_HAVE_AUXV)
159+
elseif(APPLE)
160+
file(GLOB AWS_COMMON_ARCH_SRC
161+
"source/arch/arm/darwin/*.c"
162+
)
163+
else()
160164
file(GLOB AWS_COMMON_ARCH_SRC
161-
"source/arch/arm/asm/*.c"
165+
"source/arch/arm/auxv/*.c"
162166
)
163167
endif()
164168
endif()
@@ -221,7 +225,7 @@ target_compile_definitions(${PROJECT_NAME} PRIVATE -DCJSON_HIDE_SYMBOLS)
221225

222226
if (AWS_HAVE_AVX2_INTRINSICS)
223227
target_compile_definitions(${PROJECT_NAME} PRIVATE -DUSE_SIMD_ENCODING)
224-
simd_add_source_avx(${PROJECT_NAME} "source/arch/intel/encoding_avx2.c")
228+
simd_append_source_and_features(${PROJECT_NAME} "source/arch/intel/encoding_avx2.c" ${AWS_AVX2_FLAG})
225229
message(STATUS "Building SIMD base64 decoder")
226230
endif()
227231

bin/system_info/print_system_info.c

+14
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <aws/common/byte_buf.h>
44
#include <aws/common/logging.h>
55
#include <aws/common/system_info.h>
6+
#include <aws/common/cpuid.h>
67

78
int main(void) {
89
struct aws_allocator *allocator = aws_default_allocator();
@@ -39,6 +40,19 @@ int main(void) {
3940
fprintf(stdout, " 'numa architecture': 'false'\n");
4041
}
4142

43+
fprintf(stdout, " 'cpu_capabilities': {\n");
44+
fprintf(stdout, " 'arm_crc': %s,\n", aws_cpu_has_feature(AWS_CPU_FEATURE_ARM_CRC) ? "true" : "false");
45+
fprintf(stdout, " 'arm_pmull': %s,\n", aws_cpu_has_feature(AWS_CPU_FEATURE_ARM_PMULL) ? "true" : "false");
46+
fprintf(stdout, " 'arm_crypto': %s,\n", aws_cpu_has_feature(AWS_CPU_FEATURE_ARM_CRYPTO) ? "true" : "false");
47+
fprintf(stdout, " 'amd_sse4_1': %s,\n", aws_cpu_has_feature(AWS_CPU_FEATURE_SSE_4_1) ? "true" : "false");
48+
fprintf(stdout, " 'amd_sse4_2': %s,\n", aws_cpu_has_feature(AWS_CPU_FEATURE_SSE_4_2) ? "true" : "false");
49+
fprintf(stdout, " 'amd_clmul': %s,\n", aws_cpu_has_feature(AWS_CPU_FEATURE_CLMUL) ? "true" : "false");
50+
fprintf(stdout, " 'amd_vpclmulqdq': %s,\n", aws_cpu_has_feature(AWS_CPU_FEATURE_VPCLMULQDQ) ? "true" : "false");
51+
fprintf(stdout, " 'amd_avx2': %s,\n", aws_cpu_has_feature(AWS_CPU_FEATURE_AVX2) ? "true" : "false");
52+
fprintf(stdout, " 'amd_avx512': %s,\n", aws_cpu_has_feature(AWS_CPU_FEATURE_AVX512) ? "true" : "false");
53+
fprintf(stdout, " 'amd_bmi2': %s\n", aws_cpu_has_feature(AWS_CPU_FEATURE_BMI2) ? "true" : "false");
54+
fprintf(stdout, " }\n");
55+
4256
fprintf(stdout, "}\n");
4357
aws_system_environment_release(env);
4458
aws_logger_clean_up(&logger);

cmake/AwsFeatureTests.cmake

+13
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ if(MINGW)
1717
set(USE_CPU_EXTENSIONS OFF)
1818
endif()
1919

20+
if (USE_CPU_EXTENSIONS)
21+
set(AWS_USE_CPU_EXTENSIONS ON)
22+
endif()
23+
2024
if(NOT CMAKE_CROSSCOMPILING)
2125
check_c_source_runs("
2226
#include <stdbool.h>
@@ -54,6 +58,15 @@ check_c_source_compiles("
5458
}
5559
" AWS_ARCH_INTEL)
5660

61+
check_c_source_compiles("
62+
int main() {
63+
#if !(defined(__x86_64__) || defined(_M_X64))
64+
# error \"not intel\"
65+
#endif
66+
return 0;
67+
}
68+
" AWS_ARCH_INTEL_X64)
69+
5770
check_c_source_compiles("
5871
int main() {
5972
#if !(defined(__aarch64__) || defined(_M_ARM64))

cmake/AwsSIMD.cmake

+73-24
Original file line numberDiff line numberDiff line change
@@ -4,35 +4,39 @@
44
include(CheckCCompilerFlag)
55
include(CheckIncludeFile)
66

7+
if (MSVC)
8+
set(AWS_AVX2_FLAG "/arch:AVX2")
9+
set(AWS_AVX512_FLAG "/arch:AVX512")
10+
set(AWS_AVX512vL_FLAG "")
11+
set(AWS_CLMUL_FLAG "")
12+
set(AWS_SSE4_2_FLAG "")
13+
set(AWS_ARMv8_1_FLAG "/arch:arm8.1")
14+
set(WERROR_FLAG "")
15+
else()
16+
set(AWS_AVX2_FLAG "-mavx -mavx2")
17+
set(AWS_AVX512_FLAG "-mavx512f -mvpclmulqdq")
18+
set(AWS_AVX512vL_FLAG "-mavx512vl")
19+
set(AWS_CLMUL_FLAG "-mpclmul")
20+
set(AWS_SSE4_2_FLAG "-msse4.2")
21+
set(AWS_ARMv8_1_FLAG "-march=armv8-a+crc+crypto -mtune=neoverse-v1")
22+
set(WERROR_FLAG "-Werror")
23+
endif()
24+
725
if (USE_CPU_EXTENSIONS)
8-
if (MSVC)
9-
check_c_compiler_flag("/arch:AVX2" HAVE_M_AVX2_FLAG)
10-
if (HAVE_M_AVX2_FLAG)
11-
set(AVX_CFLAGS "/arch:AVX2")
12-
endif()
13-
else()
14-
check_c_compiler_flag(-mavx2 HAVE_M_AVX2_FLAG)
15-
if (HAVE_M_AVX2_FLAG)
16-
set(AVX_CFLAGS "-mavx -mavx2")
17-
endif()
26+
set(AVX_CFLAGS ${AWS_SSE4_2_FLAG})
27+
28+
check_c_compiler_flag(${AWS_AVX2_FLAG} HAVE_M_AVX2_FLAG)
29+
if (HAVE_M_AVX2_FLAG)
30+
set(AVX_CFLAGS "${AWS_AVX2_FLAG} ${AVX_CFLAGS}")
1831
endif()
1932

20-
if (MSVC)
21-
check_c_compiler_flag("/arch:AVX512" HAVE_M_AVX512_FLAG)
22-
if (HAVE_M_AVX512_FLAG)
23-
# docs imply AVX512 brings in AVX2. And it will compile, but it will break at runtime on
24-
# instructions such as _mm256_load_si256(). Leave it on.
25-
set(AVX_CFLAGS "/arch:AVX512 /arch:AVX2")
26-
endif()
27-
else()
28-
check_c_compiler_flag("-mavx512f -mvpclmulqdq" HAVE_M_AVX512_FLAG)
29-
if (HAVE_M_AVX512_FLAG)
30-
set(AVX_CFLAGS "-mavx512f -mvpclmulqdq -mpclmul -mavx -mavx2 -msse4.2")
31-
endif()
33+
check_c_compiler_flag("${AWS_AVX512_FLAG} ${AWS_CLMUL_FLAG}" HAVE_M_AVX512_FLAG)
34+
if (HAVE_M_AVX512_FLAG)
35+
set(AVX_CFLAGS "${AWS_AVX512_FLAG} ${AWS_CLMUL_FLAG} ${AVX_CFLAGS}")
3236
endif()
3337

3438
set(old_flags "${CMAKE_REQUIRED_FLAGS}")
35-
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${AVX_CFLAGS}")
39+
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${AVX_CFLAGS} ${WERROR_FLAG}")
3640

3741
check_c_source_compiles("
3842
#include <immintrin.h>
@@ -68,7 +72,35 @@ if (USE_CPU_EXTENSIONS)
6872
return (int)_mm256_extract_epi64(vec, 2);
6973
}" AWS_HAVE_MM256_EXTRACT_EPI64)
7074

75+
check_c_source_compiles("
76+
#include <wmmintrin.h>
77+
#include <emmintrin.h>
78+
int main() {
79+
__m128i a = _mm_setzero_si128();
80+
__m128i b = _mm_setzero_si128();
81+
__m128i result = _mm_clmulepi64_si128(a, b, 0x00);
82+
(void)result;
83+
return 0;
84+
}" AWS_HAVE_CLMUL)
85+
86+
set(CMAKE_REQUIRED_FLAGS "${old_flags} ${AWS_ARMv8_1_FLAG} ${WERROR_FLAG}")
87+
check_c_source_compiles("
88+
#include <arm_acle.h>
89+
int main() {
90+
int crc = __crc32d(0, 1);
91+
return 0;
92+
}" AWS_HAVE_ARM32_CRC)
93+
94+
check_c_source_compiles("
95+
#include <stdatomic.h>
96+
int main() {
97+
_Atomic int var = 0;
98+
atomic_fetch_add_explicit(&var, 1, memory_order_relaxed);
99+
return 0;
100+
}" AWS_HAVE_ARMv8_1)
101+
71102
set(CMAKE_REQUIRED_FLAGS "${old_flags}")
103+
72104
endif() # USE_CPU_EXTENSIONS
73105

74106
# The part where the definition is added to the compiler flags has been moved to config.h.in
@@ -80,6 +112,23 @@ endif() # USE_CPU_EXTENSIONS
80112
function(simd_add_source_avx target)
81113
foreach(file ${ARGN})
82114
target_sources(${target} PRIVATE ${file})
83-
set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS "${AVX_CFLAGS}")
115+
set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS " ${AVX_CFLAGS}")
84116
endforeach()
85117
endfunction(simd_add_source_avx)
118+
119+
# The part where the definition is added to the compiler flags has been moved to config.h.in
120+
# see git history for more details.
121+
122+
# Adds compiler flags to the source and adds the source to target.
123+
# Unfortunately the flags have to be passed as strings. Predefined flags are
124+
# at the top of this file.
125+
# Usage: simd_append_source_and_features(target file1.c ${AWS_AVX512_FLAG} ${AWS_AVX2_FLAG} ...)
126+
# simd_append_source_and_features(<target> <file> [<flag>...])
#
# Adds <file> to <target> as a PRIVATE source and compiles that one file with
# every extra argument applied as a compiler flag. Each flag argument is a
# string and may itself hold several space-separated options.
function(simd_append_source_and_features target file)
    # Build one space-separated option string; every flag is prefixed by a
    # single space, so the result is empty when no flags were passed.
    set(feature_flags "")
    foreach(feature_flag ${ARGN})
        string(APPEND feature_flags " ${feature_flag}")
    endforeach()

    target_sources(${target} PRIVATE ${file})
    # The flags are attached to this source file only, not to the whole target.
    set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS " ${feature_flags}")
endfunction()

include/aws/common/config.h.in

+7
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,12 @@
2222
#cmakedefine AWS_HAVE_AVX2_INTRINSICS
2323
#cmakedefine AWS_HAVE_AVX512_INTRINSICS
2424
#cmakedefine AWS_HAVE_MM256_EXTRACT_EPI64
25+
#cmakedefine AWS_HAVE_CLMUL
26+
#cmakedefine AWS_HAVE_ARM32_CRC
27+
#cmakedefine AWS_HAVE_ARMv8_1
28+
#cmakedefine AWS_ARCH_ARM64
29+
#cmakedefine AWS_ARCH_INTEL
30+
#cmakedefine AWS_ARCH_INTEL_X64
31+
#cmakedefine AWS_USE_CPU_EXTENSIONS
2532

2633
#endif

include/aws/common/cpuid.h

+2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ enum aws_cpu_feature_name {
1818
AWS_CPU_FEATURE_ARM_CRC,
1919
AWS_CPU_FEATURE_BMI2,
2020
AWS_CPU_FEATURE_VPCLMULQDQ,
21+
AWS_CPU_FEATURE_ARM_PMULL,
22+
AWS_CPU_FEATURE_ARM_CRYPTO,
2123
AWS_CPU_FEATURE_COUNT,
2224
};
2325

source/arch/arm/asm/cpuid.c renamed to source/arch/arm/auxv/cpuid.c

+7-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,9 @@ struct cap_bits {
2929

3030
# if (defined(__aarch64__))
3131
struct cap_bits s_check_cap[AWS_CPU_FEATURE_COUNT] = {
32-
[AWS_CPU_FEATURE_ARM_CRC] = {0, 1 << 7 /* HWCAP_CRC */},
32+
[AWS_CPU_FEATURE_ARM_CRC] = {0, 1 << 7 /* HWCAP_CRC32 */},
33+
[AWS_CPU_FEATURE_ARM_PMULL] = {0, 1 << 4 /* HWCAP_PMULL */},
34+
[AWS_CPU_FEATURE_ARM_CRYPTO] = {0, 1 << 3 /* HWCAP_AES */},
3335
};
3436
# else
3537
struct cap_bits s_check_cap[AWS_CPU_FEATURE_COUNT] = {
@@ -67,6 +69,10 @@ bool aws_cpu_has_feature(enum aws_cpu_feature_name feature_name) {
6769

6870
switch (feature_name) {
6971
case AWS_CPU_FEATURE_ARM_CRC:
72+
# if (defined(__aarch64__))
73+
case AWS_CPU_FEATURE_ARM_PMULL:
74+
case AWS_CPU_FEATURE_ARM_CRYPTO:
75+
# endif // (defined(__aarch64__))
7076
return s_hwcap[s_check_cap[feature_name].cap] & s_check_cap[feature_name].bit;
7177
default:
7278
return false;

source/arch/arm/darwin/cpuid.c

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License").
5+
* You may not use this file except in compliance with the License.
6+
* A copy of the License is located at
7+
*
8+
* http://aws.amazon.com/apache2.0
9+
*
10+
* or in the "license" file accompanying this file. This file is distributed
11+
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12+
* express or implied. See the License for the specific language governing
13+
* permissions and limitations under the License.
14+
*/
15+
16+
#include <aws/common/cpuid.h>
17+
18+
#include <sys/sysctl.h>
19+
20+
/**
 * Reports whether the running CPU supports `feature_name` by reading the
 * matching `hw.optional.*` sysctl entry.
 *
 * Returns true only when the lookup for that feature succeeds and the kernel
 * reports the value 1. Features without an entry below, and lookups that
 * fail, report false.
 */
bool aws_cpu_has_feature(enum aws_cpu_feature_name feature_name) {
    const char *sysctl_name = NULL;
    int64_t ret = 0;
    size_t size = sizeof(ret);

    /* Map each feature to exactly one sysctl key. */
    switch (feature_name) {
        case AWS_CPU_FEATURE_ARM_PMULL:
            sysctl_name = "hw.optional.arm.FEAT_PMULL";
            break;
        case AWS_CPU_FEATURE_ARM_CRC:
            sysctl_name = "hw.optional.armv8_crc32";
            break;
        case AWS_CPU_FEATURE_ARM_CRYPTO:
            sysctl_name = "hw.optional.arm.FEAT_AES";
            break;
        default:
            return false;
    }

    /*
     * A failed lookup means "not supported" for this feature. It must not be
     * answered by querying a different feature's key, which is what falling
     * through to the next case would do.
     */
    if (sysctlbyname(sysctl_name, &ret, &size, NULL, 0) == -1) {
        return false;
    }

    return ret == 1;
}

source/arch/arm/msvc/cpuid.c renamed to source/arch/arm/windows/cpuid.c

+11-2
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,18 @@
1313
* permissions and limitations under the License.
1414
*/
1515

16+
#include <Windows.h>
1617
#include <aws/common/cpuid.h>
17-
#include <stdlib.h>
1818

1919
bool aws_cpu_has_feature(enum aws_cpu_feature_name feature_name) {
20-
return false;
20+
switch (feature_name) {
21+
case AWS_CPU_FEATURE_ARM_CRC:
22+
return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0;
23+
// this is the best we've got on windows as they don't separate PMULL and AES from each other.
24+
case AWS_CPU_FEATURE_ARM_PMULL:
25+
case AWS_CPU_FEATURE_ARM_CRYPTO:
26+
return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0;
27+
default:
28+
return false;
29+
}
2130
}

source/arch/intel/cpuid.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,8 @@ static bool s_has_bmi2(void) {
116116
static bool s_has_vpclmulqdq(void) {
117117
uint32_t abcd[4];
118118
/* Check VPCLMULQDQ:
119-
* CPUID.(EAX=07H, ECX=0H):ECX.VPCLMULQDQ[bit 20]==1 */
120-
uint32_t vpclmulqdq_mask = (1 << 20);
119+
* CPUID.(EAX=07H, ECX=0H):ECX.VPCLMULQDQ[bit 10]==1 */
120+
uint32_t vpclmulqdq_mask = (1 << 10);
121121
aws_run_cpuid(7, 0, abcd);
122122
if ((abcd[2] & vpclmulqdq_mask) != vpclmulqdq_mask) {
123123
return false;

0 commit comments

Comments
 (0)