Skip to content

Commit 09354cb

Browse files
committed
Merge PR #2045 of Wunkolo
block_class: Complete x86-SIMD
2 parents 97ef871 + cbd9496 commit 09354cb

File tree

2 files changed

+23
-1
lines changed

2 files changed

+23
-1
lines changed

overviewer_core/src/block_class.c

+22
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,28 @@ bool block_class_is_subset(
3131
size_t block_class_len) {
3232
size_t i = 0;
3333

34+
#if defined(__AVX512F__) && defined(__AVX512BW__)
35+
for (; i / 32 < block_class_len / 32; i += 32) {
36+
const __m512i block_class_vec = _mm512_loadu_si512(
37+
(__m512i*)&block_class[i]);
38+
const __m512i block_vec = _mm512_set1_epi16(block);
39+
const __mmask32 block_cmp = _mm512_cmpeq_epi16_mask(block_vec, block_class_vec);
40+
if (block_cmp) {
41+
return true;
42+
}
43+
}
44+
#endif
45+
#if defined(__AVX2__)
46+
for (; i / 16 < block_class_len / 16; i += 16) {
47+
const __m256i block_class_vec = _mm256_loadu_si256(
48+
(__m256i*)&block_class[i]);
49+
const __m256i block_vec = _mm256_set1_epi16(block);
50+
const __m256i block_cmp = _mm256_cmpeq_epi16(block_vec, block_class_vec);
51+
if (_mm256_movemask_epi8(block_cmp)) {
52+
return true;
53+
}
54+
}
55+
#endif
3456
#ifdef __SSE2__
3557
for (; i / 8 < block_class_len / 8; i += 8) {
3658
const __m128i block_class_vec = _mm_loadu_si128(

overviewer_core/src/overviewer.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@
3131

3232
// increment this value if you've made a change to the c extension
3333
// and want to force users to rebuild
34-
#define OVERVIEWER_EXTENSION_VERSION 109
34+
#define OVERVIEWER_EXTENSION_VERSION 110
3535

3636
#include <stdbool.h>
3737
#include <stdint.h>

0 commit comments

Comments
 (0)