Skip to content

Commit 30dcdfc

Browse files
dlrobertson authored and BurntSushi committed
[x86] Implement sse4.2 crc32 functions
Implement: `_mm_crc32_u8`, `_mm_crc32_u16`, `_mm_crc32_u32`, `_mm_crc32_u64`, `_mm_cmpgt_epi64`
1 parent 5503eff commit 30dcdfc

File tree

1 file changed

+99
-0
lines changed

1 file changed

+99
-0
lines changed

src/x86/sse42.rs

+99
Original file line number | Diff line number | Diff line change
@@ -289,8 +289,56 @@ pub unsafe fn _mm_cmpestra(
289289
constify_imm8!(imm8, call)
290290
}
291291

292+
/// Starting with the initial value in `crc`, return the accumulated
293+
/// CRC32 value for unsigned 8-bit integer `v`.
///
/// This is a thin wrapper: both arguments are forwarded unchanged to the
/// `crc32_32_8` extern binding (link name `llvm.x86.sse42.crc32.32.8`).
///
/// NOTE(review): the hardware `crc32` instruction presumably uses the
/// CRC-32C (Castagnoli) polynomial rather than the zlib CRC-32 one, and
/// callers presumably must ensure the CPU supports SSE4.2 before calling
/// this `unsafe fn` -- confirm both against the Intel documentation.
294+
#[inline(always)]
295+
#[target_feature = "+sse4.2"]
296+
#[cfg_attr(test, assert_instr(crc32))]
297+
pub unsafe fn _mm_crc32_u8(crc: u32, v: u8) -> u32 {
298+
crc32_32_8(crc, v)
299+
}
300+
301+
/// Starting with the initial value in `crc`, return the accumulated
302+
/// CRC32 value for unsigned 16-bit integer `v`.
///
/// This is a thin wrapper: both arguments are forwarded unchanged to the
/// `crc32_32_16` extern binding (link name `llvm.x86.sse42.crc32.32.16`).
///
/// NOTE(review): the hardware `crc32` instruction presumably uses the
/// CRC-32C (Castagnoli) polynomial rather than the zlib CRC-32 one, and
/// callers presumably must ensure the CPU supports SSE4.2 before calling
/// this `unsafe fn` -- confirm both against the Intel documentation.
303+
#[inline(always)]
304+
#[target_feature = "+sse4.2"]
305+
#[cfg_attr(test, assert_instr(crc32))]
306+
pub unsafe fn _mm_crc32_u16(crc: u32, v: u16) -> u32 {
307+
crc32_32_16(crc, v)
308+
}
309+
310+
/// Starting with the initial value in `crc`, return the accumulated
311+
/// CRC32 value for unsigned 32-bit integer `v`.
///
/// This is a thin wrapper: both arguments are forwarded unchanged to the
/// `crc32_32_32` extern binding (link name `llvm.x86.sse42.crc32.32.32`).
///
/// NOTE(review): the hardware `crc32` instruction presumably uses the
/// CRC-32C (Castagnoli) polynomial rather than the zlib CRC-32 one, and
/// callers presumably must ensure the CPU supports SSE4.2 before calling
/// this `unsafe fn` -- confirm both against the Intel documentation.
312+
#[inline(always)]
313+
#[target_feature = "+sse4.2"]
314+
#[cfg_attr(test, assert_instr(crc32))]
315+
pub unsafe fn _mm_crc32_u32(crc: u32, v: u32) -> u32 {
316+
crc32_32_32(crc, v)
317+
}
318+
319+
/// Starting with the initial value in `crc`, return the accumulated
320+
/// CRC32 value for unsigned 64-bit integer `v`.
///
/// Only compiled when `target_arch = "x86_64"`. Unlike the 8/16/32-bit
/// variants, both the `crc` accumulator and the return value are `u64`.
/// Both arguments are forwarded unchanged to the `crc32_64_64` extern
/// binding (link name `llvm.x86.sse42.crc32.64.64`).
///
/// NOTE(review): presumably only the low 32 bits of `crc` and of the
/// result are significant (the CRC itself is 32 bits wide), and the
/// polynomial is presumably CRC-32C (Castagnoli) -- confirm against the
/// Intel documentation. Callers presumably must also ensure SSE4.2 is
/// available before calling this `unsafe fn`.
321+
#[cfg(target_arch = "x86_64")]
322+
#[inline(always)]
323+
#[target_feature = "+sse4.2"]
324+
#[cfg_attr(test, assert_instr(crc32))]
325+
pub unsafe fn _mm_crc32_u64(crc: u64, v: u64) -> u64 {
326+
crc32_64_64(crc, v)
327+
}
328+
329+
/// Compare packed 64-bit integers in `a` and `b` for greater-than,
330+
/// return the results.
///
/// The comparison is delegated to `i64x2::gt`, applied lane by lane. In the
/// accompanying unit test, a lane where `a > b` comes back with every bit
/// set (`0xffffffffffffffff`) and a lane where the values are equal comes
/// back as `0`.
///
/// NOTE(review): this function is gated on `target_arch = "x86_64"`, but
/// the asserted instruction (`pcmpgtq`) is presumably also available on
/// 32-bit x86 with SSE4.2 -- confirm whether the gate is intentional.
331+
#[cfg(target_arch = "x86_64")]
332+
#[inline(always)]
333+
#[target_feature = "+sse4.2"]
334+
#[cfg_attr(test, assert_instr(pcmpgtq))]
335+
pub unsafe fn _mm_cmpgt_epi64(a: i64x2, b: i64x2) -> i64x2 {
336+
a.gt(b)
337+
}
338+
292339
#[allow(improper_ctypes)]
293340
extern {
341+
// SSE 4.2 string and text comparison ops
294342
#[link_name = "llvm.x86.sse42.pcmpestrm128"]
295343
fn pcmpestrm128(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i8) -> u8x16;
296344
#[link_name = "llvm.x86.sse42.pcmpestri128"]
@@ -319,6 +367,15 @@ extern {
319367
fn pcmpistrio128(a: __m128i, b: __m128i, imm8: i8) -> i32;
320368
#[link_name = "llvm.x86.sse42.pcmpistria128"]
321369
fn pcmpistria128(a: __m128i, b: __m128i, imm8: i8) -> i32;
370+
// SSE 4.2 CRC instructions
371+
#[link_name = "llvm.x86.sse42.crc32.32.8"]
372+
fn crc32_32_8(crc: u32, v: u8) -> u32;
373+
#[link_name = "llvm.x86.sse42.crc32.32.16"]
374+
fn crc32_32_16(crc: u32, v: u16) -> u32;
375+
#[link_name = "llvm.x86.sse42.crc32.32.32"]
376+
fn crc32_32_32(crc: u32, v: u32) -> u32;
377+
#[link_name = "llvm.x86.sse42.crc32.64.64"]
378+
fn crc32_64_64(crc: u64, v: u64) -> u64;
322379
}
323380

324381
#[cfg(test)]
@@ -470,4 +527,46 @@ mod tests {
470527
a, 14, b, 16, sse42::_SIDD_CMP_EQUAL_EACH | sse42::_SIDD_UNIT_MASK);
471528
assert_eq!(1, i);
472529
}
530+
531+
// Folds a single byte (`0x2a`) into a fixed starting CRC and checks the
// result of `sse42::_mm_crc32_u8` against a hard-coded expected value.
#[simd_test = "sse4.2"]
532+
unsafe fn _mm_crc32_u8() {
533+
let crc = 0x2aa1e72b;
534+
let v = 0x2a;
535+
let i = sse42::_mm_crc32_u8(crc, v);
536+
assert_eq!(i, 0xf24122e4);
537+
}
538+
539+
// Folds a single 16-bit value (`0x22b`) into a fixed starting CRC and checks
// the result of `sse42::_mm_crc32_u16` against a hard-coded expected value.
#[simd_test = "sse4.2"]
540+
unsafe fn _mm_crc32_u16() {
541+
let crc = 0x8ecec3b5;
542+
let v = 0x22b;
543+
let i = sse42::_mm_crc32_u16(crc, v);
544+
assert_eq!(i, 0x13bb2fb);
545+
}
546+
547+
// Folds a single 32-bit value (`0x845fed`) into a fixed starting CRC and
// checks the result of `sse42::_mm_crc32_u32` against a hard-coded expected
// value.
#[simd_test = "sse4.2"]
548+
unsafe fn _mm_crc32_u32() {
549+
let crc = 0xae2912c8;
550+
let v = 0x845fed;
551+
let i = sse42::_mm_crc32_u32(crc, v);
552+
assert_eq!(i, 0xffae2ed1);
553+
}
554+
555+
// Folds a single 64-bit value into a fixed starting CRC and checks the
// result of `sse42::_mm_crc32_u64` against a hard-coded expected value.
// Gated on x86_64 to match the function under test. Note the starting `crc`
// literal is wider than 32 bits while the expected result fits in 32 bits.
#[cfg(target_arch = "x86_64")]
556+
#[simd_test = "sse4.2"]
557+
unsafe fn _mm_crc32_u64() {
558+
let crc = 0x7819dccd3e824;
559+
let v = 0x2a22b845fed;
560+
let i = sse42::_mm_crc32_u64(crc, v);
561+
assert_eq!(i, 0xbb6cdc6c);
562+
}
563+
564+
// `a` is all zeros except index 1, which is set to `0x2a`; `b` is all zeros.
// Only index 1 of `a` is greater than `b`, so the expected result has every
// bit set at that index and zero at the other. Gated on x86_64 to match the
// function under test.
#[cfg(target_arch = "x86_64")]
565+
#[simd_test = "sse4.2"]
566+
unsafe fn _mm_cmpgt_epi64() {
567+
let a = i64x2::splat(0x00).replace(1, 0x2a);
568+
let b = i64x2::splat(0x00);
569+
let i = sse42::_mm_cmpgt_epi64(a, b);
570+
assert_eq!(i, i64x2::new(0x00, 0xffffffffffffffffu64 as i64));
571+
}
473572
}

0 commit comments

Comments
 (0)