3
3
use core:: ascii:: EscapeDefault ;
4
4
5
5
use crate :: fmt:: { self , Write } ;
6
+ #[ cfg( not( all( target_arch = "x86_64" , target_feature = "sse2" ) ) ) ]
6
7
use crate :: intrinsics:: const_eval_select;
7
- use crate :: { ascii, iter, mem , ops} ;
8
+ use crate :: { ascii, iter, ops} ;
8
9
9
10
#[ cfg( not( test) ) ]
10
11
impl [ u8 ] {
@@ -328,14 +329,6 @@ impl<'a> fmt::Debug for EscapeAscii<'a> {
328
329
}
329
330
}
330
331
331
- /// Returns `true` if any byte in the word `v` is nonascii (>= 128). Snarfed
332
- /// from `../str/mod.rs`, which does something similar for utf8 validation.
333
- #[ inline]
334
- const fn contains_nonascii ( v : usize ) -> bool {
335
- const NONASCII_MASK : usize = usize:: repeat_u8 ( 0x80 ) ;
336
- ( NONASCII_MASK & v) != 0
337
- }
338
-
339
332
/// ASCII test *without* the chunk-at-a-time optimizations.
340
333
///
341
334
/// This is carefully structured to produce nice small code -- it's smaller in
@@ -366,6 +359,7 @@ pub const fn is_ascii_simple(mut bytes: &[u8]) -> bool {
366
359
///
367
360
/// If any of these loads produces something for which `contains_nonascii`
368
361
/// (a helper nested inside this function's body) returns true, then we know the answer is false.
362
+ #[ cfg( not( all( target_arch = "x86_64" , target_feature = "sse2" ) ) ) ]
369
363
#[ inline]
370
364
#[ rustc_allow_const_fn_unstable( const_eval_select) ] // fallback impl has same behavior
371
365
const fn is_ascii ( s : & [ u8 ] ) -> bool {
@@ -376,7 +370,14 @@ const fn is_ascii(s: &[u8]) -> bool {
376
370
if const {
377
371
is_ascii_simple( s)
378
372
} else {
379
- const USIZE_SIZE : usize = mem:: size_of:: <usize >( ) ;
373
+ /// Returns `true` if any byte in the word `v` is non-ASCII (>= 128), i.e. has its
374
+ /// high bit set. Adapted from `../str/mod.rs`, which does a similar check for UTF-8 validation.
375
+ const fn contains_nonascii( v: usize ) -> bool {
376
+ const NONASCII_MASK : usize = usize :: repeat_u8( 0x80 ) ;
377
+ ( NONASCII_MASK & v) != 0
378
+ }
379
+
380
+ const USIZE_SIZE : usize = size_of:: <usize >( ) ;
380
381
381
382
let len = s. len( ) ;
382
383
let align_offset = s. as_ptr( ) . align_offset( USIZE_SIZE ) ;
@@ -386,7 +387,7 @@ const fn is_ascii(s: &[u8]) -> bool {
386
387
//
387
388
// We also do this for architectures where `size_of::<usize>()` isn't
388
389
// sufficient alignment for `usize`, because it's a weird edge case.
389
- if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < mem :: align_of:: <usize >( ) {
390
+ if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < align_of:: <usize >( ) {
390
391
return is_ascii_simple( s) ;
391
392
}
392
393
@@ -420,7 +421,7 @@ const fn is_ascii(s: &[u8]) -> bool {
420
421
// have alignment information it should have given a `usize::MAX` for
421
422
// `align_offset` earlier, sending things through the scalar path instead of
422
423
// this one, so this check should pass if it's reachable.
423
- debug_assert!( word_ptr. is_aligned_to( mem :: align_of:: <usize >( ) ) ) ;
424
+ debug_assert!( word_ptr. is_aligned_to( align_of:: <usize >( ) ) ) ;
424
425
425
426
// Read subsequent words until the last aligned word, excluding the last
426
427
// aligned word by itself to be done in tail check later, to ensure that
@@ -455,3 +456,48 @@ const fn is_ascii(s: &[u8]) -> bool {
455
456
}
456
457
)
457
458
}
459
+
460
/// ASCII test optimized to use the `pmovmskb` instruction available on `x86-64`
/// platforms.
///
/// Returns `true` when every byte of `bytes` is ASCII (less than `0x80`). An
/// empty slice contains no non-ASCII byte, so it yields `true`.
///
/// Other platforms are not likely to benefit from this code structure, so they
/// use SWAR techniques to test for ASCII in `usize`-sized chunks.
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
#[inline]
const fn is_ascii(bytes: &[u8]) -> bool {
    // Process chunks of 32 bytes at a time in the fast path to enable
    // auto-vectorization and use of `pmovmskb`. Two 128-bit vector registers
    // can be OR'd together and then the resulting vector can be tested for
    // non-ASCII bytes.
    const CHUNK_SIZE: usize = 32;

    // The per-chunk tally below is a `u8` and is compared against
    // `CHUNK_SIZE as u8`; fail the build instead of silently truncating if the
    // chunk size is ever raised beyond what a `u8` can count.
    const _: () = assert!(CHUNK_SIZE <= u8::MAX as usize);

    let len = bytes.len();
    let mut pos = 0;

    // Fast path: whole chunks only. `pos + CHUNK_SIZE <= len` guarantees every
    // index touched by the inner loop is in bounds.
    while pos + CHUNK_SIZE <= len {
        let chunk_end = pos + CHUNK_SIZE;

        // Get LLVM to produce a `pmovmskb` instruction on x86-64 which
        // creates a mask from the most significant bit of each byte.
        // ASCII bytes are less than 128 (0x80), so their most significant
        // bit is unset.
        //
        // The chunk is deliberately scanned to its end without an early exit;
        // only the tally is inspected afterwards.
        let mut ascii_in_chunk: u8 = 0;
        while pos < chunk_end {
            ascii_in_chunk += bytes[pos].is_ascii() as u8;
            pos += 1;
        }

        // Every byte must have been counted as ASCII, i.e. the tally must
        // equal the chunk size.
        if ascii_in_chunk != CHUNK_SIZE as u8 {
            return false;
        }
    }

    // Process the remaining `bytes.len() % CHUNK_SIZE` bytes one at a time.
    let mut tail_is_ascii = true;
    while pos < len {
        tail_is_ascii &= bytes[pos].is_ascii();
        pos += 1;
    }

    tail_is_ascii
}
0 commit comments