Skip to content

Commit f3c18fb

Browse files
committed
Add wasm simd support
This commit adds SIMD acceleration support for wasm to the `memmem` module. It builds on the freshly stabilized wasm SIMD support from rust-lang/rust#86204. The implementation mostly cribs off the generic SIMD support for 128-bit types that was built for SSE, copying bits and pieces of code here and there. Some internal refactoring was done to help reduce duplication where possible. I ran some initial benchmarks with the `memmem/krate/*` regex and a hacked-up single-threaded version of Criterion. Some [initial comparisons](https://gist.github.com/alexcrichton/6a72e682e7b6d505ade605359fbe3f2d) using Wasmtime as a runtime do show a lot of improvements, but there are some slowdowns as well.
1 parent 8e1da98 commit f3c18fb

File tree

9 files changed

+287
-113
lines changed

9 files changed

+287
-113
lines changed

.github/workflows/ci.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ jobs:
2727
- stable
2828
- stable-32
2929
- stable-mips
30+
- wasm
3031
- beta
3132
- nightly
3233
- macos
@@ -62,6 +63,10 @@ jobs:
6263
- build: win-gnu
6364
os: windows-2019
6465
rust: stable-x86_64-gnu
66+
- build: wasm
67+
os: ubuntu-18.04
68+
rust: stable-x86_64-gnu
69+
wasm: true
6570
steps:
6671
- name: Checkout repository
6772
uses: actions/checkout@v1
@@ -81,6 +86,16 @@ jobs:
8186
cargo install --git https://github.com/rust-embedded/cross
8287
echo "CARGO=cross" >> $GITHUB_ENV
8388
echo "TARGET=--target ${{ matrix.target }}" >> $GITHUB_ENV
89+
- name: Download Wasmtime
90+
if: matrix.wasm
91+
run: |
92+
rustup target add wasm32-wasi
93+
echo "CARGO_BUILD_TARGET=wasm32-wasi" >> $GITHUB_ENV
94+
echo "RUSTFLAGS=-Ctarget-feature=+simd128" >> $GITHUB_ENV
95+
curl -LO https://github.com/bytecodealliance/wasmtime/releases/download/v0.32.0/wasmtime-v0.32.0-x86_64-linux.tar.xz
96+
tar xvf wasmtime-v0.32.0-x86_64-linux.tar.xz
97+
echo `pwd`/wasmtime-v0.32.0-x86_64-linux >> $GITHUB_PATH
98+
echo "CARGO_TARGET_WASM32_WASI_RUNNER=wasmtime run --wasm-features simd --" >> $GITHUB_ENV
8499
- name: Show command used for Cargo
85100
run: |
86101
echo "cargo command is: ${{ env.CARGO }}"

bench/src/memmem/imp.rs

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -640,44 +640,47 @@ pub(crate) mod sliceslice {
640640
}
641641

642642
pub(crate) fn prebuilt(_: &str) -> impl Fn(&str) -> bool + 'static {
643-
unimplemented!("sliceslice only runs on x86")
643+
if true {
644+
unimplemented!("sliceslice only runs on x86")
645+
}
646+
|_| false
644647
}
645648

646649
pub(crate) fn oneshotiter<'a>(
647-
haystack: &'a str,
648-
needle: &'a str,
650+
_haystack: &'a str,
651+
_needle: &'a str,
649652
) -> impl Iterator<Item = usize> + 'static {
650653
std::iter::from_fn(move || {
651654
unimplemented!("sliceslice only runs on x86")
652655
})
653656
}
654657

655-
pub(crate) fn prebuiltiter(needle: &str) -> super::super::NoIter {
658+
pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
656659
unimplemented!("sliceslice only runs on x86")
657660
}
658661
}
659662

660663
pub(crate) mod rev {
661-
pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
664+
pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool {
662665
unimplemented!("sliceslice does not support reverse searches")
663666
}
664667

665668
pub(crate) fn prebuilt(
666-
needle: &str,
669+
_needle: &str,
667670
) -> impl Fn(&str) -> bool + 'static {
668671
|_| unimplemented!("sliceslice does not support reverse searches")
669672
}
670673

671674
pub(crate) fn oneshotiter(
672-
haystack: &str,
673-
needle: &str,
675+
_haystack: &str,
676+
_needle: &str,
674677
) -> impl Iterator<Item = usize> + 'static {
675678
std::iter::from_fn(move || {
676679
unimplemented!("sliceslice does not support reverse searches")
677680
})
678681
}
679682

680-
pub(crate) fn prebuiltiter(needle: &str) -> super::super::NoIter {
683+
pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
681684
unimplemented!("sliceslice does not support reverse searches")
682685
}
683686
}
@@ -693,9 +696,21 @@ pub(crate) mod libc {
693696
}
694697

695698
pub(crate) mod fwd {
699+
#[cfg(target_arch = "wasm32")]
700+
extern "C" {
701+
fn memmem(
702+
haystack: *const libc::c_void,
703+
haystack_len: usize,
704+
needle: *const libc::c_void,
705+
needle_len: usize,
706+
) -> *const libc::c_void;
707+
}
708+
#[cfg(not(target_arch = "wasm32"))]
709+
use libc::memmem;
710+
696711
fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
697712
let p = unsafe {
698-
libc::memmem(
713+
memmem(
699714
haystack.as_ptr() as *const libc::c_void,
700715
haystack.len(),
701716
needle.as_ptr() as *const libc::c_void,

build.rs

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,29 @@ fn main() {
1515
// is not a problem. In that case, the fastest option will be chosen at
1616
// runtime.
1717
fn enable_simd_optimizations() {
18-
if is_env_set("CARGO_CFG_MEMCHR_DISABLE_AUTO_SIMD")
19-
|| !target_has_feature("sse2")
20-
{
18+
if is_env_set("CARGO_CFG_MEMCHR_DISABLE_AUTO_SIMD") {
2119
return;
2220
}
23-
println!("cargo:rustc-cfg=memchr_runtime_simd");
24-
println!("cargo:rustc-cfg=memchr_runtime_sse2");
25-
println!("cargo:rustc-cfg=memchr_runtime_sse42");
26-
println!("cargo:rustc-cfg=memchr_runtime_avx");
21+
let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
22+
match &arch[..] {
23+
"x86_64" => {
24+
if !target_has_feature("sse2") {
25+
return;
26+
}
27+
println!("cargo:rustc-cfg=memchr_runtime_simd");
28+
println!("cargo:rustc-cfg=memchr_runtime_sse2");
29+
println!("cargo:rustc-cfg=memchr_runtime_sse42");
30+
println!("cargo:rustc-cfg=memchr_runtime_avx");
31+
}
32+
"wasm32" | "wasm64" => {
33+
if !target_has_feature("simd128") {
34+
return;
35+
}
36+
println!("cargo:rustc-cfg=memchr_runtime_simd");
37+
println!("cargo:rustc-cfg=memchr_runtime_wasm128");
38+
}
39+
_ => {}
40+
}
2741
}
2842

2943
// This adds a `memchr_libc` cfg if and only if libc can be used, if no other

src/memmem/mod.rs

Lines changed: 37 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -146,16 +146,17 @@ macro_rules! define_memmem_simple_tests {
146146
}
147147

148148
mod byte_frequencies;
149-
#[cfg(all(target_arch = "x86_64", memchr_runtime_simd))]
149+
#[cfg(memchr_runtime_simd)]
150150
mod genericsimd;
151151
mod prefilter;
152152
mod rabinkarp;
153153
mod rarebytes;
154154
mod twoway;
155155
mod util;
156-
// SIMD is only supported on x86_64 currently.
157-
#[cfg(target_arch = "x86_64")]
156+
#[cfg(memchr_runtime_simd)]
158157
mod vector;
158+
#[cfg(all(memchr_runtime_wasm128))]
159+
mod wasm;
159160
#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
160161
mod x86;
161162

@@ -773,47 +774,47 @@ enum SearcherKind {
773774
TwoWay(twoway::Forward),
774775
#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
775776
GenericSIMD128(x86::sse::Forward),
777+
#[cfg(memchr_runtime_wasm128)]
778+
GenericSIMD128(wasm::Forward),
776779
#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
777780
GenericSIMD256(x86::avx::Forward),
778781
}
779782

780783
impl<'n> Searcher<'n> {
781-
#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
782784
fn new(config: SearcherConfig, needle: &'n [u8]) -> Searcher<'n> {
783785
use self::SearcherKind::*;
784786

785787
let ninfo = NeedleInfo::new(needle);
786-
let prefn =
787-
prefilter::forward(&config.prefilter, &ninfo.rarebytes, needle);
788-
let kind = if needle.len() == 0 {
789-
Empty
790-
} else if needle.len() == 1 {
791-
OneByte(needle[0])
792-
} else if let Some(fwd) = x86::avx::Forward::new(&ninfo, needle) {
793-
GenericSIMD256(fwd)
794-
} else if let Some(fwd) = x86::sse::Forward::new(&ninfo, needle) {
795-
GenericSIMD128(fwd)
796-
} else {
797-
TwoWay(twoway::Forward::new(needle))
788+
let mk = |kind: SearcherKind| {
789+
let prefn = prefilter::forward(
790+
&config.prefilter,
791+
&ninfo.rarebytes,
792+
needle,
793+
);
794+
Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind }
798795
};
799-
Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind }
800-
}
801-
802-
#[cfg(not(all(not(miri), target_arch = "x86_64", memchr_runtime_simd)))]
803-
fn new(config: SearcherConfig, needle: &'n [u8]) -> Searcher<'n> {
804-
use self::SearcherKind::*;
796+
if needle.len() == 0 {
797+
return mk(Empty);
798+
}
799+
if needle.len() == 1 {
800+
return mk(OneByte(needle[0]));
801+
}
802+
#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
803+
{
804+
if let Some(fwd) = x86::avx::Forward::new(&ninfo, needle) {
805+
return mk(GenericSIMD256(fwd));
806+
} else if let Some(fwd) = x86::sse::Forward::new(&ninfo, needle) {
807+
return mk(GenericSIMD128(fwd));
808+
}
809+
}
810+
#[cfg(all(target_arch = "wasm32", memchr_runtime_simd))]
811+
{
812+
if let Some(fwd) = wasm::Forward::new(&ninfo, needle) {
813+
return mk(GenericSIMD128(fwd));
814+
}
815+
}
805816

806-
let ninfo = NeedleInfo::new(needle);
807-
let prefn =
808-
prefilter::forward(&config.prefilter, &ninfo.rarebytes, needle);
809-
let kind = if needle.len() == 0 {
810-
Empty
811-
} else if needle.len() == 1 {
812-
OneByte(needle[0])
813-
} else {
814-
TwoWay(twoway::Forward::new(needle))
815-
};
816-
Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind }
817+
mk(TwoWay(twoway::Forward::new(needle)))
817818
}
818819

819820
/// Return a fresh prefilter state that can be used with this searcher.
@@ -844,11 +845,7 @@ impl<'n> Searcher<'n> {
844845
Empty => Empty,
845846
OneByte(b) => OneByte(b),
846847
TwoWay(tw) => TwoWay(tw),
847-
#[cfg(all(
848-
not(miri),
849-
target_arch = "x86_64",
850-
memchr_runtime_simd
851-
))]
848+
#[cfg(all(not(miri), memchr_runtime_simd))]
852849
GenericSIMD128(gs) => GenericSIMD128(gs),
853850
#[cfg(all(
854851
not(miri),
@@ -873,11 +870,7 @@ impl<'n> Searcher<'n> {
873870
Empty => Empty,
874871
OneByte(b) => OneByte(b),
875872
TwoWay(tw) => TwoWay(tw),
876-
#[cfg(all(
877-
not(miri),
878-
target_arch = "x86_64",
879-
memchr_runtime_simd
880-
))]
873+
#[cfg(all(not(miri), memchr_runtime_simd))]
881874
GenericSIMD128(gs) => GenericSIMD128(gs),
882875
#[cfg(all(
883876
not(miri),
@@ -921,11 +914,7 @@ impl<'n> Searcher<'n> {
921914
self.find_tw(tw, state, haystack, needle)
922915
}
923916
}
924-
#[cfg(all(
925-
not(miri),
926-
target_arch = "x86_64",
927-
memchr_runtime_simd
928-
))]
917+
#[cfg(all(not(miri), memchr_runtime_simd))]
929918
GenericSIMD128(ref gs) => {
930919
// The SIMD matcher can't handle particularly short haystacks,
931920
// so we fall back to RK in these cases.

0 commit comments

Comments
 (0)