Skip to content

Commit f434dee

Browse files
Renamed/renumbered WAVM's ltz_mask to match the proposed bitmask instruction.
1 parent 5cf99d1 commit f434dee

File tree

6 files changed

+94
-90
lines changed

6 files changed

+94
-90
lines changed

Include/WAVM/IR/FeatureSpec.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
// command-line.
1919
#define WAVM_ENUM_PROPOSED_FEATURES(V) \
2020
V(simd, "simd", "128-bit SIMD") \
21+
V(simdBitMask, "simd-bitmask", "128-bit SIMD bitmask instructions") \
2122
V(atomics, "atomics", "Shared memories and atomic instructions") \
2223
V(exceptionHandling, "exception-handling", "Exception handling") \
2324
V(multipleResultsAndBlockParams, "multivalue", "Multiple results and block parameters") \

Include/WAVM/IR/OperatorTable.h

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -479,35 +479,35 @@
479479
visitOp(0xfde1, i8x16_abs , "i8x16.abs" , NoImm , WAVM_UNARY(v128,v128) , simd ) \
480480
visitOp(0xfde2, i16x8_abs , "i16x8.abs" , NoImm , WAVM_UNARY(v128,v128) , simd ) \
481481
visitOp(0xfde3, i32x4_abs , "i32x4.abs" , NoImm , WAVM_UNARY(v128,v128) , simd ) \
482-
/* v128 interleaved load/store instructions */ \
483-
visitOp(0xfde4, v8x16_load_interleaved_2 , "v8x16.load_interleaved_2" , LoadOrStoreImm<4> , WAVM_LOAD_2(v128) , interleavedLoadStore ) \
484-
visitOp(0xfde5, v8x16_load_interleaved_3 , "v8x16.load_interleaved_3" , LoadOrStoreImm<4> , WAVM_LOAD_3(v128) , interleavedLoadStore ) \
485-
visitOp(0xfde6, v8x16_load_interleaved_4 , "v8x16.load_interleaved_4" , LoadOrStoreImm<4> , WAVM_LOAD_4(v128) , interleavedLoadStore ) \
486-
visitOp(0xfde7, v16x8_load_interleaved_2 , "v16x8.load_interleaved_2" , LoadOrStoreImm<4> , WAVM_LOAD_2(v128) , interleavedLoadStore ) \
487-
visitOp(0xfde8, v16x8_load_interleaved_3 , "v16x8.load_interleaved_3" , LoadOrStoreImm<4> , WAVM_LOAD_3(v128) , interleavedLoadStore ) \
488-
visitOp(0xfde9, v16x8_load_interleaved_4 , "v16x8.load_interleaved_4" , LoadOrStoreImm<4> , WAVM_LOAD_4(v128) , interleavedLoadStore ) \
489-
visitOp(0xfdea, v32x4_load_interleaved_2 , "v32x4.load_interleaved_2" , LoadOrStoreImm<4> , WAVM_LOAD_2(v128) , interleavedLoadStore ) \
490-
visitOp(0xfdeb, v32x4_load_interleaved_3 , "v32x4.load_interleaved_3" , LoadOrStoreImm<4> , WAVM_LOAD_3(v128) , interleavedLoadStore ) \
491-
visitOp(0xfdec, v32x4_load_interleaved_4 , "v32x4.load_interleaved_4" , LoadOrStoreImm<4> , WAVM_LOAD_4(v128) , interleavedLoadStore ) \
492-
visitOp(0xfded, v64x2_load_interleaved_2 , "v64x2.load_interleaved_2" , LoadOrStoreImm<4> , WAVM_LOAD_2(v128) , interleavedLoadStore ) \
493-
visitOp(0xfdee, v64x2_load_interleaved_3 , "v64x2.load_interleaved_3" , LoadOrStoreImm<4> , WAVM_LOAD_3(v128) , interleavedLoadStore ) \
494-
visitOp(0xfdef, v64x2_load_interleaved_4 , "v64x2.load_interleaved_4" , LoadOrStoreImm<4> , WAVM_LOAD_4(v128) , interleavedLoadStore ) \
495-
visitOp(0xfdf0, v8x16_store_interleaved_2 , "v8x16.store_interleaved_2" , LoadOrStoreImm<4> , WAVM_STORE_2(v128) , interleavedLoadStore ) \
496-
visitOp(0xfdf1, v8x16_store_interleaved_3 , "v8x16.store_interleaved_3" , LoadOrStoreImm<4> , WAVM_STORE_3(v128) , interleavedLoadStore ) \
497-
visitOp(0xfdf2, v8x16_store_interleaved_4 , "v8x16.store_interleaved_4" , LoadOrStoreImm<4> , WAVM_STORE_4(v128) , interleavedLoadStore ) \
498-
visitOp(0xfdf3, v16x8_store_interleaved_2 , "v16x8.store_interleaved_2" , LoadOrStoreImm<4> , WAVM_STORE_2(v128) , interleavedLoadStore ) \
499-
visitOp(0xfdf4, v16x8_store_interleaved_3 , "v16x8.store_interleaved_3" , LoadOrStoreImm<4> , WAVM_STORE_3(v128) , interleavedLoadStore ) \
500-
visitOp(0xfdf5, v16x8_store_interleaved_4 , "v16x8.store_interleaved_4" , LoadOrStoreImm<4> , WAVM_STORE_4(v128) , interleavedLoadStore ) \
501-
visitOp(0xfdf6, v32x4_store_interleaved_2 , "v32x4.store_interleaved_2" , LoadOrStoreImm<4> , WAVM_STORE_2(v128) , interleavedLoadStore ) \
502-
visitOp(0xfdf7, v32x4_store_interleaved_3 , "v32x4.store_interleaved_3" , LoadOrStoreImm<4> , WAVM_STORE_3(v128) , interleavedLoadStore ) \
503-
visitOp(0xfdf8, v32x4_store_interleaved_4 , "v32x4.store_interleaved_4" , LoadOrStoreImm<4> , WAVM_STORE_4(v128) , interleavedLoadStore ) \
504-
visitOp(0xfdf9, v64x2_store_interleaved_2 , "v64x2.store_interleaved_2" , LoadOrStoreImm<4> , WAVM_STORE_2(v128) , interleavedLoadStore ) \
505-
visitOp(0xfdfa, v64x2_store_interleaved_3 , "v64x2.store_interleaved_3" , LoadOrStoreImm<4> , WAVM_STORE_3(v128) , interleavedLoadStore ) \
506-
visitOp(0xfdfb, v64x2_store_interleaved_4 , "v64x2.store_interleaved_4" , LoadOrStoreImm<4> , WAVM_STORE_4(v128) , interleavedLoadStore ) \
507482
/* v128 bitmask */ \
508-
visitOp(0xfdfc, i8x16_ltz_mask , "i8x16.ltz_mask" , NoImm , WAVM_UNARY(v128,i32) , ltzMask ) \
509-
visitOp(0xfdfd, i16x8_ltz_mask , "i16x8.ltz_mask" , NoImm , WAVM_UNARY(v128,i32) , ltzMask ) \
510-
visitOp(0xfdfe, i32x4_ltz_mask , "i32x4.ltz_mask" , NoImm , WAVM_UNARY(v128,i32) , ltzMask ) \
483+
visitOp(0xfde4, i8x16_bitmask , "i8x16.bitmask" , NoImm , WAVM_UNARY(v128,i32) , simdBitMask ) \
484+
visitOp(0xfde5, i16x8_bitmask , "i16x8.bitmask" , NoImm , WAVM_UNARY(v128,i32) , simdBitMask ) \
485+
visitOp(0xfde6, i32x4_bitmask , "i32x4.bitmask" , NoImm , WAVM_UNARY(v128,i32) , simdBitMask ) \
486+
/* v128 interleaved load/store instructions */ \
487+
visitOp(0xfde7, v8x16_load_interleaved_2 , "v8x16.load_interleaved_2" , LoadOrStoreImm<4> , WAVM_LOAD_2(v128) , interleavedLoadStore ) \
488+
visitOp(0xfde8, v8x16_load_interleaved_3 , "v8x16.load_interleaved_3" , LoadOrStoreImm<4> , WAVM_LOAD_3(v128) , interleavedLoadStore ) \
489+
visitOp(0xfde9, v8x16_load_interleaved_4 , "v8x16.load_interleaved_4" , LoadOrStoreImm<4> , WAVM_LOAD_4(v128) , interleavedLoadStore ) \
490+
visitOp(0xfdea, v16x8_load_interleaved_2 , "v16x8.load_interleaved_2" , LoadOrStoreImm<4> , WAVM_LOAD_2(v128) , interleavedLoadStore ) \
491+
visitOp(0xfdeb, v16x8_load_interleaved_3 , "v16x8.load_interleaved_3" , LoadOrStoreImm<4> , WAVM_LOAD_3(v128) , interleavedLoadStore ) \
492+
visitOp(0xfdec, v16x8_load_interleaved_4 , "v16x8.load_interleaved_4" , LoadOrStoreImm<4> , WAVM_LOAD_4(v128) , interleavedLoadStore ) \
493+
visitOp(0xfded, v32x4_load_interleaved_2 , "v32x4.load_interleaved_2" , LoadOrStoreImm<4> , WAVM_LOAD_2(v128) , interleavedLoadStore ) \
494+
visitOp(0xfdee, v32x4_load_interleaved_3 , "v32x4.load_interleaved_3" , LoadOrStoreImm<4> , WAVM_LOAD_3(v128) , interleavedLoadStore ) \
495+
visitOp(0xfdef, v32x4_load_interleaved_4 , "v32x4.load_interleaved_4" , LoadOrStoreImm<4> , WAVM_LOAD_4(v128) , interleavedLoadStore ) \
496+
visitOp(0xfdf0, v64x2_load_interleaved_2 , "v64x2.load_interleaved_2" , LoadOrStoreImm<4> , WAVM_LOAD_2(v128) , interleavedLoadStore ) \
497+
visitOp(0xfdf1, v64x2_load_interleaved_3 , "v64x2.load_interleaved_3" , LoadOrStoreImm<4> , WAVM_LOAD_3(v128) , interleavedLoadStore ) \
498+
visitOp(0xfdf2, v64x2_load_interleaved_4 , "v64x2.load_interleaved_4" , LoadOrStoreImm<4> , WAVM_LOAD_4(v128) , interleavedLoadStore ) \
499+
visitOp(0xfdf3, v8x16_store_interleaved_2 , "v8x16.store_interleaved_2" , LoadOrStoreImm<4> , WAVM_STORE_2(v128) , interleavedLoadStore ) \
500+
visitOp(0xfdf4, v8x16_store_interleaved_3 , "v8x16.store_interleaved_3" , LoadOrStoreImm<4> , WAVM_STORE_3(v128) , interleavedLoadStore ) \
501+
visitOp(0xfdf5, v8x16_store_interleaved_4 , "v8x16.store_interleaved_4" , LoadOrStoreImm<4> , WAVM_STORE_4(v128) , interleavedLoadStore ) \
502+
visitOp(0xfdf6, v16x8_store_interleaved_2 , "v16x8.store_interleaved_2" , LoadOrStoreImm<4> , WAVM_STORE_2(v128) , interleavedLoadStore ) \
503+
visitOp(0xfdf7, v16x8_store_interleaved_3 , "v16x8.store_interleaved_3" , LoadOrStoreImm<4> , WAVM_STORE_3(v128) , interleavedLoadStore ) \
504+
visitOp(0xfdf8, v16x8_store_interleaved_4 , "v16x8.store_interleaved_4" , LoadOrStoreImm<4> , WAVM_STORE_4(v128) , interleavedLoadStore ) \
505+
visitOp(0xfdf9, v32x4_store_interleaved_2 , "v32x4.store_interleaved_2" , LoadOrStoreImm<4> , WAVM_STORE_2(v128) , interleavedLoadStore ) \
506+
visitOp(0xfdfa, v32x4_store_interleaved_3 , "v32x4.store_interleaved_3" , LoadOrStoreImm<4> , WAVM_STORE_3(v128) , interleavedLoadStore ) \
507+
visitOp(0xfdfb, v32x4_store_interleaved_4 , "v32x4.store_interleaved_4" , LoadOrStoreImm<4> , WAVM_STORE_4(v128) , interleavedLoadStore ) \
508+
visitOp(0xfdfc, v64x2_store_interleaved_2 , "v64x2.store_interleaved_2" , LoadOrStoreImm<4> , WAVM_STORE_2(v128) , interleavedLoadStore ) \
509+
visitOp(0xfdfd, v64x2_store_interleaved_3 , "v64x2.store_interleaved_3" , LoadOrStoreImm<4> , WAVM_STORE_3(v128) , interleavedLoadStore ) \
510+
visitOp(0xfdfe, v64x2_store_interleaved_4 , "v64x2.store_interleaved_4" , LoadOrStoreImm<4> , WAVM_STORE_4(v128) , interleavedLoadStore ) \
511511
/* Atomic wait/wake */ \
512512
visitOp(0xfe00, memory_atomic_notify , "memory.atomic.notify" , AtomicLoadOrStoreImm<2> , WAVM_BINARY(i32,i32) , atomics ) \
513513
visitOp(0xfe01, memory_atomic_wait32 , "memory.atomic.wait32" , AtomicLoadOrStoreImm<2> , WAVM_WAIT(i32) , atomics ) \

Lib/LLVMJIT/EmitConvert.cpp

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -389,12 +389,18 @@ EMIT_SIMD_WIDEN(i32x4_widen_high_i16x8_s, llvmContext.i32x4Type, llvmContext.i16
389389
EMIT_SIMD_WIDEN(i32x4_widen_low_i16x8_u, llvmContext.i32x4Type, llvmContext.i16x8Type, 0, 4, zext)
390390
EMIT_SIMD_WIDEN(i32x4_widen_high_i16x8_u, llvmContext.i32x4Type, llvmContext.i16x8Type, 4, 4, zext)
391391

392-
void EmitFunctionContext::i8x16_ltz_mask(NoImm)
392+
void EmitFunctionContext::i8x16_bitmask(NoImm)
393393
{
394394
auto i8x16Operand = irBuilder.CreateBitCast(pop(), llvmContext.i8x16Type);
395395
auto i1x16Mask = irBuilder.CreateICmpSLT(
396396
i8x16Operand, llvm::ConstantVector::getNullValue(llvmContext.i8x16Type));
397-
if(moduleContext.targetArch == llvm::Triple::aarch64)
397+
if(moduleContext.targetArch == llvm::Triple::x86_64
398+
|| moduleContext.targetArch == llvm::Triple::x86)
399+
{
400+
push(irBuilder.CreateZExt(irBuilder.CreateBitCast(i1x16Mask, llvmContext.i16Type),
401+
llvmContext.i32Type));
402+
}
403+
else
398404
{
399405
auto i8x16Mask = irBuilder.CreateSExt(i1x16Mask, llvmContext.i8x16Type);
400406
auto constant1 = llvm::ConstantInt::get(llvmContext.i8Type, 1);
@@ -442,19 +448,20 @@ void EmitFunctionContext::i8x16_ltz_mask(NoImm)
442448
emitLiteral(llvmContext, U32(8))));
443449
push(i32CombinedBitMask);
444450
}
445-
else
446-
{
447-
push(irBuilder.CreateZExt(irBuilder.CreateBitCast(i1x16Mask, llvmContext.i16Type),
448-
llvmContext.i32Type));
449-
}
450451
}
451452

452-
void EmitFunctionContext::i16x8_ltz_mask(NoImm)
453+
void EmitFunctionContext::i16x8_bitmask(NoImm)
453454
{
454455
auto i8x16Operand = irBuilder.CreateBitCast(pop(), llvmContext.i16x8Type);
455456
auto i1x8Mask = irBuilder.CreateICmpSLT(
456457
i8x16Operand, llvm::ConstantVector::getNullValue(llvmContext.i16x8Type));
457-
if(moduleContext.targetArch == llvm::Triple::aarch64)
458+
if(moduleContext.targetArch == llvm::Triple::x86_64
459+
|| moduleContext.targetArch == llvm::Triple::x86)
460+
{
461+
push(irBuilder.CreateZExt(irBuilder.CreateBitCast(i1x8Mask, llvmContext.i8Type),
462+
llvmContext.i32Type));
463+
}
464+
else
458465
{
459466
auto i16x8Mask = irBuilder.CreateSExt(i1x8Mask, llvmContext.i16x8Type);
460467
auto constant1 = llvm::ConstantInt::get(llvmContext.i16Type, 1);
@@ -479,19 +486,21 @@ void EmitFunctionContext::i16x8_ltz_mask(NoImm)
479486
{i16x8OrthogonalBitMask});
480487
push(irBuilder.CreateZExt(i16CombinedBitMask, llvmContext.i32Type));
481488
}
482-
else
483-
{
484-
push(irBuilder.CreateZExt(irBuilder.CreateBitCast(i1x8Mask, llvmContext.i8Type),
485-
llvmContext.i32Type));
486-
}
487489
}
488490

489-
void EmitFunctionContext::i32x4_ltz_mask(NoImm)
491+
void EmitFunctionContext::i32x4_bitmask(NoImm)
490492
{
491493
auto i32x4Operand = irBuilder.CreateBitCast(pop(), llvmContext.i32x4Type);
492494
auto i1x4Mask = irBuilder.CreateICmpSLT(
493495
i32x4Operand, llvm::ConstantVector::getNullValue(llvmContext.i32x4Type));
494-
if(moduleContext.targetArch == llvm::Triple::aarch64)
496+
if(moduleContext.targetArch == llvm::Triple::x86_64
497+
|| moduleContext.targetArch == llvm::Triple::x86)
498+
{
499+
push(irBuilder.CreateZExt(
500+
irBuilder.CreateBitCast(i1x4Mask, llvm::IntegerType::get(llvmContext, 4)),
501+
llvmContext.i32Type));
502+
}
503+
else
495504
{
496505
auto i32x4Mask = irBuilder.CreateSExt(i1x4Mask, llvmContext.i32x4Type);
497506
auto constant1 = llvm::ConstantInt::get(llvmContext.i32Type, 1);
@@ -510,10 +519,4 @@ void EmitFunctionContext::i32x4_ltz_mask(NoImm)
510519
{i32x4OrthogonalBitMask});
511520
push(i32CombinedBitMask);
512521
}
513-
else
514-
{
515-
push(irBuilder.CreateZExt(
516-
irBuilder.CreateBitCast(i1x4Mask, llvm::IntegerType::get(llvmContext, 4)),
517-
llvmContext.i32Type));
518-
}
519522
}

Test/benchmark/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
ADD_WAST_TESTS(
2-
SOURCES ltz_mask.wast
2+
SOURCES bitmask.wast
33
memory_copy_benchmark.wast
44
interleaved_load_store_benchmark.wast
55
WAVM_ARGS "--trace-assembly"

Test/benchmark/ltz_mask.wast renamed to Test/benchmark/bitmask.wast

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,54 @@
11
(module
22
(memory 1)
3-
(func (export "i8x16.ltz_mask")
3+
(func (export "i8x16.bitmask")
44
(param $numIterations i32)
55
(result i32)
66
(local $i i32)
77
(local $result i32)
88
loop $loop
99
(local.set $result (i32.xor
1010
(local.get $result)
11-
(i8x16.ltz_mask (v128.load (i32.const 0)))
11+
(i8x16.bitmask (v128.load (i32.const 0)))
1212
))
1313
(local.set $i (i32.add (local.get $i) (i32.const 1)))
1414
(br_if $loop (i32.lt_u (local.get $i) (local.get $numIterations)))
1515
end
1616
(local.get $result)
1717
)
1818

19-
(func (export "i16x8.ltz_mask")
19+
(func (export "i16x8.bitmask")
2020
(param $numIterations i32)
2121
(result i32)
2222
(local $i i32)
2323
(local $result i32)
2424
loop $loop
2525
(local.set $result (i32.xor
2626
(local.get $result)
27-
(i16x8.ltz_mask (v128.load (i32.const 0)))
27+
(i16x8.bitmask (v128.load (i32.const 0)))
2828
))
2929
(local.set $i (i32.add (local.get $i) (i32.const 1)))
3030
(br_if $loop (i32.lt_u (local.get $i) (local.get $numIterations)))
3131
end
3232
(local.get $result)
3333
)
3434

35-
(func (export "i32x4.ltz_mask")
35+
(func (export "i32x4.bitmask")
3636
(param $numIterations i32)
3737
(result i32)
3838
(local $i i32)
3939
(local $result i32)
4040
loop $loop
4141
(local.set $result (i32.xor
4242
(local.get $result)
43-
(i32x4.ltz_mask (v128.load (i32.const 0)))
43+
(i32x4.bitmask (v128.load (i32.const 0)))
4444
))
4545
(local.set $i (i32.add (local.get $i) (i32.const 1)))
4646
(br_if $loop (i32.lt_u (local.get $i) (local.get $numIterations)))
4747
end
4848
(local.get $result)
4949
)
5050

51-
(func (export "emulated i8x16.ltz_mask")
51+
(func (export "emulated i8x16.bitmask")
5252
(param $numIterations i32)
5353
(result i32)
5454
(local $i i32)
@@ -81,8 +81,8 @@
8181
)
8282
)
8383

84-
(benchmark "i8x16.ltz_mask" (invoke "i8x16.ltz_mask" (i32.const 1000000)))
85-
(benchmark "emulated i8x16.ltz_mask" (invoke "emulated i8x16.ltz_mask" (i32.const 1000000)))
84+
(benchmark "i8x16.bitmask" (invoke "i8x16.bitmask" (i32.const 1000000)))
85+
(benchmark "emulated i8x16.bitmask" (invoke "emulated i8x16.bitmask" (i32.const 1000000)))
8686

87-
(benchmark "i16x8.ltz_mask" (invoke "i16x8.ltz_mask" (i32.const 1000000)))
88-
(benchmark "i32x4.ltz_mask" (invoke "i32x4.ltz_mask" (i32.const 1000000)))
87+
(benchmark "i16x8.bitmask" (invoke "i16x8.bitmask" (i32.const 1000000)))
88+
(benchmark "i32x4.bitmask" (invoke "i32x4.bitmask" (i32.const 1000000)))

0 commit comments

Comments
 (0)