Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Implement the remaining AVX2 intrinsic #20210

Merged
merged 3 commits into from
Oct 8, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1535,17 +1535,17 @@ internal Avx2() { }

/// <summary>
/// __m256i _mm256_permutevar8x32_epi32 (__m256i a, __m256i idx)
/// VPERMD ymm, ymm/m256, imm8
/// VPERMD ymm, ymm/m256, ymm
/// </summary>
public static Vector256<int> PermuteVar8x32(Vector256<int> left, Vector256<int> control) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m256i _mm256_permutevar8x32_epi32 (__m256i a, __m256i idx)
/// VPERMD ymm, ymm/m256, imm8
/// VPERMD ymm, ymm/m256, ymm
/// </summary>
public static Vector256<uint> PermuteVar8x32(Vector256<uint> left, Vector256<uint> control) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m256 _mm256_permutevar8x32_ps (__m256 a, __m256i idx)
/// VPERMPS ymm, ymm/m256, imm8
/// VPERMPS ymm, ymm/m256, ymm
/// </summary>
public static Vector256<float> PermuteVar8x32(Vector256<float> left, Vector256<int> control) { throw new PlatformNotSupportedException(); }

Expand Down Expand Up @@ -1885,12 +1885,12 @@ internal Avx2() { }
public static Vector256<byte> Shuffle(Vector256<byte> value, Vector256<byte> mask) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m256i _mm256_shuffle_epi32 (__m256i a, const int imm8)
/// VPSHUFD ymm, ymm, ymm/m256
/// VPSHUFD ymm, ymm/m256, imm8
/// </summary>
public static Vector256<int> Shuffle(Vector256<int> value, byte control) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m256i _mm256_shuffle_epi32 (__m256i a, const int imm8)
/// VPSHUFD ymm, ymm, ymm/m256
/// VPSHUFD ymm, ymm/m256, imm8
/// </summary>
public static Vector256<uint> Shuffle(Vector256<uint> value, byte control) { throw new PlatformNotSupportedException(); }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2254,17 +2254,17 @@ public static unsafe Vector256<double> GatherMaskVector256(Vector256<double> sou

/// <summary>
/// __m256i _mm256_permutevar8x32_epi32 (__m256i a, __m256i idx)
/// VPERMD ymm, ymm/m256, imm8
/// VPERMD ymm, ymm/m256, ymm
/// </summary>
public static Vector256<int> PermuteVar8x32(Vector256<int> left, Vector256<int> control) => PermuteVar8x32(left, control);
/// <summary>
/// __m256i _mm256_permutevar8x32_epi32 (__m256i a, __m256i idx)
/// VPERMD ymm, ymm/m256, imm8
/// VPERMD ymm, ymm/m256, ymm
/// </summary>
public static Vector256<uint> PermuteVar8x32(Vector256<uint> left, Vector256<uint> control) => PermuteVar8x32(left, control);
/// <summary>
/// __m256 _mm256_permutevar8x32_ps (__m256 a, __m256i idx)
/// VPERMPS ymm, ymm/m256, imm8
/// VPERMPS ymm, ymm/m256, ymm
/// </summary>
public static Vector256<float> PermuteVar8x32(Vector256<float> left, Vector256<int> control) => PermuteVar8x32(left, control);

Expand Down Expand Up @@ -2604,12 +2604,12 @@ public static unsafe Vector256<double> GatherMaskVector256(Vector256<double> sou
public static Vector256<byte> Shuffle(Vector256<byte> value, Vector256<byte> mask) => Shuffle(value, mask);
/// <summary>
/// __m256i _mm256_shuffle_epi32 (__m256i a, const int imm8)
/// VPSHUFD ymm, ymm, ymm/m256
/// VPSHUFD ymm, ymm/m256, imm8
/// </summary>
public static Vector256<int> Shuffle(Vector256<int> value, byte control) => Shuffle(value, control);
/// <summary>
/// __m256i _mm256_shuffle_epi32 (__m256i a, const int imm8)
/// VPSHUFD ymm, ymm, ymm/m256
/// VPSHUFD ymm, ymm/m256, imm8
/// </summary>
public static Vector256<uint> Shuffle(Vector256<uint> value, byte control) => Shuffle(value, control);

Expand Down
26 changes: 20 additions & 6 deletions src/jit/hwintrinsiclistxarch.h

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions src/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1348,6 +1348,17 @@ GenTree* Compiler::impAvxOrAvx2Intrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_AVX2_PermuteVar8x32:
{
baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
// swap the two operands
GenTree* indexVector = impSIMDPopStack(TYP_SIMD32);
GenTree* sourceVector = impSIMDPopStack(TYP_SIMD32);
retNode =
gtNewSimdHWIntrinsicNode(TYP_SIMD32, indexVector, sourceVector, NI_AVX2_PermuteVar8x32, baseType, 32);
break;
}

case NI_AVX2_GatherMaskVector128:
case NI_AVX2_GatherMaskVector256:
{
Expand Down
2 changes: 2 additions & 0 deletions src/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,8 @@ INST3(vpermilpsvar, "permilpsvar", IUM_WR, BAD_CODE, BAD_CODE,
INST3(vpermilpdvar, "permilpdvar", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0D), INS_Flags_IsDstDstSrcAVXInstruction) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values
INST3(vperm2f128, "perm2f128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x06), INS_Flags_IsDstDstSrcAVXInstruction) // Permute Floating-Point Values
INST3(vpermpd, "permpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x01), INS_FLAGS_None) // Permute Double-Precision Floating-Point Values
INST3(vpermd, "permd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x36), INS_Flags_IsDstDstSrcAVXInstruction) // Permute Packed Doublewords Elements
INST3(vpermps, "permps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x16), INS_Flags_IsDstDstSrcAVXInstruction) // Permute Single-Precision Floating-Point Elements
INST3(vbroadcastf128, "broadcastf128", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1A), INS_FLAGS_None) // Broadcast packed float values read from memory to entire ymm register
INST3(vbroadcasti128, "broadcasti128", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5A), INS_FLAGS_None) // Broadcast packed integer values read from memory to entire ymm register
INST3(vmaskmovps, "maskmovps", IUM_WR, SSE38(0x2E), BAD_CODE, SSE38(0x2C), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Single-Precision Floating-Point Loads and Stores
Expand Down
4 changes: 4 additions & 0 deletions src/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2493,11 +2493,14 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
case NI_AVX_Permute2x128:
case NI_AVX2_Blend:
case NI_AVX2_InsertVector128:
case NI_AVX2_MultipleSumAbsoluteDifferences:
case NI_AVX2_Permute2x128:
case NI_AVX2_Permute4x64:
case NI_AVX2_ShiftLeftLogical:
case NI_AVX2_ShiftRightArithmetic:
case NI_AVX2_ShiftRightLogical:
case NI_AVX2_ShuffleHigh:
case NI_AVX2_ShuffleLow:
{
assert(supportsSIMDScalarLoads == false);

Expand Down Expand Up @@ -3089,6 +3092,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_AVX_Permute2x128:
case NI_AVX_Shuffle:
case NI_AVX2_Blend:
case NI_AVX2_MultipleSumAbsoluteDifferences:
case NI_AVX2_Permute2x128:
case NI_PCLMULQDQ_CarrylessMultiply:
{
Expand Down
32 changes: 0 additions & 32 deletions tests/arm/Tests.lst
Original file line number Diff line number Diff line change
Expand Up @@ -87660,14 +87660,6 @@ MaxAllowedDurationSeconds=600
Categories=EXPECTED_PASS;NEW;EXCLUDED
HostStyle=0

[MultiplyLow_r.cmd_11412]
RelativePath=JIT\HardwareIntrinsics\X86\Sse2\MultiplyLow_r\MultiplyLow_r.cmd
WorkingDir=JIT\HardwareIntrinsics\X86\Sse2\MultiplyLow_r
Expected=0
MaxAllowedDurationSeconds=600
Categories=EXPECTED_PASS;NEW;EXCLUDED
HostStyle=0

[simplearg.cmd_11413]
RelativePath=JIT\Directed\RVAInit\simplearg\simplearg.cmd
WorkingDir=JIT\Directed\RVAInit\simplearg
Expand Down Expand Up @@ -88452,14 +88444,6 @@ MaxAllowedDurationSeconds=600
Categories=EXPECTED_PASS;NEW;EXCLUDED
HostStyle=0

[MultiplyLow_ro.cmd_11539]
RelativePath=JIT\HardwareIntrinsics\X86\Sse2\MultiplyLow_ro\MultiplyLow_ro.cmd
WorkingDir=JIT\HardwareIntrinsics\X86\Sse2\MultiplyLow_ro
Expected=0
MaxAllowedDurationSeconds=600
Categories=EXPECTED_PASS;NEW;EXCLUDED
HostStyle=0

[eventactivityidcontrol.cmd_11540]
RelativePath=tracing\eventactivityidcontrol\eventactivityidcontrol\eventactivityidcontrol.cmd
WorkingDir=tracing\eventactivityidcontrol\eventactivityidcontrol
Expand Down Expand Up @@ -89084,14 +89068,6 @@ MaxAllowedDurationSeconds=600
Categories=EXPECTED_PASS;NEW;EXCLUDED
HostStyle=0

[AlignRight_ro.cmd_11635]
RelativePath=JIT\HardwareIntrinsics\X86\Ssse3\AlignRight_ro\AlignRight_ro.cmd
WorkingDir=JIT\HardwareIntrinsics\X86\Ssse3\AlignRight_ro
Expected=0
MaxAllowedDurationSeconds=600
Categories=EXPECTED_PASS;NEW;EXCLUDED
HostStyle=0

[tracelogging.cmd_11638]
RelativePath=tracing\tracevalidation\tracelogging\tracelogging\tracelogging.cmd
WorkingDir=tracing\tracevalidation\tracelogging\tracelogging
Expand Down Expand Up @@ -89212,14 +89188,6 @@ MaxAllowedDurationSeconds=600
Categories=EXPECTED_PASS;NEW;EXCLUDED
HostStyle=0

[AlignRight_r.cmd_11655]
RelativePath=JIT\HardwareIntrinsics\X86\Ssse3\AlignRight_r\AlignRight_r.cmd
WorkingDir=JIT\HardwareIntrinsics\X86\Ssse3\AlignRight_r
Expected=0
MaxAllowedDurationSeconds=600
Categories=EXPECTED_PASS;NEW;EXCLUDED
HostStyle=0

[ConvertToSingle_r.cmd_11656]
RelativePath=JIT\HardwareIntrinsics\X86\Sse\ConvertToSingle_r\ConvertToSingle_r.cmd
WorkingDir=JIT\HardwareIntrinsics\X86\Sse\ConvertToSingle_r
Expand Down
32 changes: 0 additions & 32 deletions tests/arm64/Tests.lst
Original file line number Diff line number Diff line change
Expand Up @@ -87668,14 +87668,6 @@ MaxAllowedDurationSeconds=600
Categories=EXPECTED_PASS;NEW;EXCLUDED
HostStyle=0

[MultiplyLow_r.cmd_11731]
RelativePath=JIT\HardwareIntrinsics\X86\Sse2\MultiplyLow_r\MultiplyLow_r.cmd
WorkingDir=JIT\HardwareIntrinsics\X86\Sse2\MultiplyLow_r
Expected=0
MaxAllowedDurationSeconds=600
Categories=EXPECTED_PASS;NEW;EXCLUDED
HostStyle=0

[simplearg.cmd_11732]
RelativePath=JIT\Directed\RVAInit\simplearg\simplearg.cmd
WorkingDir=JIT\Directed\RVAInit\simplearg
Expand Down Expand Up @@ -88444,14 +88436,6 @@ MaxAllowedDurationSeconds=600
Categories=EXPECTED_PASS;NEW;EXCLUDED
HostStyle=0

[MultiplyLow_ro.cmd_11856]
RelativePath=JIT\HardwareIntrinsics\X86\Sse2\MultiplyLow_ro\MultiplyLow_ro.cmd
WorkingDir=JIT\HardwareIntrinsics\X86\Sse2\MultiplyLow_ro
Expected=0
MaxAllowedDurationSeconds=600
Categories=EXPECTED_PASS;NEW;EXCLUDED
HostStyle=0

[eventactivityidcontrol.cmd_11857]
RelativePath=tracing\eventactivityidcontrol\eventactivityidcontrol\eventactivityidcontrol.cmd
WorkingDir=tracing\eventactivityidcontrol\eventactivityidcontrol
Expand Down Expand Up @@ -89092,14 +89076,6 @@ MaxAllowedDurationSeconds=600
Categories=EXPECTED_PASS;NEW;EXCLUDED
HostStyle=0

[AlignRight_ro.cmd_11954]
RelativePath=JIT\HardwareIntrinsics\X86\Ssse3\AlignRight_ro\AlignRight_ro.cmd
WorkingDir=JIT\HardwareIntrinsics\X86\Ssse3\AlignRight_ro
Expected=0
MaxAllowedDurationSeconds=600
Categories=EXPECTED_PASS;NEW;EXCLUDED
HostStyle=0

[tracelogging.cmd_11957]
RelativePath=tracing\tracevalidation\tracelogging\tracelogging\tracelogging.cmd
WorkingDir=tracing\tracevalidation\tracelogging\tracelogging
Expand Down Expand Up @@ -89220,14 +89196,6 @@ MaxAllowedDurationSeconds=600
Categories=EXPECTED_PASS;Pri1;NEW
HostStyle=0

[AlignRight_r.cmd_11974]
RelativePath=JIT\HardwareIntrinsics\X86\Ssse3\AlignRight_r\AlignRight_r.cmd
WorkingDir=JIT\HardwareIntrinsics\X86\Ssse3\AlignRight_r
Expected=0
MaxAllowedDurationSeconds=600
Categories=EXPECTED_PASS;NEW;EXCLUDED
HostStyle=0

[ConvertToSingle_r.cmd_11975]
RelativePath=JIT\HardwareIntrinsics\X86\Sse\ConvertToSingle_r\ConvertToSingle_r.cmd
WorkingDir=JIT\HardwareIntrinsics\X86\Sse\ConvertToSingle_r
Expand Down
Loading