Skip to content

Commit 00934be

Browse files
authored
[AArch64] Funnel Shift now uses rev32/rev64 instructions (#136707)
Fixes #130469.

Funnel shifts that rotate each vector element by half its width are now selected as REV32/REV64 instructions instead of a shift/accumulate sequence.

New output:
```
G1:
  rev64 v0.4s, v0.4s
  ret
G2:
  rev32 v0.8h, v0.8h
  ret
G3:
  rev16 v0.16b, v0.16b
  ret
G4:
  rev32 v0.4h, v0.4h
  ret
G5:
  rev16 v0.8b, v0.8b
  ret
```

Old output:
```
G1:
  shl v1.2d, v0.2d, #32
  usra v1.2d, v0.2d, #32
  mov v0.16b, v1.16b
  ret
G2:
  shl v1.4s, v0.4s, #16
  usra v1.4s, v0.4s, #16
  mov v0.16b, v1.16b
  ret
G3:
  rev16 v0.16b, v0.16b
  ret
G4:
  shl v1.2s, v0.2s, #16
  usra v1.2s, v0.2s, #16
  fmov d0, d1
  ret
G5:
  rev16 v0.8b, v0.8b
  ret
```
1 parent eea1efe commit 00934be

File tree

2 files changed

+61
-0
lines changed

2 files changed

+61
-0
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

+11
Original file line numberDiff line numberDiff line change
@@ -5688,6 +5688,17 @@ def : Pat<(v4i32 (bswap (v4i32 V128:$Rn))),
56885688
def : Pat<(v2i64 (bswap (v2i64 V128:$Rn))),
56895689
(v2i64 (REV64v16i8 (v2i64 V128:$Rn)))>;
56905690

5691+
// Patterns for funnel shifts to be matched to equivalent REV instructions.
//
// Each pattern matches (or (AArch64vshl x, N/2), (AArch64vlshr x, N/2)) where
// N is the element width: both shifts read the same source register $Rn, so
// the OR simply swaps the upper and lower halves of every element. That is
// the same result as reversing the N/2-bit chunks inside each N-bit element,
// which a single REV instruction produces:
//   v2i64, shift 32 -> REV64v4i32 (32-bit chunks within 64-bit elements)
//   v4i32, shift 16 -> REV32v8i16 (16-bit chunks within 32-bit elements)
//   v2i32, shift 16 -> REV32v4i16 (same as above, 64-bit vector register)
5692+
def : Pat<(v2i64 (or (v2i64 (AArch64vshl (v2i64 V128:$Rn), (i32 32))),
5693+
(v2i64 (AArch64vlshr (v2i64 V128:$Rn), (i32 32))))),
5694+
(v2i64 (REV64v4i32 (v2i64 V128:$Rn)))>;
5695+
def : Pat<(v4i32 (or (v4i32 (AArch64vshl (v4i32 V128:$Rn), (i32 16))),
5696+
(v4i32 (AArch64vlshr (v4i32 V128:$Rn), (i32 16))))),
5697+
(v4i32 (REV32v8i16 (v4i32 V128:$Rn)))>;
5698+
def : Pat<(v2i32 (or (v2i32 (AArch64vshl (v2i32 V64:$Rn), (i32 16))),
5699+
(v2i32 (AArch64vlshr (v2i32 V64:$Rn), (i32 16))))),
5700+
(v2i32 (REV32v4i16 (v2i32 V64:$Rn)))>;
5701+
56915702
//===----------------------------------------------------------------------===//
56925703
// Advanced SIMD three vector instructions.
56935704
//===----------------------------------------------------------------------===//

llvm/test/CodeGen/AArch64/fsh.ll

+50
Original file line numberDiff line numberDiff line change
@@ -4506,3 +4506,53 @@ entry:
45064506
%d = call <2 x i128> @llvm.fshr(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 3, i128 3>)
45074507
ret <2 x i128> %d
45084508
}
4509+
4510+
4511+
4512+
; fshl of %r with itself by 32 rotates every i64 lane by half its width.
; The CHECK-SD lines expect this to become a single rev64 on .4s lanes; the
; CHECK-GI lines expect the shl/ushr/orr sequence.
define <2 x i64> @fshl_to_rev2i64(<2 x i64> %r) {
4513+
; CHECK-SD-LABEL: fshl_to_rev2i64:
4514+
; CHECK-SD: // %bb.0:
4515+
; CHECK-SD-NEXT: rev64 v0.4s, v0.4s
4516+
; CHECK-SD-NEXT: ret
4517+
;
4518+
; CHECK-GI-LABEL: fshl_to_rev2i64:
4519+
; CHECK-GI: // %bb.0:
4520+
; CHECK-GI-NEXT: shl v1.2d, v0.2d, #32
4521+
; CHECK-GI-NEXT: ushr v0.2d, v0.2d, #32
4522+
; CHECK-GI-NEXT: orr v0.16b, v1.16b, v0.16b
4523+
; CHECK-GI-NEXT: ret
4524+
%or = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %r, <2 x i64> %r, <2 x i64> splat (i64 32))
4525+
ret <2 x i64> %or
4526+
}
4527+
4528+
; fshl of %r with itself by 16 rotates every i32 lane by half its width.
; The CHECK-SD lines expect this to become a single rev32 on .8h lanes; the
; CHECK-GI lines expect the shl/ushr/orr sequence.
define <4 x i32> @fshl_to_rev4i32(<4 x i32> %r) {
4529+
; CHECK-SD-LABEL: fshl_to_rev4i32:
4530+
; CHECK-SD: // %bb.0:
4531+
; CHECK-SD-NEXT: rev32 v0.8h, v0.8h
4532+
; CHECK-SD-NEXT: ret
4533+
;
4534+
; CHECK-GI-LABEL: fshl_to_rev4i32:
4535+
; CHECK-GI: // %bb.0:
4536+
; CHECK-GI-NEXT: shl v1.4s, v0.4s, #16
4537+
; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #16
4538+
; CHECK-GI-NEXT: orr v0.16b, v1.16b, v0.16b
4539+
; CHECK-GI-NEXT: ret
4540+
%or = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %r, <4 x i32> %r, <4 x i32> splat (i32 16))
4541+
ret <4 x i32> %or
4542+
}
4543+
4544+
; 64-bit-vector variant: fshl of %r with itself by 16 rotates every i32 lane
; by half its width. The CHECK-SD lines expect a single rev32 on .4h lanes;
; the CHECK-GI lines expect the shl/ushr/orr sequence.
define <2 x i32> @fshl_to_rev2i32(<2 x i32> %r) {
4545+
; CHECK-SD-LABEL: fshl_to_rev2i32:
4546+
; CHECK-SD: // %bb.0:
4547+
; CHECK-SD-NEXT: rev32 v0.4h, v0.4h
4548+
; CHECK-SD-NEXT: ret
4549+
;
4550+
; CHECK-GI-LABEL: fshl_to_rev2i32:
4551+
; CHECK-GI: // %bb.0:
4552+
; CHECK-GI-NEXT: shl v1.2s, v0.2s, #16
4553+
; CHECK-GI-NEXT: ushr v0.2s, v0.2s, #16
4554+
; CHECK-GI-NEXT: orr v0.8b, v1.8b, v0.8b
4555+
; CHECK-GI-NEXT: ret
4556+
%or = tail call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %r, <2 x i32> %r, <2 x i32> splat (i32 16))
4557+
ret <2 x i32> %or
4558+
}

0 commit comments

Comments
 (0)