diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 5cc001c44e7a2..d3eb82a8c5559 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -20240,6 +20240,8 @@ static SDValue trySimplifySrlAddToRshrnb(SDValue Srl, SelectionDAG &DAG, if (!SrlOp1) return SDValue(); unsigned ShiftValue = SrlOp1->getZExtValue(); + if (ShiftValue < 1 || ShiftValue > ResVT.getScalarSizeInBits()) + return SDValue(); SDValue Add = Srl->getOperand(0); if (Add->getOpcode() != ISD::ADD || !Add->hasOneUse()) diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll index f94daa45fb82a..a913177623df9 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll @@ -100,6 +100,23 @@ define void @neg_add_lshr_rshrnb_h_0(ptr %ptr, ptr %dst, i64 %index){ ret void } +define void @neg_zero_shift(ptr %ptr, ptr %dst, i64 %index){ +; CHECK-LABEL: neg_zero_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: add z0.s, z0.s, #1 // =0x1 +; CHECK-NEXT: st1h { z0.s }, p0, [x1, x2, lsl #1] +; CHECK-NEXT: ret + %load = load , ptr %ptr, align 2 + %1 = add %load, trunc ( shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) to ) + %2 = lshr %1, trunc ( shufflevector ( insertelement ( poison, i64 0, i64 0), poison, zeroinitializer) to ) + %3 = trunc %2 to + %4 = getelementptr inbounds i16, ptr %dst, i64 %index + store %3, ptr %4, align 1 + ret void +} + define void @wide_add_shift_add_rshrnb_b(ptr %dest, i64 %index, %arg1){ ; CHECK-LABEL: wide_add_shift_add_rshrnb_b: ; CHECK: // %bb.0: @@ -142,6 +159,52 @@ define void @wide_add_shift_add_rshrnb_h(ptr %dest, i64 %index, %arg1){ +; CHECK-LABEL: wide_add_shift_add_rshrnb_d: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: rshrnb z1.s, z1.d, #32 +; CHECK-NEXT: rshrnb z0.s, z0.d, #32 +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, x1, lsl #2] +; CHECK-NEXT: add z0.s, z1.s, z0.s +; CHECK-NEXT: st1w { z0.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %1 = add %arg1, shufflevector ( insertelement ( poison, i64 2147483648, i64 0), poison, zeroinitializer) + %2 = lshr %1, shufflevector ( insertelement ( poison, i64 32, i64 0), poison, zeroinitializer) + %3 = getelementptr inbounds i32, ptr %dest, i64 %index + %load = load , ptr %3, align 4 + %4 = trunc %2 to + %5 = add %load, %4 + store %5, ptr %3, align 4 + ret void +} + +; Do not emit rshrnb if the shift amount is larger than the dest eltsize in bits +define void @neg_wide_add_shift_add_rshrnb_d(ptr %dest, i64 %index, %arg1){ +; CHECK-LABEL: neg_wide_add_shift_add_rshrnb_d: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z2.d, #0x800000000000 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: add z0.d, z0.d, z2.d +; CHECK-NEXT: add z1.d, z1.d, z2.d +; CHECK-NEXT: lsr z1.d, z1.d, #48 +; CHECK-NEXT: lsr z0.d, z0.d, #48 +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, x1, lsl #2] +; CHECK-NEXT: add z0.s, z1.s, z0.s +; CHECK-NEXT: st1w { z0.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %1 = add %arg1, shufflevector ( insertelement ( poison, i64 140737488355328, i64 0), poison, zeroinitializer) + %2 = lshr %1, shufflevector ( insertelement ( poison, i64 48, i64 0), poison, zeroinitializer) + %3 = getelementptr inbounds i32, ptr %dest, i64 %index + %load = load , ptr %3, align 4 + %4 = trunc %2 to + %5 = add %load, %4 + store %5, ptr %3, align 4 + ret void +} + define void @neg_trunc_lsr_add_op1_not_splat(ptr %ptr, ptr %dst, i64 %index, %add_op1){ ; CHECK-LABEL: neg_trunc_lsr_add_op1_not_splat: ; CHECK: // %bb.0: