Skip to content

Revert "[AArch64][SVE] Improve fixed-length addressing modes." #130263

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

Conversation

rj-jesus
Copy link
Contributor

@rj-jesus rj-jesus commented Mar 7, 2025

Reverts #129732.

I'll investigate offline what is causing the buildbot failure reported in #129732 (comment).

@llvmbot llvmbot added clang Clang issues not falling into any other category backend:AArch64 labels Mar 7, 2025
@llvmbot
Copy link
Member

llvmbot commented Mar 7, 2025

@llvm/pr-subscribers-clang

Author: Ricardo Jesus (rj-jesus)

Changes

Reverts llvm/llvm-project#129732.

I'll investigate offline what is causing the buildbot failure reported in #129732 (comment).


Patch is 21.13 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130263.diff

5 Files Affected:

  • (modified) clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c (+6-3)
  • (modified) llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (+2-13)
  • (modified) llvm/lib/Target/AArch64/AArch64Subtarget.h (+1-11)
  • (removed) llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll (-362)
  • (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll (+45-45)
diff --git a/clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c b/clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c
index 1391a1b09fbd1..0ed14b4b3b793 100644
--- a/clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c
+++ b/clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c
@@ -13,9 +13,12 @@
 
 void func(int *restrict a, int *restrict b) {
 // CHECK-LABEL: func
-// CHECK256-COUNT-8: str
-// CHECK512-COUNT-4: str
-// CHECK1024-COUNT-2: str
+// CHECK256-COUNT-1: str
+// CHECK256-COUNT-7: st1w
+// CHECK512-COUNT-1: str
+// CHECK512-COUNT-3: st1w
+// CHECK1024-COUNT-1: str
+// CHECK1024-COUNT-1: st1w
 // CHECK2048-COUNT-1: st1w
 #pragma clang loop vectorize(enable)
   for (int i = 0; i < 64; ++i)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 07bcd802962fa..3ca9107cb2ce5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -7380,23 +7380,12 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
     return false;
 
   SDValue VScale = N.getOperand(1);
-  int64_t MulImm = std::numeric_limits<int64_t>::max();
-  if (VScale.getOpcode() == ISD::VSCALE) {
-    MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
-  } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
-    int64_t ByteOffset = C->getSExtValue();
-    const auto KnownVScale =
-        Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
-
-    if (!KnownVScale || ByteOffset % KnownVScale != 0)
-      return false;
-
-    MulImm = ByteOffset / KnownVScale;
-  } else
+  if (VScale.getOpcode() != ISD::VSCALE)
     return false;
 
   TypeSize TS = MemVT.getSizeInBits();
   int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
+  int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
 
   if ((MulImm % MemWidthBytes) != 0)
     return false;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index f5ffc72cae537..c6eb77e3bc3ba 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -391,7 +391,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   void mirFileLoaded(MachineFunction &MF) const override;
 
   // Return the known range for the bit length of SVE data registers. A value
-  // of 0 means nothing is known about that particular limit beyond what's
+  // of 0 means nothing is known about that particular limit beyong what's
   // implied by the architecture.
   unsigned getMaxSVEVectorSizeInBits() const {
     assert(isSVEorStreamingSVEAvailable() &&
@@ -405,16 +405,6 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
     return MinSVEVectorSizeInBits;
   }
 
-  // Return the known bit length of SVE data registers. A value of 0 means the
-  // length is unkown beyond what's implied by the architecture.
-  unsigned getSVEVectorSizeInBits() const {
-    assert(isSVEorStreamingSVEAvailable() &&
-           "Tried to get SVE vector length without SVE support!");
-    if (MinSVEVectorSizeInBits == MaxSVEVectorSizeInBits)
-      return MaxSVEVectorSizeInBits;
-    return 0;
-  }
-
   bool useSVEForFixedLengthVectors() const {
     if (!isSVEorStreamingSVEAvailable())
       return false;
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll
deleted file mode 100644
index 700bbe4f060ca..0000000000000
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll
+++ /dev/null
@@ -1,362 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=128 -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s --check-prefix=CHECK-128
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256 < %s | FileCheck %s --check-prefix=CHECK-256
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=512 -aarch64-sve-vector-bits-max=512 < %s | FileCheck %s --check-prefix=CHECK-512
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=1024 -aarch64-sve-vector-bits-max=1024 < %s | FileCheck %s --check-prefix=CHECK-1024
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=2048 -aarch64-sve-vector-bits-max=2048 < %s | FileCheck %s --check-prefix=CHECK-2048
-
-define void @nxv16i8(ptr %ldptr, ptr %stptr) {
-; CHECK-LABEL: nxv16i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    mov w8, #256 // =0x100
-; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
-; CHECK-NEXT:    st1b { z0.b }, p0, [x1, x8]
-; CHECK-NEXT:    ret
-;
-; CHECK-128-LABEL: nxv16i8:
-; CHECK-128:       // %bb.0:
-; CHECK-128-NEXT:    ldr z0, [x0, #16, mul vl]
-; CHECK-128-NEXT:    str z0, [x1, #16, mul vl]
-; CHECK-128-NEXT:    ret
-;
-; CHECK-256-LABEL: nxv16i8:
-; CHECK-256:       // %bb.0:
-; CHECK-256-NEXT:    ldr z0, [x0, #8, mul vl]
-; CHECK-256-NEXT:    str z0, [x1, #8, mul vl]
-; CHECK-256-NEXT:    ret
-;
-; CHECK-512-LABEL: nxv16i8:
-; CHECK-512:       // %bb.0:
-; CHECK-512-NEXT:    ldr z0, [x0, #4, mul vl]
-; CHECK-512-NEXT:    str z0, [x1, #4, mul vl]
-; CHECK-512-NEXT:    ret
-;
-; CHECK-1024-LABEL: nxv16i8:
-; CHECK-1024:       // %bb.0:
-; CHECK-1024-NEXT:    ldr z0, [x0, #2, mul vl]
-; CHECK-1024-NEXT:    str z0, [x1, #2, mul vl]
-; CHECK-1024-NEXT:    ret
-;
-; CHECK-2048-LABEL: nxv16i8:
-; CHECK-2048:       // %bb.0:
-; CHECK-2048-NEXT:    ldr z0, [x0, #1, mul vl]
-; CHECK-2048-NEXT:    str z0, [x1, #1, mul vl]
-; CHECK-2048-NEXT:    ret
-  %ldoff = getelementptr inbounds nuw i8, ptr %ldptr, i64 256
-  %stoff = getelementptr inbounds nuw i8, ptr %stptr, i64 256
-  %x = load <vscale x 16 x i8>, ptr %ldoff, align 1
-  store <vscale x 16 x i8> %x, ptr %stoff, align 1
-  ret void
-}
-
-define void @nxv8i16(ptr %ldptr, ptr %stptr) {
-; CHECK-LABEL: nxv8i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov x8, #128 // =0x80
-; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
-; CHECK-NEXT:    st1h { z0.h }, p0, [x1, x8, lsl #1]
-; CHECK-NEXT:    ret
-;
-; CHECK-128-LABEL: nxv8i16:
-; CHECK-128:       // %bb.0:
-; CHECK-128-NEXT:    ldr z0, [x0, #16, mul vl]
-; CHECK-128-NEXT:    str z0, [x1, #16, mul vl]
-; CHECK-128-NEXT:    ret
-;
-; CHECK-256-LABEL: nxv8i16:
-; CHECK-256:       // %bb.0:
-; CHECK-256-NEXT:    ldr z0, [x0, #8, mul vl]
-; CHECK-256-NEXT:    str z0, [x1, #8, mul vl]
-; CHECK-256-NEXT:    ret
-;
-; CHECK-512-LABEL: nxv8i16:
-; CHECK-512:       // %bb.0:
-; CHECK-512-NEXT:    ldr z0, [x0, #4, mul vl]
-; CHECK-512-NEXT:    str z0, [x1, #4, mul vl]
-; CHECK-512-NEXT:    ret
-;
-; CHECK-1024-LABEL: nxv8i16:
-; CHECK-1024:       // %bb.0:
-; CHECK-1024-NEXT:    ldr z0, [x0, #2, mul vl]
-; CHECK-1024-NEXT:    str z0, [x1, #2, mul vl]
-; CHECK-1024-NEXT:    ret
-;
-; CHECK-2048-LABEL: nxv8i16:
-; CHECK-2048:       // %bb.0:
-; CHECK-2048-NEXT:    ldr z0, [x0, #1, mul vl]
-; CHECK-2048-NEXT:    str z0, [x1, #1, mul vl]
-; CHECK-2048-NEXT:    ret
-  %ldoff = getelementptr inbounds nuw i16, ptr %ldptr, i64 128
-  %stoff = getelementptr inbounds nuw i16, ptr %stptr, i64 128
-  %x = load <vscale x 8 x i16>, ptr %ldoff, align 2
-  store <vscale x 8 x i16> %x, ptr %stoff, align 2
-  ret void
-}
-
-define void @nxv4i32(ptr %ldptr, ptr %stptr) {
-; CHECK-LABEL: nxv4i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov x8, #64 // =0x40
-; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
-; CHECK-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
-; CHECK-NEXT:    ret
-;
-; CHECK-128-LABEL: nxv4i32:
-; CHECK-128:       // %bb.0:
-; CHECK-128-NEXT:    ldr z0, [x0, #16, mul vl]
-; CHECK-128-NEXT:    str z0, [x1, #16, mul vl]
-; CHECK-128-NEXT:    ret
-;
-; CHECK-256-LABEL: nxv4i32:
-; CHECK-256:       // %bb.0:
-; CHECK-256-NEXT:    ldr z0, [x0, #8, mul vl]
-; CHECK-256-NEXT:    str z0, [x1, #8, mul vl]
-; CHECK-256-NEXT:    ret
-;
-; CHECK-512-LABEL: nxv4i32:
-; CHECK-512:       // %bb.0:
-; CHECK-512-NEXT:    ldr z0, [x0, #4, mul vl]
-; CHECK-512-NEXT:    str z0, [x1, #4, mul vl]
-; CHECK-512-NEXT:    ret
-;
-; CHECK-1024-LABEL: nxv4i32:
-; CHECK-1024:       // %bb.0:
-; CHECK-1024-NEXT:    ldr z0, [x0, #2, mul vl]
-; CHECK-1024-NEXT:    str z0, [x1, #2, mul vl]
-; CHECK-1024-NEXT:    ret
-;
-; CHECK-2048-LABEL: nxv4i32:
-; CHECK-2048:       // %bb.0:
-; CHECK-2048-NEXT:    ldr z0, [x0, #1, mul vl]
-; CHECK-2048-NEXT:    str z0, [x1, #1, mul vl]
-; CHECK-2048-NEXT:    ret
-  %ldoff = getelementptr inbounds nuw i32, ptr %ldptr, i64 64
-  %stoff = getelementptr inbounds nuw i32, ptr %stptr, i64 64
-  %x = load <vscale x 4 x i32>, ptr %ldoff, align 4
-  store <vscale x 4 x i32> %x, ptr %stoff, align 4
-  ret void
-}
-
-define void @nxv2i64(ptr %ldptr, ptr %stptr) {
-; CHECK-LABEL: nxv2i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov x8, #32 // =0x20
-; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
-; CHECK-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
-; CHECK-NEXT:    ret
-;
-; CHECK-128-LABEL: nxv2i64:
-; CHECK-128:       // %bb.0:
-; CHECK-128-NEXT:    ldr z0, [x0, #16, mul vl]
-; CHECK-128-NEXT:    str z0, [x1, #16, mul vl]
-; CHECK-128-NEXT:    ret
-;
-; CHECK-256-LABEL: nxv2i64:
-; CHECK-256:       // %bb.0:
-; CHECK-256-NEXT:    ldr z0, [x0, #8, mul vl]
-; CHECK-256-NEXT:    str z0, [x1, #8, mul vl]
-; CHECK-256-NEXT:    ret
-;
-; CHECK-512-LABEL: nxv2i64:
-; CHECK-512:       // %bb.0:
-; CHECK-512-NEXT:    ldr z0, [x0, #4, mul vl]
-; CHECK-512-NEXT:    str z0, [x1, #4, mul vl]
-; CHECK-512-NEXT:    ret
-;
-; CHECK-1024-LABEL: nxv2i64:
-; CHECK-1024:       // %bb.0:
-; CHECK-1024-NEXT:    ldr z0, [x0, #2, mul vl]
-; CHECK-1024-NEXT:    str z0, [x1, #2, mul vl]
-; CHECK-1024-NEXT:    ret
-;
-; CHECK-2048-LABEL: nxv2i64:
-; CHECK-2048:       // %bb.0:
-; CHECK-2048-NEXT:    ldr z0, [x0, #1, mul vl]
-; CHECK-2048-NEXT:    str z0, [x1, #1, mul vl]
-; CHECK-2048-NEXT:    ret
-  %ldoff = getelementptr inbounds nuw i64, ptr %ldptr, i64 32
-  %stoff = getelementptr inbounds nuw i64, ptr %stptr, i64 32
-  %x = load <vscale x 2 x i64>, ptr %ldoff, align 8
-  store <vscale x 2 x i64> %x, ptr %stoff, align 8
-  ret void
-}
-
-define void @nxv4i8(ptr %ldptr, ptr %stptr) {
-; CHECK-LABEL: nxv4i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov w8, #32 // =0x20
-; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0, x8]
-; CHECK-NEXT:    st1b { z0.s }, p0, [x1, x8]
-; CHECK-NEXT:    ret
-;
-; CHECK-128-LABEL: nxv4i8:
-; CHECK-128:       // %bb.0:
-; CHECK-128-NEXT:    ptrue p0.s
-; CHECK-128-NEXT:    mov w8, #32 // =0x20
-; CHECK-128-NEXT:    ld1b { z0.s }, p0/z, [x0, x8]
-; CHECK-128-NEXT:    st1b { z0.s }, p0, [x1, x8]
-; CHECK-128-NEXT:    ret
-;
-; CHECK-256-LABEL: nxv4i8:
-; CHECK-256:       // %bb.0:
-; CHECK-256-NEXT:    ptrue p0.s
-; CHECK-256-NEXT:    ld1b { z0.s }, p0/z, [x0, #4, mul vl]
-; CHECK-256-NEXT:    st1b { z0.s }, p0, [x1, #4, mul vl]
-; CHECK-256-NEXT:    ret
-;
-; CHECK-512-LABEL: nxv4i8:
-; CHECK-512:       // %bb.0:
-; CHECK-512-NEXT:    ptrue p0.s
-; CHECK-512-NEXT:    ld1b { z0.s }, p0/z, [x0, #2, mul vl]
-; CHECK-512-NEXT:    st1b { z0.s }, p0, [x1, #2, mul vl]
-; CHECK-512-NEXT:    ret
-;
-; CHECK-1024-LABEL: nxv4i8:
-; CHECK-1024:       // %bb.0:
-; CHECK-1024-NEXT:    ptrue p0.s
-; CHECK-1024-NEXT:    ld1b { z0.s }, p0/z, [x0, #1, mul vl]
-; CHECK-1024-NEXT:    st1b { z0.s }, p0, [x1, #1, mul vl]
-; CHECK-1024-NEXT:    ret
-;
-; CHECK-2048-LABEL: nxv4i8:
-; CHECK-2048:       // %bb.0:
-; CHECK-2048-NEXT:    ptrue p0.s
-; CHECK-2048-NEXT:    mov w8, #32 // =0x20
-; CHECK-2048-NEXT:    ld1b { z0.s }, p0/z, [x0, x8]
-; CHECK-2048-NEXT:    st1b { z0.s }, p0, [x1, x8]
-; CHECK-2048-NEXT:    ret
-  %ldoff = getelementptr inbounds nuw i8, ptr %ldptr, i64 32
-  %stoff = getelementptr inbounds nuw i8, ptr %stptr, i64 32
-  %x = load <vscale x 4 x i8>, ptr %ldoff, align 1
-  store <vscale x 4 x i8> %x, ptr %stoff, align 1
-  ret void
-}
-
-define void @nxv2f32(ptr %ldptr, ptr %stptr) {
-; CHECK-LABEL: nxv2f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov x8, #16 // =0x10
-; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
-; CHECK-NEXT:    st1w { z0.d }, p0, [x1, x8, lsl #2]
-; CHECK-NEXT:    ret
-;
-; CHECK-128-LABEL: nxv2f32:
-; CHECK-128:       // %bb.0:
-; CHECK-128-NEXT:    ptrue p0.d
-; CHECK-128-NEXT:    mov x8, #16 // =0x10
-; CHECK-128-NEXT:    ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
-; CHECK-128-NEXT:    st1w { z0.d }, p0, [x1, x8, lsl #2]
-; CHECK-128-NEXT:    ret
-;
-; CHECK-256-LABEL: nxv2f32:
-; CHECK-256:       // %bb.0:
-; CHECK-256-NEXT:    ptrue p0.d
-; CHECK-256-NEXT:    ld1w { z0.d }, p0/z, [x0, #4, mul vl]
-; CHECK-256-NEXT:    st1w { z0.d }, p0, [x1, #4, mul vl]
-; CHECK-256-NEXT:    ret
-;
-; CHECK-512-LABEL: nxv2f32:
-; CHECK-512:       // %bb.0:
-; CHECK-512-NEXT:    ptrue p0.d
-; CHECK-512-NEXT:    ld1w { z0.d }, p0/z, [x0, #2, mul vl]
-; CHECK-512-NEXT:    st1w { z0.d }, p0, [x1, #2, mul vl]
-; CHECK-512-NEXT:    ret
-;
-; CHECK-1024-LABEL: nxv2f32:
-; CHECK-1024:       // %bb.0:
-; CHECK-1024-NEXT:    ptrue p0.d
-; CHECK-1024-NEXT:    ld1w { z0.d }, p0/z, [x0, #1, mul vl]
-; CHECK-1024-NEXT:    st1w { z0.d }, p0, [x1, #1, mul vl]
-; CHECK-1024-NEXT:    ret
-;
-; CHECK-2048-LABEL: nxv2f32:
-; CHECK-2048:       // %bb.0:
-; CHECK-2048-NEXT:    ptrue p0.d
-; CHECK-2048-NEXT:    mov x8, #16 // =0x10
-; CHECK-2048-NEXT:    ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
-; CHECK-2048-NEXT:    st1w { z0.d }, p0, [x1, x8, lsl #2]
-; CHECK-2048-NEXT:    ret
-  %ldoff = getelementptr inbounds nuw i8, ptr %ldptr, i64 64
-  %stoff = getelementptr inbounds nuw i8, ptr %stptr, i64 64
-  %x = load <vscale x 2 x float>, ptr %ldoff, align 1
-  store <vscale x 2 x float> %x, ptr %stoff, align 1
-  ret void
-}
-
-define void @nxv4f64(ptr %ldptr, ptr %stptr) {
-; CHECK-LABEL: nxv4f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov x8, #16 // =0x10
-; CHECK-NEXT:    add x9, x0, #128
-; CHECK-NEXT:    ldr z1, [x9, #1, mul vl]
-; CHECK-NEXT:    add x9, x1, #128
-; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
-; CHECK-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
-; CHECK-NEXT:    str z1, [x9, #1, mul vl]
-; CHECK-NEXT:    ret
-;
-; CHECK-128-LABEL: nxv4f64:
-; CHECK-128:       // %bb.0:
-; CHECK-128-NEXT:    add x8, x0, #128
-; CHECK-128-NEXT:    ldr z1, [x0, #8, mul vl]
-; CHECK-128-NEXT:    ldr z0, [x8, #1, mul vl]
-; CHECK-128-NEXT:    add x8, x1, #128
-; CHECK-128-NEXT:    str z0, [x8, #1, mul vl]
-; CHECK-128-NEXT:    str z1, [x1, #8, mul vl]
-; CHECK-128-NEXT:    ret
-;
-; CHECK-256-LABEL: nxv4f64:
-; CHECK-256:       // %bb.0:
-; CHECK-256-NEXT:    add x8, x0, #128
-; CHECK-256-NEXT:    ldr z1, [x0, #4, mul vl]
-; CHECK-256-NEXT:    ldr z0, [x8, #1, mul vl]
-; CHECK-256-NEXT:    add x8, x1, #128
-; CHECK-256-NEXT:    str z0, [x8, #1, mul vl]
-; CHECK-256-NEXT:    str z1, [x1, #4, mul vl]
-; CHECK-256-NEXT:    ret
-;
-; CHECK-512-LABEL: nxv4f64:
-; CHECK-512:       // %bb.0:
-; CHECK-512-NEXT:    add x8, x0, #128
-; CHECK-512-NEXT:    ldr z1, [x0, #2, mul vl]
-; CHECK-512-NEXT:    ldr z0, [x8, #1, mul vl]
-; CHECK-512-NEXT:    add x8, x1, #128
-; CHECK-512-NEXT:    str z0, [x8, #1, mul vl]
-; CHECK-512-NEXT:    str z1, [x1, #2, mul vl]
-; CHECK-512-NEXT:    ret
-;
-; CHECK-1024-LABEL: nxv4f64:
-; CHECK-1024:       // %bb.0:
-; CHECK-1024-NEXT:    add x8, x0, #128
-; CHECK-1024-NEXT:    ldr z1, [x0, #1, mul vl]
-; CHECK-1024-NEXT:    ldr z0, [x8, #1, mul vl]
-; CHECK-1024-NEXT:    add x8, x1, #128
-; CHECK-1024-NEXT:    str z0, [x8, #1, mul vl]
-; CHECK-1024-NEXT:    str z1, [x1, #1, mul vl]
-; CHECK-1024-NEXT:    ret
-;
-; CHECK-2048-LABEL: nxv4f64:
-; CHECK-2048:       // %bb.0:
-; CHECK-2048-NEXT:    ptrue p0.d
-; CHECK-2048-NEXT:    mov x8, #16 // =0x10
-; CHECK-2048-NEXT:    add x9, x0, #128
-; CHECK-2048-NEXT:    ldr z1, [x9, #1, mul vl]
-; CHECK-2048-NEXT:    add x9, x1, #128
-; CHECK-2048-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
-; CHECK-2048-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
-; CHECK-2048-NEXT:    str z1, [x9, #1, mul vl]
-; CHECK-2048-NEXT:    ret
-  %ldoff = getelementptr inbounds nuw i8, ptr %ldptr, i64 128
-  %stoff = getelementptr inbounds nuw i8, ptr %stptr, i64 128
-  %x = load <vscale x 4 x double>, ptr %ldoff, align 1
-  store <vscale x 4 x double> %x, ptr %stoff, align 1
-  ret void
-}
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
index 2d4cdfa7278b9..e33bc8da97c05 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
@@ -30,64 +30,64 @@ define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_rang
 ; CHECK-NEXT:  // %bb.1: // %vector.body
 ; CHECK-NEXT:    mov z0.b, #0 // =0x0
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov x9, #24 // =0x18
+; CHECK-NEXT:    mov x9, #8 // =0x8
+; CHECK-NEXT:    mov x10, #24 // =0x18
 ; CHECK-NEXT:    umov w8, v0.b[8]
-; CHECK-NEXT:    mov v2.16b, v0.16b
-; CHECK-NEXT:    mov z3.d, z0.d
-; CHECK-NEXT:    mov v2.b[1], v0.b[1]
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #16
-; CHECK-NEXT:    fmov s1, w8
-; CHECK-NEXT:    mov x8, #8 // =0x8
-; CHECK-NEXT:    ext v4.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT:    mov v1.b[1], v0.b[9]
-; CHECK-NEXT:    mov v2.b[2], v0.b[2]
-; CHECK-NEXT:    mov v1.b[2], v0.b[10]
-; CHECK-NEXT:    mov v2.b[3], v0.b[3]
-; CHECK-NEXT:    mov v1.b[3], v0.b[11]
-; CHECK-NEXT:    mov v2.b[4], v0.b[4]
-; CHECK-NEXT:    mov v1.b[4], v0.b[12]
-; CHECK-NEXT:    mov v2.b[5], v0.b[5]
-; CHECK-NEXT:    mov v1.b[5], v0.b[13]
-; CHECK-NEXT:    mov v2.b[6], v0.b[6]
-; CHECK-NEXT:    mov v1.b[6], v0.b[14]
-; CHECK-NEXT:    mov v2.b[7], v0.b[7]
-; CHECK-NEXT:    mov v1.b[7], v0.b[15]
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    mov v1.b[1], v0.b[1]
+; CHECK-NEXT:    fmov s2, w8
+; CHECK-NEXT:    mov x8, #16 // =0x10
+; CHECK-NEXT:    mov v2.b[1], v0.b[9]
+; CHECK-NEXT:    mov v1.b[2], v0.b[2]
+; CHECK-NEXT:    mov v2.b[2], v0.b[10]
+; CHECK-NEXT:    mov v1.b[3], v0.b[3]
+; CHECK-NEXT:    mov v2.b[3], v0.b[11]
+; CHECK-NEXT:    mov v1.b[4], v0.b[4]
+; CHECK-NEXT:    mov v2.b[4], v0.b[12]
+; CHECK-NEXT:    mov v1.b[5], v0.b[5]
+; CHECK-NEXT:    mov v2.b[5], v0.b[13]
+; CHECK-NEXT:    mov v1.b[6], v0.b[6]
+; CHECK-NEXT:    mov v2.b[6], v0.b[14]
+; CHECK-NEXT:    mov v1.b[7], v0.b[7]
+; CHECK-NEXT:    mov v2.b[7], v0.b[15]
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
+; CHECK-NEXT:    uunpklo z1.h, z1.b
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    uunpklo z2.h, z2.b
-; CHECK-NEXT:    uunpklo z0.h, z1.b
-; CHECK-NEXT:    uunpklo z1.h, z3.b
-; CHECK-NEXT:    uunpklo z3.h, z4.b
-; CHECK-NEXT:    uunpklo z2.s, z2.h
-; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
+; CHECK-NEXT:    uunpklo z3.h, z3.b
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpklo z2.s, z2.h
+; CHECK-NEXT:    lsl z1.s, z1.s, #31
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
-; CHECK-NEXT:    lsl z2.s, z2.s, #31
 ; CHECK-NEXT:    lsl z0.s, z0.s, #31
-; CHECK-NEXT:    lsl z1.s, z1.s, #31
+; CHECK-NEXT:    asr z1.s, z1.s, #31
+; CHECK-NEXT:    lsl z2.s, z2.s, #31
+; CHECK-NEXT:    asr z0.s, z0.s, #31
+; CHECK-NEXT:    and z1.s, z1.s, #0x1
 ; CHECK-NEXT:    lsl z3.s, z3.s, #31
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
-; CHECK-NEXT:    asr z0.s, z0.s, #31
-; CHECK-NEXT:    asr z1.s, z1.s, #31
+; CHECK-NEXT:    and z0.s, z0.s, #0x1
+; CHECK-NEXT:    cmpne p4.s, p0/z, z1.s, #0
+; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; CHECK-NEXT:    asr z3.s, z3.s, #31
 ; CHECK-NEXT:    and z2.s, z2.s, #0x1
-; CHECK-NEXT:    and z0.s, z0.s, #0x1
-; CHECK-NEXT:    and z1.s, z1.s, #0x1
-; CHECK-NEXT:    and z3.s, z3.s, #0x1
-; CHECK-NEXT:    cmpne p4.s, p0/z, z2.s, #0
-; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x0]
 ; CHECK-NEXT:    cmpne p1.s, p0/z, z0.s, #0
-; CHECK-NEXT:    cmpne p2.s, p0/z, z1.s, #0
-; CHECK-NEXT:    cmpne p3.s, p0/z, z3.s, #0
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
-; CHECK-NEXT:    ld1w { z1.s }, p0/z,...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Mar 7, 2025

@llvm/pr-subscribers-backend-aarch64

Author: Ricardo Jesus (rj-jesus)

Changes

Reverts llvm/llvm-project#129732.

I'll investigate offline what is causing the buildbot failure reported in #129732 (comment).


Patch is 21.13 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130263.diff

5 Files Affected:

  • (modified) clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c (+6-3)
  • (modified) llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (+2-13)
  • (modified) llvm/lib/Target/AArch64/AArch64Subtarget.h (+1-11)
  • (removed) llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll (-362)
  • (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll (+45-45)
diff --git a/clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c b/clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c
index 1391a1b09fbd1..0ed14b4b3b793 100644
--- a/clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c
+++ b/clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c
@@ -13,9 +13,12 @@
 
 void func(int *restrict a, int *restrict b) {
 // CHECK-LABEL: func
-// CHECK256-COUNT-8: str
-// CHECK512-COUNT-4: str
-// CHECK1024-COUNT-2: str
+// CHECK256-COUNT-1: str
+// CHECK256-COUNT-7: st1w
+// CHECK512-COUNT-1: str
+// CHECK512-COUNT-3: st1w
+// CHECK1024-COUNT-1: str
+// CHECK1024-COUNT-1: st1w
 // CHECK2048-COUNT-1: st1w
 #pragma clang loop vectorize(enable)
   for (int i = 0; i < 64; ++i)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 07bcd802962fa..3ca9107cb2ce5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -7380,23 +7380,12 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
     return false;
 
   SDValue VScale = N.getOperand(1);
-  int64_t MulImm = std::numeric_limits<int64_t>::max();
-  if (VScale.getOpcode() == ISD::VSCALE) {
-    MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
-  } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
-    int64_t ByteOffset = C->getSExtValue();
-    const auto KnownVScale =
-        Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
-
-    if (!KnownVScale || ByteOffset % KnownVScale != 0)
-      return false;
-
-    MulImm = ByteOffset / KnownVScale;
-  } else
+  if (VScale.getOpcode() != ISD::VSCALE)
     return false;
 
   TypeSize TS = MemVT.getSizeInBits();
   int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
+  int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
 
   if ((MulImm % MemWidthBytes) != 0)
     return false;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index f5ffc72cae537..c6eb77e3bc3ba 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -391,7 +391,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   void mirFileLoaded(MachineFunction &MF) const override;
 
   // Return the known range for the bit length of SVE data registers. A value
-  // of 0 means nothing is known about that particular limit beyond what's
+  // of 0 means nothing is known about that particular limit beyong what's
   // implied by the architecture.
   unsigned getMaxSVEVectorSizeInBits() const {
     assert(isSVEorStreamingSVEAvailable() &&
@@ -405,16 +405,6 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
     return MinSVEVectorSizeInBits;
   }
 
-  // Return the known bit length of SVE data registers. A value of 0 means the
-  // length is unkown beyond what's implied by the architecture.
-  unsigned getSVEVectorSizeInBits() const {
-    assert(isSVEorStreamingSVEAvailable() &&
-           "Tried to get SVE vector length without SVE support!");
-    if (MinSVEVectorSizeInBits == MaxSVEVectorSizeInBits)
-      return MaxSVEVectorSizeInBits;
-    return 0;
-  }
-
   bool useSVEForFixedLengthVectors() const {
     if (!isSVEorStreamingSVEAvailable())
       return false;
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll
deleted file mode 100644
index 700bbe4f060ca..0000000000000
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll
+++ /dev/null
@@ -1,362 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=128 -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s --check-prefix=CHECK-128
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256 < %s | FileCheck %s --check-prefix=CHECK-256
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=512 -aarch64-sve-vector-bits-max=512 < %s | FileCheck %s --check-prefix=CHECK-512
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=1024 -aarch64-sve-vector-bits-max=1024 < %s | FileCheck %s --check-prefix=CHECK-1024
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=2048 -aarch64-sve-vector-bits-max=2048 < %s | FileCheck %s --check-prefix=CHECK-2048
-
-define void @nxv16i8(ptr %ldptr, ptr %stptr) {
-; CHECK-LABEL: nxv16i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    mov w8, #256 // =0x100
-; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
-; CHECK-NEXT:    st1b { z0.b }, p0, [x1, x8]
-; CHECK-NEXT:    ret
-;
-; CHECK-128-LABEL: nxv16i8:
-; CHECK-128:       // %bb.0:
-; CHECK-128-NEXT:    ldr z0, [x0, #16, mul vl]
-; CHECK-128-NEXT:    str z0, [x1, #16, mul vl]
-; CHECK-128-NEXT:    ret
-;
-; CHECK-256-LABEL: nxv16i8:
-; CHECK-256:       // %bb.0:
-; CHECK-256-NEXT:    ldr z0, [x0, #8, mul vl]
-; CHECK-256-NEXT:    str z0, [x1, #8, mul vl]
-; CHECK-256-NEXT:    ret
-;
-; CHECK-512-LABEL: nxv16i8:
-; CHECK-512:       // %bb.0:
-; CHECK-512-NEXT:    ldr z0, [x0, #4, mul vl]
-; CHECK-512-NEXT:    str z0, [x1, #4, mul vl]
-; CHECK-512-NEXT:    ret
-;
-; CHECK-1024-LABEL: nxv16i8:
-; CHECK-1024:       // %bb.0:
-; CHECK-1024-NEXT:    ldr z0, [x0, #2, mul vl]
-; CHECK-1024-NEXT:    str z0, [x1, #2, mul vl]
-; CHECK-1024-NEXT:    ret
-;
-; CHECK-2048-LABEL: nxv16i8:
-; CHECK-2048:       // %bb.0:
-; CHECK-2048-NEXT:    ldr z0, [x0, #1, mul vl]
-; CHECK-2048-NEXT:    str z0, [x1, #1, mul vl]
-; CHECK-2048-NEXT:    ret
-  %ldoff = getelementptr inbounds nuw i8, ptr %ldptr, i64 256
-  %stoff = getelementptr inbounds nuw i8, ptr %stptr, i64 256
-  %x = load <vscale x 16 x i8>, ptr %ldoff, align 1
-  store <vscale x 16 x i8> %x, ptr %stoff, align 1
-  ret void
-}
-
-define void @nxv8i16(ptr %ldptr, ptr %stptr) {
-; CHECK-LABEL: nxv8i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov x8, #128 // =0x80
-; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
-; CHECK-NEXT:    st1h { z0.h }, p0, [x1, x8, lsl #1]
-; CHECK-NEXT:    ret
-;
-; CHECK-128-LABEL: nxv8i16:
-; CHECK-128:       // %bb.0:
-; CHECK-128-NEXT:    ldr z0, [x0, #16, mul vl]
-; CHECK-128-NEXT:    str z0, [x1, #16, mul vl]
-; CHECK-128-NEXT:    ret
-;
-; CHECK-256-LABEL: nxv8i16:
-; CHECK-256:       // %bb.0:
-; CHECK-256-NEXT:    ldr z0, [x0, #8, mul vl]
-; CHECK-256-NEXT:    str z0, [x1, #8, mul vl]
-; CHECK-256-NEXT:    ret
-;
-; CHECK-512-LABEL: nxv8i16:
-; CHECK-512:       // %bb.0:
-; CHECK-512-NEXT:    ldr z0, [x0, #4, mul vl]
-; CHECK-512-NEXT:    str z0, [x1, #4, mul vl]
-; CHECK-512-NEXT:    ret
-;
-; CHECK-1024-LABEL: nxv8i16:
-; CHECK-1024:       // %bb.0:
-; CHECK-1024-NEXT:    ldr z0, [x0, #2, mul vl]
-; CHECK-1024-NEXT:    str z0, [x1, #2, mul vl]
-; CHECK-1024-NEXT:    ret
-;
-; CHECK-2048-LABEL: nxv8i16:
-; CHECK-2048:       // %bb.0:
-; CHECK-2048-NEXT:    ldr z0, [x0, #1, mul vl]
-; CHECK-2048-NEXT:    str z0, [x1, #1, mul vl]
-; CHECK-2048-NEXT:    ret
-  %ldoff = getelementptr inbounds nuw i16, ptr %ldptr, i64 128
-  %stoff = getelementptr inbounds nuw i16, ptr %stptr, i64 128
-  %x = load <vscale x 8 x i16>, ptr %ldoff, align 2
-  store <vscale x 8 x i16> %x, ptr %stoff, align 2
-  ret void
-}
-
-define void @nxv4i32(ptr %ldptr, ptr %stptr) {
-; CHECK-LABEL: nxv4i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov x8, #64 // =0x40
-; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
-; CHECK-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
-; CHECK-NEXT:    ret
-;
-; CHECK-128-LABEL: nxv4i32:
-; CHECK-128:       // %bb.0:
-; CHECK-128-NEXT:    ldr z0, [x0, #16, mul vl]
-; CHECK-128-NEXT:    str z0, [x1, #16, mul vl]
-; CHECK-128-NEXT:    ret
-;
-; CHECK-256-LABEL: nxv4i32:
-; CHECK-256:       // %bb.0:
-; CHECK-256-NEXT:    ldr z0, [x0, #8, mul vl]
-; CHECK-256-NEXT:    str z0, [x1, #8, mul vl]
-; CHECK-256-NEXT:    ret
-;
-; CHECK-512-LABEL: nxv4i32:
-; CHECK-512:       // %bb.0:
-; CHECK-512-NEXT:    ldr z0, [x0, #4, mul vl]
-; CHECK-512-NEXT:    str z0, [x1, #4, mul vl]
-; CHECK-512-NEXT:    ret
-;
-; CHECK-1024-LABEL: nxv4i32:
-; CHECK-1024:       // %bb.0:
-; CHECK-1024-NEXT:    ldr z0, [x0, #2, mul vl]
-; CHECK-1024-NEXT:    str z0, [x1, #2, mul vl]
-; CHECK-1024-NEXT:    ret
-;
-; CHECK-2048-LABEL: nxv4i32:
-; CHECK-2048:       // %bb.0:
-; CHECK-2048-NEXT:    ldr z0, [x0, #1, mul vl]
-; CHECK-2048-NEXT:    str z0, [x1, #1, mul vl]
-; CHECK-2048-NEXT:    ret
-  %ldoff = getelementptr inbounds nuw i32, ptr %ldptr, i64 64
-  %stoff = getelementptr inbounds nuw i32, ptr %stptr, i64 64
-  %x = load <vscale x 4 x i32>, ptr %ldoff, align 4
-  store <vscale x 4 x i32> %x, ptr %stoff, align 4
-  ret void
-}
-
-define void @nxv2i64(ptr %ldptr, ptr %stptr) {
-; CHECK-LABEL: nxv2i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov x8, #32 // =0x20
-; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
-; CHECK-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
-; CHECK-NEXT:    ret
-;
-; CHECK-128-LABEL: nxv2i64:
-; CHECK-128:       // %bb.0:
-; CHECK-128-NEXT:    ldr z0, [x0, #16, mul vl]
-; CHECK-128-NEXT:    str z0, [x1, #16, mul vl]
-; CHECK-128-NEXT:    ret
-;
-; CHECK-256-LABEL: nxv2i64:
-; CHECK-256:       // %bb.0:
-; CHECK-256-NEXT:    ldr z0, [x0, #8, mul vl]
-; CHECK-256-NEXT:    str z0, [x1, #8, mul vl]
-; CHECK-256-NEXT:    ret
-;
-; CHECK-512-LABEL: nxv2i64:
-; CHECK-512:       // %bb.0:
-; CHECK-512-NEXT:    ldr z0, [x0, #4, mul vl]
-; CHECK-512-NEXT:    str z0, [x1, #4, mul vl]
-; CHECK-512-NEXT:    ret
-;
-; CHECK-1024-LABEL: nxv2i64:
-; CHECK-1024:       // %bb.0:
-; CHECK-1024-NEXT:    ldr z0, [x0, #2, mul vl]
-; CHECK-1024-NEXT:    str z0, [x1, #2, mul vl]
-; CHECK-1024-NEXT:    ret
-;
-; CHECK-2048-LABEL: nxv2i64:
-; CHECK-2048:       // %bb.0:
-; CHECK-2048-NEXT:    ldr z0, [x0, #1, mul vl]
-; CHECK-2048-NEXT:    str z0, [x1, #1, mul vl]
-; CHECK-2048-NEXT:    ret
-  %ldoff = getelementptr inbounds nuw i64, ptr %ldptr, i64 32
-  %stoff = getelementptr inbounds nuw i64, ptr %stptr, i64 32
-  %x = load <vscale x 2 x i64>, ptr %ldoff, align 8
-  store <vscale x 2 x i64> %x, ptr %stoff, align 8
-  ret void
-}
-
-define void @nxv4i8(ptr %ldptr, ptr %stptr) {
-; CHECK-LABEL: nxv4i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov w8, #32 // =0x20
-; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0, x8]
-; CHECK-NEXT:    st1b { z0.s }, p0, [x1, x8]
-; CHECK-NEXT:    ret
-;
-; CHECK-128-LABEL: nxv4i8:
-; CHECK-128:       // %bb.0:
-; CHECK-128-NEXT:    ptrue p0.s
-; CHECK-128-NEXT:    mov w8, #32 // =0x20
-; CHECK-128-NEXT:    ld1b { z0.s }, p0/z, [x0, x8]
-; CHECK-128-NEXT:    st1b { z0.s }, p0, [x1, x8]
-; CHECK-128-NEXT:    ret
-;
-; CHECK-256-LABEL: nxv4i8:
-; CHECK-256:       // %bb.0:
-; CHECK-256-NEXT:    ptrue p0.s
-; CHECK-256-NEXT:    ld1b { z0.s }, p0/z, [x0, #4, mul vl]
-; CHECK-256-NEXT:    st1b { z0.s }, p0, [x1, #4, mul vl]
-; CHECK-256-NEXT:    ret
-;
-; CHECK-512-LABEL: nxv4i8:
-; CHECK-512:       // %bb.0:
-; CHECK-512-NEXT:    ptrue p0.s
-; CHECK-512-NEXT:    ld1b { z0.s }, p0/z, [x0, #2, mul vl]
-; CHECK-512-NEXT:    st1b { z0.s }, p0, [x1, #2, mul vl]
-; CHECK-512-NEXT:    ret
-;
-; CHECK-1024-LABEL: nxv4i8:
-; CHECK-1024:       // %bb.0:
-; CHECK-1024-NEXT:    ptrue p0.s
-; CHECK-1024-NEXT:    ld1b { z0.s }, p0/z, [x0, #1, mul vl]
-; CHECK-1024-NEXT:    st1b { z0.s }, p0, [x1, #1, mul vl]
-; CHECK-1024-NEXT:    ret
-;
-; CHECK-2048-LABEL: nxv4i8:
-; CHECK-2048:       // %bb.0:
-; CHECK-2048-NEXT:    ptrue p0.s
-; CHECK-2048-NEXT:    mov w8, #32 // =0x20
-; CHECK-2048-NEXT:    ld1b { z0.s }, p0/z, [x0, x8]
-; CHECK-2048-NEXT:    st1b { z0.s }, p0, [x1, x8]
-; CHECK-2048-NEXT:    ret
-  %ldoff = getelementptr inbounds nuw i8, ptr %ldptr, i64 32
-  %stoff = getelementptr inbounds nuw i8, ptr %stptr, i64 32
-  %x = load <vscale x 4 x i8>, ptr %ldoff, align 1
-  store <vscale x 4 x i8> %x, ptr %stoff, align 1
-  ret void
-}
-
-define void @nxv2f32(ptr %ldptr, ptr %stptr) {
-; CHECK-LABEL: nxv2f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov x8, #16 // =0x10
-; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
-; CHECK-NEXT:    st1w { z0.d }, p0, [x1, x8, lsl #2]
-; CHECK-NEXT:    ret
-;
-; CHECK-128-LABEL: nxv2f32:
-; CHECK-128:       // %bb.0:
-; CHECK-128-NEXT:    ptrue p0.d
-; CHECK-128-NEXT:    mov x8, #16 // =0x10
-; CHECK-128-NEXT:    ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
-; CHECK-128-NEXT:    st1w { z0.d }, p0, [x1, x8, lsl #2]
-; CHECK-128-NEXT:    ret
-;
-; CHECK-256-LABEL: nxv2f32:
-; CHECK-256:       // %bb.0:
-; CHECK-256-NEXT:    ptrue p0.d
-; CHECK-256-NEXT:    ld1w { z0.d }, p0/z, [x0, #4, mul vl]
-; CHECK-256-NEXT:    st1w { z0.d }, p0, [x1, #4, mul vl]
-; CHECK-256-NEXT:    ret
-;
-; CHECK-512-LABEL: nxv2f32:
-; CHECK-512:       // %bb.0:
-; CHECK-512-NEXT:    ptrue p0.d
-; CHECK-512-NEXT:    ld1w { z0.d }, p0/z, [x0, #2, mul vl]
-; CHECK-512-NEXT:    st1w { z0.d }, p0, [x1, #2, mul vl]
-; CHECK-512-NEXT:    ret
-;
-; CHECK-1024-LABEL: nxv2f32:
-; CHECK-1024:       // %bb.0:
-; CHECK-1024-NEXT:    ptrue p0.d
-; CHECK-1024-NEXT:    ld1w { z0.d }, p0/z, [x0, #1, mul vl]
-; CHECK-1024-NEXT:    st1w { z0.d }, p0, [x1, #1, mul vl]
-; CHECK-1024-NEXT:    ret
-;
-; CHECK-2048-LABEL: nxv2f32:
-; CHECK-2048:       // %bb.0:
-; CHECK-2048-NEXT:    ptrue p0.d
-; CHECK-2048-NEXT:    mov x8, #16 // =0x10
-; CHECK-2048-NEXT:    ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
-; CHECK-2048-NEXT:    st1w { z0.d }, p0, [x1, x8, lsl #2]
-; CHECK-2048-NEXT:    ret
-  %ldoff = getelementptr inbounds nuw i8, ptr %ldptr, i64 64
-  %stoff = getelementptr inbounds nuw i8, ptr %stptr, i64 64
-  %x = load <vscale x 2 x float>, ptr %ldoff, align 1
-  store <vscale x 2 x float> %x, ptr %stoff, align 1
-  ret void
-}
-
-define void @nxv4f64(ptr %ldptr, ptr %stptr) {
-; CHECK-LABEL: nxv4f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov x8, #16 // =0x10
-; CHECK-NEXT:    add x9, x0, #128
-; CHECK-NEXT:    ldr z1, [x9, #1, mul vl]
-; CHECK-NEXT:    add x9, x1, #128
-; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
-; CHECK-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
-; CHECK-NEXT:    str z1, [x9, #1, mul vl]
-; CHECK-NEXT:    ret
-;
-; CHECK-128-LABEL: nxv4f64:
-; CHECK-128:       // %bb.0:
-; CHECK-128-NEXT:    add x8, x0, #128
-; CHECK-128-NEXT:    ldr z1, [x0, #8, mul vl]
-; CHECK-128-NEXT:    ldr z0, [x8, #1, mul vl]
-; CHECK-128-NEXT:    add x8, x1, #128
-; CHECK-128-NEXT:    str z0, [x8, #1, mul vl]
-; CHECK-128-NEXT:    str z1, [x1, #8, mul vl]
-; CHECK-128-NEXT:    ret
-;
-; CHECK-256-LABEL: nxv4f64:
-; CHECK-256:       // %bb.0:
-; CHECK-256-NEXT:    add x8, x0, #128
-; CHECK-256-NEXT:    ldr z1, [x0, #4, mul vl]
-; CHECK-256-NEXT:    ldr z0, [x8, #1, mul vl]
-; CHECK-256-NEXT:    add x8, x1, #128
-; CHECK-256-NEXT:    str z0, [x8, #1, mul vl]
-; CHECK-256-NEXT:    str z1, [x1, #4, mul vl]
-; CHECK-256-NEXT:    ret
-;
-; CHECK-512-LABEL: nxv4f64:
-; CHECK-512:       // %bb.0:
-; CHECK-512-NEXT:    add x8, x0, #128
-; CHECK-512-NEXT:    ldr z1, [x0, #2, mul vl]
-; CHECK-512-NEXT:    ldr z0, [x8, #1, mul vl]
-; CHECK-512-NEXT:    add x8, x1, #128
-; CHECK-512-NEXT:    str z0, [x8, #1, mul vl]
-; CHECK-512-NEXT:    str z1, [x1, #2, mul vl]
-; CHECK-512-NEXT:    ret
-;
-; CHECK-1024-LABEL: nxv4f64:
-; CHECK-1024:       // %bb.0:
-; CHECK-1024-NEXT:    add x8, x0, #128
-; CHECK-1024-NEXT:    ldr z1, [x0, #1, mul vl]
-; CHECK-1024-NEXT:    ldr z0, [x8, #1, mul vl]
-; CHECK-1024-NEXT:    add x8, x1, #128
-; CHECK-1024-NEXT:    str z0, [x8, #1, mul vl]
-; CHECK-1024-NEXT:    str z1, [x1, #1, mul vl]
-; CHECK-1024-NEXT:    ret
-;
-; CHECK-2048-LABEL: nxv4f64:
-; CHECK-2048:       // %bb.0:
-; CHECK-2048-NEXT:    ptrue p0.d
-; CHECK-2048-NEXT:    mov x8, #16 // =0x10
-; CHECK-2048-NEXT:    add x9, x0, #128
-; CHECK-2048-NEXT:    ldr z1, [x9, #1, mul vl]
-; CHECK-2048-NEXT:    add x9, x1, #128
-; CHECK-2048-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
-; CHECK-2048-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
-; CHECK-2048-NEXT:    str z1, [x9, #1, mul vl]
-; CHECK-2048-NEXT:    ret
-  %ldoff = getelementptr inbounds nuw i8, ptr %ldptr, i64 128
-  %stoff = getelementptr inbounds nuw i8, ptr %stptr, i64 128
-  %x = load <vscale x 4 x double>, ptr %ldoff, align 1
-  store <vscale x 4 x double> %x, ptr %stoff, align 1
-  ret void
-}
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
index 2d4cdfa7278b9..e33bc8da97c05 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
@@ -30,64 +30,64 @@ define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_rang
 ; CHECK-NEXT:  // %bb.1: // %vector.body
 ; CHECK-NEXT:    mov z0.b, #0 // =0x0
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov x9, #24 // =0x18
+; CHECK-NEXT:    mov x9, #8 // =0x8
+; CHECK-NEXT:    mov x10, #24 // =0x18
 ; CHECK-NEXT:    umov w8, v0.b[8]
-; CHECK-NEXT:    mov v2.16b, v0.16b
-; CHECK-NEXT:    mov z3.d, z0.d
-; CHECK-NEXT:    mov v2.b[1], v0.b[1]
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #16
-; CHECK-NEXT:    fmov s1, w8
-; CHECK-NEXT:    mov x8, #8 // =0x8
-; CHECK-NEXT:    ext v4.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT:    mov v1.b[1], v0.b[9]
-; CHECK-NEXT:    mov v2.b[2], v0.b[2]
-; CHECK-NEXT:    mov v1.b[2], v0.b[10]
-; CHECK-NEXT:    mov v2.b[3], v0.b[3]
-; CHECK-NEXT:    mov v1.b[3], v0.b[11]
-; CHECK-NEXT:    mov v2.b[4], v0.b[4]
-; CHECK-NEXT:    mov v1.b[4], v0.b[12]
-; CHECK-NEXT:    mov v2.b[5], v0.b[5]
-; CHECK-NEXT:    mov v1.b[5], v0.b[13]
-; CHECK-NEXT:    mov v2.b[6], v0.b[6]
-; CHECK-NEXT:    mov v1.b[6], v0.b[14]
-; CHECK-NEXT:    mov v2.b[7], v0.b[7]
-; CHECK-NEXT:    mov v1.b[7], v0.b[15]
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    mov v1.b[1], v0.b[1]
+; CHECK-NEXT:    fmov s2, w8
+; CHECK-NEXT:    mov x8, #16 // =0x10
+; CHECK-NEXT:    mov v2.b[1], v0.b[9]
+; CHECK-NEXT:    mov v1.b[2], v0.b[2]
+; CHECK-NEXT:    mov v2.b[2], v0.b[10]
+; CHECK-NEXT:    mov v1.b[3], v0.b[3]
+; CHECK-NEXT:    mov v2.b[3], v0.b[11]
+; CHECK-NEXT:    mov v1.b[4], v0.b[4]
+; CHECK-NEXT:    mov v2.b[4], v0.b[12]
+; CHECK-NEXT:    mov v1.b[5], v0.b[5]
+; CHECK-NEXT:    mov v2.b[5], v0.b[13]
+; CHECK-NEXT:    mov v1.b[6], v0.b[6]
+; CHECK-NEXT:    mov v2.b[6], v0.b[14]
+; CHECK-NEXT:    mov v1.b[7], v0.b[7]
+; CHECK-NEXT:    mov v2.b[7], v0.b[15]
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
+; CHECK-NEXT:    uunpklo z1.h, z1.b
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    uunpklo z2.h, z2.b
-; CHECK-NEXT:    uunpklo z0.h, z1.b
-; CHECK-NEXT:    uunpklo z1.h, z3.b
-; CHECK-NEXT:    uunpklo z3.h, z4.b
-; CHECK-NEXT:    uunpklo z2.s, z2.h
-; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
+; CHECK-NEXT:    uunpklo z3.h, z3.b
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpklo z2.s, z2.h
+; CHECK-NEXT:    lsl z1.s, z1.s, #31
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
-; CHECK-NEXT:    lsl z2.s, z2.s, #31
 ; CHECK-NEXT:    lsl z0.s, z0.s, #31
-; CHECK-NEXT:    lsl z1.s, z1.s, #31
+; CHECK-NEXT:    asr z1.s, z1.s, #31
+; CHECK-NEXT:    lsl z2.s, z2.s, #31
+; CHECK-NEXT:    asr z0.s, z0.s, #31
+; CHECK-NEXT:    and z1.s, z1.s, #0x1
 ; CHECK-NEXT:    lsl z3.s, z3.s, #31
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
-; CHECK-NEXT:    asr z0.s, z0.s, #31
-; CHECK-NEXT:    asr z1.s, z1.s, #31
+; CHECK-NEXT:    and z0.s, z0.s, #0x1
+; CHECK-NEXT:    cmpne p4.s, p0/z, z1.s, #0
+; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; CHECK-NEXT:    asr z3.s, z3.s, #31
 ; CHECK-NEXT:    and z2.s, z2.s, #0x1
-; CHECK-NEXT:    and z0.s, z0.s, #0x1
-; CHECK-NEXT:    and z1.s, z1.s, #0x1
-; CHECK-NEXT:    and z3.s, z3.s, #0x1
-; CHECK-NEXT:    cmpne p4.s, p0/z, z2.s, #0
-; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x0]
 ; CHECK-NEXT:    cmpne p1.s, p0/z, z0.s, #0
-; CHECK-NEXT:    cmpne p2.s, p0/z, z1.s, #0
-; CHECK-NEXT:    cmpne p3.s, p0/z, z3.s, #0
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
-; CHECK-NEXT:    ld1w { z1.s }, p0/z,...
[truncated]

@rj-jesus
Copy link
Contributor Author

rj-jesus commented Mar 7, 2025

I'll commit this to get the bot back to green while I look into it offline.

@rj-jesus rj-jesus merged commit 21610e3 into main Mar 7, 2025
9 of 13 checks passed
@rj-jesus rj-jesus deleted the revert-129732-rjj/aarch64-sve-vls-imm-addressing-modes branch March 7, 2025 09:35
rj-jesus added a commit to rj-jesus/llvm-project that referenced this pull request Mar 10, 2025
jph-13 pushed a commit to jph-13/llvm-project that referenced this pull request Mar 21, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AArch64 clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants