Skip to content

Commit 52078d5

Browse files
committed
Merging r278559:
------------------------------------------------------------------------ r278559 | efriedma | 2016-08-12 13:28:02 -0700 (Fri, 12 Aug 2016) | 7 lines [AArch64LoadStoreOpt] Handle offsets correctly for post-indexed paired loads. Trunk would try to create something like "stp x9, x8, [x0], #512", which isn't actually a valid instruction. Differential revision: https://reviews.llvm.org/D23368 ------------------------------------------------------------------------ llvm-svn: 279123
1 parent 5050823 commit 52078d5

File tree

2 files changed

+107
-6
lines changed

2 files changed

+107
-6
lines changed

llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -1427,9 +1427,6 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
14271427
default:
14281428
break;
14291429
case AArch64::SUBXri:
1430-
// Negate the offset for a SUB instruction.
1431-
Offset *= -1;
1432-
// FALLTHROUGH
14331430
case AArch64::ADDXri:
14341431
// Make sure it's a vanilla immediate operand, not a relocation or
14351432
// anything else we can't handle.
@@ -1447,6 +1444,9 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
14471444

14481445
bool IsPairedInsn = isPairedLdSt(MemMI);
14491446
int UpdateOffset = MI.getOperand(2).getImm();
1447+
if (MI.getOpcode() == AArch64::SUBXri)
1448+
UpdateOffset = -UpdateOffset;
1449+
14501450
// For non-paired load/store instructions, the immediate must fit in a
14511451
// signed 9-bit integer.
14521452
if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256))
@@ -1461,13 +1461,13 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
14611461
break;
14621462

14631463
int ScaledOffset = UpdateOffset / Scale;
1464-
if (ScaledOffset > 64 || ScaledOffset < -64)
1464+
if (ScaledOffset > 63 || ScaledOffset < -64)
14651465
break;
14661466
}
14671467

14681468
// If we have a non-zero Offset, we check that it matches the amount
14691469
// we're adding to the register.
1470-
if (!Offset || Offset == MI.getOperand(2).getImm())
1470+
if (!Offset || Offset == UpdateOffset)
14711471
return true;
14721472
break;
14731473
}

llvm/test/CodeGen/AArch64/ldst-opt.ll

+102-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -o - %s | FileCheck %s
1+
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck %s
22

33
; This file contains tests for the AArch64 load/store optimizer.
44

@@ -1232,3 +1232,104 @@ for.body:
12321232
end:
12331233
ret void
12341234
}
1235+
1236+
define void @post-indexed-sub-doubleword-offset-min(i64* %a, i64* %b, i64 %count) nounwind {
1237+
; CHECK-LABEL: post-indexed-sub-doubleword-offset-min
1238+
; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-256
1239+
; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-256
1240+
br label %for.body
1241+
for.body:
1242+
%phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
1243+
%phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
1244+
%i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
1245+
%gep1 = getelementptr i64, i64* %phi1, i64 1
1246+
%load1 = load i64, i64* %gep1
1247+
%gep2 = getelementptr i64, i64* %phi2, i64 1
1248+
store i64 %load1, i64* %gep2
1249+
%load2 = load i64, i64* %phi1
1250+
store i64 %load2, i64* %phi2
1251+
%dec.i = add nsw i64 %i, -1
1252+
%gep3 = getelementptr i64, i64* %phi2, i64 -32
1253+
%gep4 = getelementptr i64, i64* %phi1, i64 -32
1254+
%cond = icmp sgt i64 %dec.i, 0
1255+
br i1 %cond, label %for.body, label %end
1256+
end:
1257+
ret void
1258+
}
1259+
1260+
define void @post-indexed-doubleword-offset-out-of-range(i64* %a, i64* %b, i64 %count) nounwind {
1261+
; CHECK-LABEL: post-indexed-doubleword-offset-out-of-range
1262+
; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}]
1263+
; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256
1264+
; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}]
1265+
; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256
1266+
1267+
br label %for.body
1268+
for.body:
1269+
%phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
1270+
%phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
1271+
%i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
1272+
%gep1 = getelementptr i64, i64* %phi1, i64 1
1273+
%load1 = load i64, i64* %gep1
1274+
%gep2 = getelementptr i64, i64* %phi2, i64 1
1275+
store i64 %load1, i64* %gep2
1276+
%load2 = load i64, i64* %phi1
1277+
store i64 %load2, i64* %phi2
1278+
%dec.i = add nsw i64 %i, -1
1279+
%gep3 = getelementptr i64, i64* %phi2, i64 32
1280+
%gep4 = getelementptr i64, i64* %phi1, i64 32
1281+
%cond = icmp sgt i64 %dec.i, 0
1282+
br i1 %cond, label %for.body, label %end
1283+
end:
1284+
ret void
1285+
}
1286+
1287+
define void @post-indexed-paired-min-offset(i64* %a, i64* %b, i64 %count) nounwind {
1288+
; CHECK-LABEL: post-indexed-paired-min-offset
1289+
; CHECK: ldp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}], #-512
1290+
; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}], #-512
1291+
br label %for.body
1292+
for.body:
1293+
%phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
1294+
%phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
1295+
%i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
1296+
%gep1 = getelementptr i64, i64* %phi1, i64 1
1297+
%load1 = load i64, i64* %gep1
1298+
%gep2 = getelementptr i64, i64* %phi2, i64 1
1299+
%load2 = load i64, i64* %phi1
1300+
store i64 %load1, i64* %gep2
1301+
store i64 %load2, i64* %phi2
1302+
%dec.i = add nsw i64 %i, -1
1303+
%gep3 = getelementptr i64, i64* %phi2, i64 -64
1304+
%gep4 = getelementptr i64, i64* %phi1, i64 -64
1305+
%cond = icmp sgt i64 %dec.i, 0
1306+
br i1 %cond, label %for.body, label %end
1307+
end:
1308+
ret void
1309+
}
1310+
1311+
define void @post-indexed-paired-offset-out-of-range(i64* %a, i64* %b, i64 %count) nounwind {
1312+
; CHECK-LABEL: post-indexed-paired-offset-out-of-range
1313+
; CHECK: ldp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}]
1314+
; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #512
1315+
; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}]
1316+
; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #512
1317+
br label %for.body
1318+
for.body:
1319+
%phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
1320+
%phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
1321+
%i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
1322+
%gep1 = getelementptr i64, i64* %phi1, i64 1
1323+
%load1 = load i64, i64* %phi1
1324+
%gep2 = getelementptr i64, i64* %phi2, i64 1
1325+
%load2 = load i64, i64* %gep1
1326+
store i64 %load1, i64* %gep2
1327+
store i64 %load2, i64* %phi2
1328+
%dec.i = add nsw i64 %i, -1
1329+
%gep3 = getelementptr i64, i64* %phi2, i64 64
1330+
%gep4 = getelementptr i64, i64* %phi1, i64 64
1331+
%cond = icmp sgt i64 %dec.i, 0
1332+
br i1 %cond, label %for.body, label %end
1333+
end:
1334+
ret void
1335+
}

0 commit comments

Comments
 (0)