Skip to content

Commit 7615b20

Browse files
pmurlaboger
authored andcommitted
cmd/compile: generate subfic on ppc64
This merges an lis + subf into subfic, and for 32b constants lwa + subf into oris + ori + subf. The carry bit is no longer used in code generation, therefore I think we can clobber it as needed. Note, lowered borrow/carry arithmetic is self-contained and thus is not affected. A few extra rules are added to ensure early transformations to SUBFCconst don't trip up earlier rules, fold constant operations, or otherwise simplify lowering. Likewise, tests are added to ensure all rules are hit. Generic constant folding catches trivial cases, however some lowering rules insert arithmetic which can introduce new opportunities (e.g BitLen or Slicemask). I couldn't find a specific benchmark to demonstrate noteworthy improvements, but this is generating subfic in many of the default bent test binaries, so we are at least saving a little code space. Change-Id: Iad7c6e5767eaa9dc24dc1c989bd1c8cfe1982012 Reviewed-on: https://go-review.googlesource.com/c/go/+/249461 Run-TryBot: Lynn Boger <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Carlos Eduardo Seo <[email protected]>
1 parent 2013f70 commit 7615b20

File tree

8 files changed

+686
-17
lines changed

8 files changed

+686
-17
lines changed

src/cmd/compile/internal/ppc64/ssa.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -649,6 +649,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
649649
p.To.Type = obj.TYPE_REG
650650
p.To.Reg = v.Reg()
651651

652+
case ssa.OpPPC64SUBFCconst:
653+
p := s.Prog(v.Op.Asm())
654+
p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt})
655+
p.From.Type = obj.TYPE_REG
656+
p.From.Reg = v.Args[0].Reg()
657+
p.To.Type = obj.TYPE_REG
658+
p.To.Reg = v.Reg()
659+
652660
case ssa.OpPPC64ANDCCconst:
653661
p := s.Prog(v.Op.Asm())
654662
p.Reg = v.Args[0].Reg()

src/cmd/compile/internal/ssa/gen/PPC64.rules

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,13 +110,21 @@
110110
// Rotate generation with non-const shift
111111
// these match patterns from math/bits/RotateLeft[32|64], but there could be others
112112
(ADD (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
113+
(ADD (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
113114
( OR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
115+
( OR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
114116
(XOR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
117+
(XOR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
115118

119+
120+
(ADD (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
116121
(ADD (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
122+
( OR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
117123
( OR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
124+
(XOR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
118125
(XOR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
119126

127+
120128
// Lowering rotates
121129
(RotateLeft32 x y) => (ROTLW x y)
122130
(RotateLeft64 x y) => (ROTL x y)
@@ -192,11 +200,15 @@
192200
(Rsh64Ux64 x (AND y (MOVDconst [63]))) => (SRD x (ANDconst <typ.Int64> [63] y))
193201
(Rsh64Ux64 x (ANDconst <typ.UInt> [63] y)) => (SRD x (ANDconst <typ.UInt> [63] y))
194202
(Rsh64Ux64 x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y))) => (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
203+
(Rsh64Ux64 x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y))) => (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
195204
(Rsh64Ux64 x (SUB <typ.UInt> (MOVDconst [64]) (AND <typ.UInt> y (MOVDconst [63])))) => (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
205+
(Rsh64Ux64 x (SUBFCconst <typ.UInt> [64] (AND <typ.UInt> y (MOVDconst [63])))) => (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
196206
(Rsh64x64 x (AND y (MOVDconst [63]))) => (SRAD x (ANDconst <typ.Int64> [63] y))
197207
(Rsh64x64 x (ANDconst <typ.UInt> [63] y)) => (SRAD x (ANDconst <typ.UInt> [63] y))
198208
(Rsh64x64 x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y))) => (SRAD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
209+
(Rsh64x64 x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y))) => (SRAD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
199210
(Rsh64x64 x (SUB <typ.UInt> (MOVDconst [64]) (AND <typ.UInt> y (MOVDconst [63])))) => (SRAD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
211+
(Rsh64x64 x (SUBFCconst <typ.UInt> [64] (AND <typ.UInt> y (MOVDconst [63])))) => (SRAD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
200212

201213
(Lsh64x64 x y) => (SLD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
202214
(Rsh64x64 x y) => (SRAD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
@@ -208,12 +220,16 @@
208220
(Rsh32Ux64 x (AND y (MOVDconst [31]))) => (SRW x (ANDconst <typ.Int32> [31] y))
209221
(Rsh32Ux64 x (ANDconst <typ.UInt> [31] y)) => (SRW x (ANDconst <typ.UInt> [31] y))
210222
(Rsh32Ux64 x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y))) => (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
223+
(Rsh32Ux64 x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y))) => (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
211224
(Rsh32Ux64 x (SUB <typ.UInt> (MOVDconst [32]) (AND <typ.UInt> y (MOVDconst [31])))) => (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
225+
(Rsh32Ux64 x (SUBFCconst <typ.UInt> [32] (AND <typ.UInt> y (MOVDconst [31])))) => (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
212226

213227
(Rsh32x64 x (AND y (MOVDconst [31]))) => (SRAW x (ANDconst <typ.Int32> [31] y))
214228
(Rsh32x64 x (ANDconst <typ.UInt> [31] y)) => (SRAW x (ANDconst <typ.UInt> [31] y))
215229
(Rsh32x64 x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y))) => (SRAW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
230+
(Rsh32x64 x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y))) => (SRAW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
216231
(Rsh32x64 x (SUB <typ.UInt> (MOVDconst [32]) (AND <typ.UInt> y (MOVDconst [31])))) => (SRAW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
232+
(Rsh32x64 x (SUBFCconst <typ.UInt> [32] (AND <typ.UInt> y (MOVDconst [31])))) => (SRAW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
217233

218234
(Rsh32x64 x y) => (SRAW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
219235
(Rsh32Ux64 x y) => (SRW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
@@ -299,8 +315,8 @@
299315
(Ctz16 x) => (POPCNTW (MOVHZreg (ANDN <typ.Int16> (ADDconst <typ.Int16> [-1] x) x)))
300316
(Ctz8 x) => (POPCNTB (MOVBZreg (ANDN <typ.UInt8> (ADDconst <typ.UInt8> [-1] x) x)))
301317

302-
(BitLen64 x) => (SUB (MOVDconst [64]) (CNTLZD <typ.Int> x))
303-
(BitLen32 x) => (SUB (MOVDconst [32]) (CNTLZW <typ.Int> x))
318+
(BitLen64 x) => (SUBFCconst [64] (CNTLZD <typ.Int> x))
319+
(BitLen32 x) => (SUBFCconst [32] (CNTLZW <typ.Int> x))
304320

305321
(PopCount64 ...) => (POPCNTD ...)
306322
(PopCount32 x) => (POPCNTW (MOVWZreg x))
@@ -770,10 +786,19 @@
770786
(ADDconst [c] (ADDconst [d] x)) && is32Bit(c+d) => (ADDconst [c+d] x)
771787
(ADDconst [0] x) => x
772788
(SUB x (MOVDconst [c])) && is32Bit(-c) => (ADDconst [-c] x)
773-
// TODO deal with subtract-from-const
774789

775790
(ADDconst [c] (MOVDaddr [d] {sym} x)) && is32Bit(c+int64(d)) => (MOVDaddr [int32(c+int64(d))] {sym} x)
776791

792+
// Subtract from (with carry, but ignored) constant.
793+
// Note, these clobber the carry bit.
794+
(SUB (MOVDconst [c]) x) && is32Bit(c) => (SUBFCconst [c] x)
795+
(SUBFCconst [c] (NEG x)) => (ADDconst [c] x)
796+
(SUBFCconst [c] (SUBFCconst [d] x)) && is32Bit(c-d) => (ADDconst [c-d] x)
797+
(SUBFCconst [0] x) => (NEG x)
798+
(ADDconst [c] (SUBFCconst [d] x)) && is32Bit(c+d) => (SUBFCconst [c+d] x)
799+
(NEG (ADDconst [c] x)) && is32Bit(-c) => (SUBFCconst [-c] x)
800+
(NEG (SUBFCconst [c] x)) && is32Bit(-c) => (ADDconst [-c] x)
801+
777802
// Use register moves instead of stores and loads to move int<=>float values
778803
// Common with math Float64bits, Float64frombits
779804
(MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr x _)) => (MFVSRD x)

src/cmd/compile/internal/ssa/gen/PPC64Ops.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ func init() {
175175
{name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true}, // arg0+arg1
176176
{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true}, // arg0+arg1
177177
{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"}, // arg0-arg1
178+
{name: "SUBFCconst", argLength: 1, reg: gp11, asm: "SUBC", aux: "Int64"}, // auxInt - arg0 (with carry)
178179
{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"}, // arg0-arg1
179180
{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"}, // arg0-arg1
180181

src/cmd/compile/internal/ssa/opGen.go

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)