Skip to content

Commit 40e2589

Browse files
committed
cmd/compile,math: improve int<->float conversions on ppc64x
The functions Float64bits and Float64frombits perform poorly on ppc64x because the int<->float conversions often result in load and store sequences to handle the type change. This patch adds more rules to recognize those sequences and use register to register moves and avoid unnecessary loads and stores where possible. There were some existing rules to improve these conversions, but this provides additional improvements. Included here: - New instruction FCFIDS to improve on conversion to 32 bit - Rename Xf2i64 and Xi2f64 as MFVSRD and MTVSRD, respectively, to match the asm - Add rules to lower some of the load/store sequences for conversions - Added new go asm to ppc64.s testcase. Improvements (old, new, delta): BenchmarkAbs-16 2.16 0.93 -56.94% BenchmarkCopysign-16 2.66 1.18 -55.64% BenchmarkRound-16 4.82 2.69 -44.19% BenchmarkSignbit-16 1.71 1.14 -33.33% BenchmarkFrexp-16 11.4 7.94 -30.35% BenchmarkLogb-16 10.4 7.34 -29.42% BenchmarkLdexp-16 15.7 11.2 -28.66% BenchmarkIlogb-16 10.2 7.32 -28.24% BenchmarkPowInt-16 69.6 55.9 -19.68% BenchmarkModf-16 10.1 8.19 -18.91% BenchmarkLog2-16 17.4 14.3 -17.82% BenchmarkCbrt-16 45.0 37.3 -17.11% BenchmarkAtanh-16 57.6 48.3 -16.15% BenchmarkRemainder-16 76.6 65.4 -14.62% BenchmarkGamma-16 26.0 22.5 -13.46% BenchmarkPowFrac-16 197 174 -11.68% BenchmarkMod-16 112 99.8 -10.89% BenchmarkAsinh-16 59.9 53.7 -10.35% BenchmarkAcosh-16 44.8 40.3 -10.04% Updates golang#21390 Change-Id: I56cc991fc2e55249d69518d4e1ba76cc23904e35 Reviewed-on: https://go-review.googlesource.com/63290 Reviewed-by: Michael Munday <[email protected]>
1 parent f351dbf commit 40e2589

File tree

9 files changed

+175
-63
lines changed

9 files changed

+175
-63
lines changed

src/cmd/asm/internal/asm/testdata/ppc64.s

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -550,6 +550,14 @@ label1:
550550
// ftsqrt BF, FRB
551551
FTSQRT F2,$7
552552

553+
// FCFID
554+
// FCFIDS
555+
556+
FCFID F2,F3
557+
FCFIDCC F3,F3
558+
FCFIDS F2,F3
559+
FCFIDSCC F2,F3
560+
553561
//
554562
// CMP
555563
//

src/cmd/compile/internal/ppc64/ssa.go

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -152,29 +152,6 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
152152
p.To.Reg = y
153153
}
154154

155-
case ssa.OpPPC64Xf2i64:
156-
{
157-
x := v.Args[0].Reg()
158-
y := v.Reg()
159-
160-
p := s.Prog(ppc64.AMFVSRD)
161-
p.From.Type = obj.TYPE_REG
162-
p.From.Reg = x
163-
p.To.Type = obj.TYPE_REG
164-
p.To.Reg = y
165-
}
166-
case ssa.OpPPC64Xi2f64:
167-
{
168-
x := v.Args[0].Reg()
169-
y := v.Reg()
170-
171-
p := s.Prog(ppc64.AMTVSRD)
172-
p.From.Type = obj.TYPE_REG
173-
p.From.Reg = x
174-
p.To.Type = obj.TYPE_REG
175-
p.To.Reg = y
176-
}
177-
178155
case ssa.OpPPC64LoweredAtomicAnd8,
179156
ssa.OpPPC64LoweredAtomicOr8:
180157
// SYNC
@@ -597,7 +574,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
597574
p.To.Type = obj.TYPE_REG
598575
p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
599576

600-
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB:
577+
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD:
601578
r := v.Reg()
602579
p := s.Prog(v.Op.Asm())
603580
p.To.Type = obj.TYPE_REG

src/cmd/compile/internal/ssa/gen/PPC64.rules

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -57,19 +57,25 @@
5757
(Div64F x y) -> (FDIV x y)
5858

5959
// Lowering float <-> int
60-
(Cvt32to32F x) -> (FRSP (FCFID (Xi2f64 (SignExt32to64 x))))
61-
(Cvt32to64F x) -> (FCFID (Xi2f64 (SignExt32to64 x)))
62-
(Cvt64to32F x) -> (FRSP (FCFID (Xi2f64 x)))
63-
(Cvt64to64F x) -> (FCFID (Xi2f64 x))
60+
(Cvt32to32F x) -> (FCFIDS (MTVSRD (SignExt32to64 x)))
61+
(Cvt32to64F x) -> (FCFID (MTVSRD (SignExt32to64 x)))
62+
(Cvt64to32F x) -> (FCFIDS (MTVSRD x))
63+
(Cvt64to64F x) -> (FCFID (MTVSRD x))
6464

65-
(Cvt32Fto32 x) -> (Xf2i64 (FCTIWZ x))
66-
(Cvt32Fto64 x) -> (Xf2i64 (FCTIDZ x))
67-
(Cvt64Fto32 x) -> (Xf2i64 (FCTIWZ x))
68-
(Cvt64Fto64 x) -> (Xf2i64 (FCTIDZ x))
65+
(Cvt32Fto32 x) -> (MFVSRD (FCTIWZ x))
66+
(Cvt32Fto64 x) -> (MFVSRD (FCTIDZ x))
67+
(Cvt64Fto32 x) -> (MFVSRD (FCTIWZ x))
68+
(Cvt64Fto64 x) -> (MFVSRD (FCTIDZ x))
6969

7070
(Cvt32Fto64F x) -> x // Note x will have the wrong type for patterns dependent on Float32/Float64
7171
(Cvt64Fto32F x) -> (FRSP x)
7272

73+
(MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr x _)) -> (MFVSRD x)
74+
(FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr x _)) -> (MTVSRD x)
75+
76+
(FMOVDstore [off] {sym} ptr (MTVSRD x) mem) -> (MOVDstore [off] {sym} ptr x mem)
77+
(MOVDstore [off] {sym} ptr (MFVSRD x) mem) -> (FMOVDstore [off] {sym} ptr x mem)
78+
7379
(Round32F x) -> (LoweredRound32F x)
7480
(Round64F x) -> (LoweredRound64F x)
7581

src/cmd/compile/internal/ssa/gen/PPC64Ops.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ func init() {
223223
{name: "FCTIDZ", argLength: 1, reg: fp11, asm: "FCTIDZ", typ: "Float64"}, // convert float to 64-bit int round towards zero
224224
{name: "FCTIWZ", argLength: 1, reg: fp11, asm: "FCTIWZ", typ: "Float64"}, // convert float to 32-bit int round towards zero
225225
{name: "FCFID", argLength: 1, reg: fp11, asm: "FCFID", typ: "Float64"}, // convert 64-bit integer to float
226+
{name: "FCFIDS", argLength: 1, reg: fp11, asm: "FCFIDS", typ: "Float32"}, // convert 64-bit integer to 32-bit float
226227
{name: "FRSP", argLength: 1, reg: fp11, asm: "FRSP", typ: "Float64"}, // round float to 32-bit value
227228

228229
// Movement between float and integer registers with no change in bits; accomplished with stores+loads on PPC.
@@ -231,8 +232,8 @@ func init() {
231232
// There are optimizations that should apply -- (Xi2f64 (MOVWload (not-ADD-ptr+offset) ) ) could use
232233
// the word-load instructions. (Xi2f64 (MOVDload ptr )) can be (FMOVDload ptr)
233234

234-
{name: "Xf2i64", argLength: 1, reg: fpgp, typ: "Int64"}, // move 64 bits of F register into G register
235-
{name: "Xi2f64", argLength: 1, reg: gpfp, typ: "Float64"}, // move 64 bits of G register into F register
235+
{name: "MFVSRD", argLength: 1, reg: fpgp, asm: "MFVSRD", typ: "Int64"}, // move 64 bits of F register into G register
236+
{name: "MTVSRD", argLength: 1, reg: gpfp, asm: "MTVSRD", typ: "Float64"}, // move 64 bits of G register into F register
236237

237238
{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0&arg1
238239
{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"}, // arg0&^arg1

src/cmd/compile/internal/ssa/opGen.go

Lines changed: 20 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)