Skip to content

Commit 1c783f7

Browse files
wdvxdr1123 authored and randall77 committed
cmd/compile: split 3 operand LEA in late lower pass
On newer amd64 CPUs, 3-operand LEA instructions are slow; CL 114655 split them into 2 LEA instructions in genssa. This CL makes the late lower pass run after addressing modes, and splits 3-operand LEAs in the late lower pass so that we can do common-subexpression elimination on the split LEAs. Updates #21735 Change-Id: Ied49139c7abab655e1a14a6fd793bdf9f987d1f1 Reviewed-on: https://go-review.googlesource.com/c/go/+/440035 TryBot-Result: Gopher Robot <[email protected]> Run-TryBot: Wayne Zuo <[email protected]> Reviewed-by: Keith Randall <[email protected]> Reviewed-by: Keith Randall <[email protected]> Reviewed-by: Joedian Reid <[email protected]>
1 parent 7ffc1e4 commit 1c783f7

File tree

4 files changed

+419
-1
lines changed

4 files changed

+419
-1
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// Copyright 2022 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// split 3 operand LEA.
6+
// Note: Don't split pointer computations in order to avoid invalid pointers.
7+
(LEA(Q|L|W)1 <t> [c] {s} x y) && isPtr(x.Type) && c != 0 && s == nil => (ADD(Q|L|L) x (ADD(Q|L|L)const <y.Type> [c] y))
8+
(LEA(Q|L|W)1 <t> [c] {s} x y) && !isPtr(x.Type) && c != 0 && s == nil => (ADD(Q|L|L) y (ADD(Q|L|L)const <x.Type> [c] x))
9+
(LEA(Q|L|W)2 <t> [c] {s} x y) && !isPtr(t) && c != 0 && s == nil => (ADD(Q|L|L)const [c] (LEA(Q|L|W)2 <x.Type> x y))
10+
(LEA(Q|L|W)4 <t> [c] {s} x y) && !isPtr(t) && c != 0 && s == nil => (ADD(Q|L|L)const [c] (LEA(Q|L|W)4 <x.Type> x y))
11+
(LEA(Q|L|W)8 <t> [c] {s} x y) && !isPtr(t) && c != 0 && s == nil => (ADD(Q|L|L)const [c] (LEA(Q|L|W)8 <x.Type> x y))

src/cmd/compile/internal/ssa/compile.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -486,8 +486,8 @@ var passes = [...]pass{
486486
{name: "insert resched checks", fn: insertLoopReschedChecks,
487487
disabled: !buildcfg.Experiment.PreemptibleLoops}, // insert resched checks in loops.
488488
{name: "lower", fn: lower, required: true},
489-
{name: "late lower", fn: lateLower, required: true},
490489
{name: "addressing modes", fn: addressingModes, required: false},
490+
{name: "late lower", fn: lateLower, required: true},
491491
{name: "lowered deadcode for cse", fn: deadcode}, // deadcode immediately before CSE avoids CSE making dead values live again
492492
{name: "lowered cse", fn: cse},
493493
{name: "elim unread autos", fn: elimUnreadAutos},

src/cmd/compile/internal/ssa/config.go

+1
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
185185
c.RegSize = 8
186186
c.lowerBlock = rewriteBlockAMD64
187187
c.lowerValue = rewriteValueAMD64
188+
c.lateLowerValue = rewriteValueAMD64latelower
188189
c.splitLoad = rewriteValueAMD64splitload
189190
c.registers = registersAMD64[:]
190191
c.gpRegMask = gpRegMaskAMD64

0 commit comments

Comments
 (0)