Skip to content

Commit 4456334

Browse files
wdvxdr1123randall77
authored andcommitted
cmd/compile: move {SHL,SHR,SAR}X rules to late lower pass
This can reduce a bunch of rules. Change-Id: Id7d644307c295a0ed16eb837b3755d1117a4fbf7 Reviewed-on: https://go-review.googlesource.com/c/go/+/440036 Reviewed-by: Keith Randall <[email protected]> Reviewed-by: Keith Randall <[email protected]> Run-TryBot: Wayne Zuo <[email protected]> Reviewed-by: David Chase <[email protected]> TryBot-Result: Gopher Robot <[email protected]>
1 parent 1c783f7 commit 4456334

File tree

4 files changed

+281
-1966
lines changed

4 files changed

+281
-1966
lines changed

src/cmd/compile/internal/ssa/_gen/AMD64.rules

+22-46
Original file line numberDiff line numberDiff line change
@@ -208,11 +208,6 @@
208208
(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SARW x y)
209209
(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SARB x y)
210210

211-
// Prefer SARX/SHLX/SHRX instruction because it has less register restriction on the shift input.
212-
(SAR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SARX(Q|L) x y)
213-
(SHL(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHLX(Q|L) x y)
214-
(SHR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHRX(Q|L) x y)
215-
216211
// Lowering integer comparisons
217212
(Less(64|32|16|8) x y) => (SETL (CMP(Q|L|W|B) x y))
218213
(Less(64|32|16|8)U x y) => (SETB (CMP(Q|L|W|B) x y))
@@ -605,8 +600,6 @@
605600
// mutandis, for UGE and SETAE, and CC and SETCC.
606601
((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => ((ULT|UGE) (BTL x y))
607602
((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => ((ULT|UGE) (BTQ x y))
608-
((NE|EQ) (TESTL (SHLXL (MOVLconst [1]) x) y)) => ((ULT|UGE) (BTL x y))
609-
((NE|EQ) (TESTQ (SHLXQ (MOVQconst [1]) x) y)) => ((ULT|UGE) (BTQ x y))
610603
((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
611604
=> ((ULT|UGE) (BTLconst [int8(log32(c))] x))
612605
((NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
@@ -615,8 +608,6 @@
615608
=> ((ULT|UGE) (BTQconst [int8(log64(c))] x))
616609
(SET(NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => (SET(B|AE) (BTL x y))
617610
(SET(NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => (SET(B|AE) (BTQ x y))
618-
(SET(NE|EQ) (TESTL (SHLXL (MOVLconst [1]) x) y)) => (SET(B|AE) (BTL x y))
619-
(SET(NE|EQ) (TESTQ (SHLXQ (MOVQconst [1]) x) y)) => (SET(B|AE) (BTQ x y))
620611
(SET(NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
621612
=> (SET(B|AE) (BTLconst [int8(log32(c))] x))
622613
(SET(NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
@@ -628,10 +619,6 @@
628619
=> (SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
629620
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
630621
=> (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem)
631-
(SET(NE|EQ)store [off] {sym} ptr (TESTL (SHLXL (MOVLconst [1]) x) y) mem)
632-
=> (SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
633-
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLXQ (MOVQconst [1]) x) y) mem)
634-
=> (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem)
635622
(SET(NE|EQ)store [off] {sym} ptr (TESTLconst [c] x) mem) && isUint32PowerOfTwo(int64(c))
636623
=> (SET(B|AE)store [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem)
637624
(SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUint64PowerOfTwo(int64(c))
@@ -644,7 +631,6 @@
644631
(BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 => (BTQconst [c+d] x)
645632
(BT(Q|L)const [c] (SHLQconst [d] x)) && c>d => (BT(Q|L)const [c-d] x)
646633
(BT(Q|L)const [0] s:(SHRQ x y)) => (BTQ y x)
647-
(BT(Q|L)const [0] s:(SHRXQ x y)) => (BTQ y x)
648634
(BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 => (BTLconst [c+d] x)
649635
(BTLconst [c] (SHLLconst [d] x)) && c>d => (BTLconst [c-d] x)
650636
(BTLconst [0] s:(SHR(L|XL) x y)) => (BTL y x)
@@ -659,8 +645,6 @@
659645
// Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
660646
(OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
661647
(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
662-
(OR(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
663-
(XOR(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
664648

665649
// Convert ORconst into BTS, if the code gets smaller, with boundary being
666650
// (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
@@ -676,8 +660,6 @@
676660
// Recognize bit clearing: a &^= 1<<b
677661
(AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
678662
(ANDN(Q|L) x (SHL(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
679-
(AND(Q|L) (NOT(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
680-
(ANDN(Q|L) x (SHLX(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
681663
(ANDQconst [c] x) && isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
682664
=> (BTRQconst [int8(log32(^c))] x)
683665
(ANDLconst [c] x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
@@ -819,45 +801,39 @@
819801

820802
(SHLQ x (MOV(Q|L)const [c])) => (SHLQconst [int8(c&63)] x)
821803
(SHLL x (MOV(Q|L)const [c])) => (SHLLconst [int8(c&31)] x)
822-
(SHLXQ x (MOV(Q|L)const [c])) => (SHLQconst [int8(c&63)] x)
823-
(SHLXL x (MOV(Q|L)const [c])) => (SHLLconst [int8(c&31)] x)
824804

825805
(SHRQ x (MOV(Q|L)const [c])) => (SHRQconst [int8(c&63)] x)
826806
(SHRL x (MOV(Q|L)const [c])) => (SHRLconst [int8(c&31)] x)
827807
(SHRW x (MOV(Q|L)const [c])) && c&31 < 16 => (SHRWconst [int8(c&31)] x)
828808
(SHRW _ (MOV(Q|L)const [c])) && c&31 >= 16 => (MOVLconst [0])
829809
(SHRB x (MOV(Q|L)const [c])) && c&31 < 8 => (SHRBconst [int8(c&31)] x)
830810
(SHRB _ (MOV(Q|L)const [c])) && c&31 >= 8 => (MOVLconst [0])
831-
(SHRXQ x (MOV(Q|L)const [c])) => (SHRQconst [int8(c&63)] x)
832-
(SHRXL x (MOV(Q|L)const [c])) => (SHRLconst [int8(c&31)] x)
833811

834812
(SARQ x (MOV(Q|L)const [c])) => (SARQconst [int8(c&63)] x)
835813
(SARL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
836814
(SARW x (MOV(Q|L)const [c])) => (SARWconst [int8(min(int64(c)&31,15))] x)
837815
(SARB x (MOV(Q|L)const [c])) => (SARBconst [int8(min(int64(c)&31,7))] x)
838-
(SARXQ x (MOV(Q|L)const [c])) => (SARQconst [int8(c&63)] x)
839-
(SARXL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
840816

841817
// Operations which don't affect the low 6/5 bits of the shift amount are NOPs.
842-
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
843-
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> y))
844-
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
845-
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> y))
846-
847-
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
848-
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> y))
849-
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
850-
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> y))
851-
852-
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
853-
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> y))
854-
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
855-
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> y))
856-
857-
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
858-
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> y))
859-
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
860-
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> y))
818+
((SHLQ|SHRQ|SARQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x y)
819+
((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
820+
((SHLQ|SHRQ|SARQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y)
821+
((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
822+
823+
((SHLL|SHRL|SARL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL) x y)
824+
((SHLL|SHRL|SARL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL) x (NEGQ <t> y))
825+
((SHLL|SHRL|SARL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y)
826+
((SHLL|SHRL|SARL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGQ <t> y))
827+
828+
((SHLQ|SHRQ|SARQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x y)
829+
((SHLQ|SHRQ|SARQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
830+
((SHLQ|SHRQ|SARQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y)
831+
((SHLQ|SHRQ|SARQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
832+
833+
((SHLL|SHRL|SARL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL) x y)
834+
((SHLL|SHRL|SARL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL) x (NEGL <t> y))
835+
((SHLL|SHRL|SARL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y)
836+
((SHLL|SHRL|SARL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGL <t> y))
861837

862838
// rotate left negative = rotate right
863839
(ROLQ x (NEG(Q|L) y)) => (RORQ x y)
@@ -2231,9 +2207,9 @@
22312207
&& clobber(x0, x1, sh)
22322208
=> @mergePoint(b,x0,x1) (MOVBEQload [i] {s} p1 mem)
22332209

2234-
(SARX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SARX(Q|L)load [off] {sym} ptr x mem)
2235-
(SHLX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SHLX(Q|L)load [off] {sym} ptr x mem)
2236-
(SHRX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SHRX(Q|L)load [off] {sym} ptr x mem)
2210+
(SAR(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SARX(Q|L)load [off] {sym} ptr x mem)
2211+
(SHL(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHLX(Q|L)load [off] {sym} ptr x mem)
2212+
(SHR(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHRX(Q|L)load [off] {sym} ptr x mem)
22372213

22382214
((SHL|SHR|SAR)XQload [off] {sym} ptr (MOVQconst [c]) mem) => ((SHL|SHR|SAR)Qconst [int8(c&63)] (MOVQload [off] {sym} ptr mem))
22392215
((SHL|SHR|SAR)XQload [off] {sym} ptr (MOVLconst [c]) mem) => ((SHL|SHR|SAR)Qconst [int8(c&63)] (MOVQload [off] {sym} ptr mem))

src/cmd/compile/internal/ssa/_gen/AMD64latelower.rules

+5
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,8 @@
99
(LEA(Q|L|W)2 <t> [c] {s} x y) && !isPtr(t) && c != 0 && s == nil => (ADD(Q|L|L)const [c] (LEA(Q|L|W)2 <x.Type> x y))
1010
(LEA(Q|L|W)4 <t> [c] {s} x y) && !isPtr(t) && c != 0 && s == nil => (ADD(Q|L|L)const [c] (LEA(Q|L|W)4 <x.Type> x y))
1111
(LEA(Q|L|W)8 <t> [c] {s} x y) && !isPtr(t) && c != 0 && s == nil => (ADD(Q|L|L)const [c] (LEA(Q|L|W)8 <x.Type> x y))
12+
13+
// Prefer SARX/SHLX/SHRX instruction because it has less register restriction on the shift input.
14+
(SAR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SARX(Q|L) x y)
15+
(SHL(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHLX(Q|L) x y)
16+
(SHR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHRX(Q|L) x y)

0 commit comments

Comments
 (0)