|
208 | 208 | (Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SARW x y)
|
209 | 209 | (Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SARB x y)
|
210 | 210 |
|
211 |
| -// Prefer SARX/SHLX/SHRX instruction because it has less register restriction on the shift input. |
212 |
| -(SAR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SARX(Q|L) x y) |
213 |
| -(SHL(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHLX(Q|L) x y) |
214 |
| -(SHR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHRX(Q|L) x y) |
215 |
| - |
216 | 211 | // Lowering integer comparisons
|
217 | 212 | (Less(64|32|16|8) x y) => (SETL (CMP(Q|L|W|B) x y))
|
218 | 213 | (Less(64|32|16|8)U x y) => (SETB (CMP(Q|L|W|B) x y))
|
|
605 | 600 | // mutandis, for UGE and SETAE, and CC and SETCC.
|
606 | 601 | ((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => ((ULT|UGE) (BTL x y))
|
607 | 602 | ((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => ((ULT|UGE) (BTQ x y))
|
608 |
| -((NE|EQ) (TESTL (SHLXL (MOVLconst [1]) x) y)) => ((ULT|UGE) (BTL x y)) |
609 |
| -((NE|EQ) (TESTQ (SHLXQ (MOVQconst [1]) x) y)) => ((ULT|UGE) (BTQ x y)) |
610 | 603 | ((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
|
611 | 604 | => ((ULT|UGE) (BTLconst [int8(log32(c))] x))
|
612 | 605 | ((NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
|
|
615 | 608 | => ((ULT|UGE) (BTQconst [int8(log64(c))] x))
|
616 | 609 | (SET(NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => (SET(B|AE) (BTL x y))
|
617 | 610 | (SET(NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => (SET(B|AE) (BTQ x y))
|
618 |
| -(SET(NE|EQ) (TESTL (SHLXL (MOVLconst [1]) x) y)) => (SET(B|AE) (BTL x y)) |
619 |
| -(SET(NE|EQ) (TESTQ (SHLXQ (MOVQconst [1]) x) y)) => (SET(B|AE) (BTQ x y)) |
620 | 611 | (SET(NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
|
621 | 612 | => (SET(B|AE) (BTLconst [int8(log32(c))] x))
|
622 | 613 | (SET(NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
|
|
628 | 619 | => (SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
|
629 | 620 | (SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
|
630 | 621 | => (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem)
|
631 |
| -(SET(NE|EQ)store [off] {sym} ptr (TESTL (SHLXL (MOVLconst [1]) x) y) mem) |
632 |
| - => (SET(B|AE)store [off] {sym} ptr (BTL x y) mem) |
633 |
| -(SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLXQ (MOVQconst [1]) x) y) mem) |
634 |
| - => (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem) |
635 | 622 | (SET(NE|EQ)store [off] {sym} ptr (TESTLconst [c] x) mem) && isUint32PowerOfTwo(int64(c))
|
636 | 623 | => (SET(B|AE)store [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem)
|
637 | 624 | (SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUint64PowerOfTwo(int64(c))
|
|
644 | 631 | (BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 => (BTQconst [c+d] x)
|
645 | 632 | (BT(Q|L)const [c] (SHLQconst [d] x)) && c>d => (BT(Q|L)const [c-d] x)
|
646 | 633 | (BT(Q|L)const [0] s:(SHRQ x y)) => (BTQ y x)
|
647 |
| -(BT(Q|L)const [0] s:(SHRXQ x y)) => (BTQ y x) |
648 | 634 | (BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 => (BTLconst [c+d] x)
|
649 | 635 | (BTLconst [c] (SHLLconst [d] x)) && c>d => (BTLconst [c-d] x)
|
650 | 636 | (BTLconst [0] s:(SHR(L|XL) x y)) => (BTL y x)
|
|
659 | 645 | // Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
|
660 | 646 | (OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
|
661 | 647 | (XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
|
662 |
| -(OR(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y) |
663 |
| -(XOR(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y) |
664 | 648 |
|
665 | 649 | // Convert ORconst into BTS, if the code gets smaller, with boundary being
|
666 | 650 | // (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
|
|
676 | 660 | // Recognize bit clearing: a &^= 1<<b
|
677 | 661 | (AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
|
678 | 662 | (ANDN(Q|L) x (SHL(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
|
679 |
| -(AND(Q|L) (NOT(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y) |
680 |
| -(ANDN(Q|L) x (SHLX(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y) |
681 | 663 | (ANDQconst [c] x) && isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
|
682 | 664 | => (BTRQconst [int8(log32(^c))] x)
|
683 | 665 | (ANDLconst [c] x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
|
|
819 | 801 |
|
820 | 802 | (SHLQ x (MOV(Q|L)const [c])) => (SHLQconst [int8(c&63)] x)
|
821 | 803 | (SHLL x (MOV(Q|L)const [c])) => (SHLLconst [int8(c&31)] x)
|
822 |
| -(SHLXQ x (MOV(Q|L)const [c])) => (SHLQconst [int8(c&63)] x) |
823 |
| -(SHLXL x (MOV(Q|L)const [c])) => (SHLLconst [int8(c&31)] x) |
824 | 804 |
|
825 | 805 | (SHRQ x (MOV(Q|L)const [c])) => (SHRQconst [int8(c&63)] x)
|
826 | 806 | (SHRL x (MOV(Q|L)const [c])) => (SHRLconst [int8(c&31)] x)
|
827 | 807 | (SHRW x (MOV(Q|L)const [c])) && c&31 < 16 => (SHRWconst [int8(c&31)] x)
|
828 | 808 | (SHRW _ (MOV(Q|L)const [c])) && c&31 >= 16 => (MOVLconst [0])
|
829 | 809 | (SHRB x (MOV(Q|L)const [c])) && c&31 < 8 => (SHRBconst [int8(c&31)] x)
|
830 | 810 | (SHRB _ (MOV(Q|L)const [c])) && c&31 >= 8 => (MOVLconst [0])
|
831 |
| -(SHRXQ x (MOV(Q|L)const [c])) => (SHRQconst [int8(c&63)] x) |
832 |
| -(SHRXL x (MOV(Q|L)const [c])) => (SHRLconst [int8(c&31)] x) |
833 | 811 |
|
834 | 812 | (SARQ x (MOV(Q|L)const [c])) => (SARQconst [int8(c&63)] x)
|
835 | 813 | (SARL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
|
836 | 814 | (SARW x (MOV(Q|L)const [c])) => (SARWconst [int8(min(int64(c)&31,15))] x)
|
837 | 815 | (SARB x (MOV(Q|L)const [c])) => (SARBconst [int8(min(int64(c)&31,7))] x)
|
838 |
| -(SARXQ x (MOV(Q|L)const [c])) => (SARQconst [int8(c&63)] x) |
839 |
| -(SARXL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x) |
840 | 816 |
|
841 | 817 | // Operations which don't affect the low 6/5 bits of the shift amount are NOPs.
|
842 |
| -((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y) |
843 |
| -((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> y)) |
844 |
| -((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y) |
845 |
| -((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> y)) |
846 |
| - |
847 |
| -((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y) |
848 |
| -((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> y)) |
849 |
| -((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y) |
850 |
| -((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> y)) |
851 |
| - |
852 |
| -((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y) |
853 |
| -((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> y)) |
854 |
| -((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y) |
855 |
| -((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> y)) |
856 |
| - |
857 |
| -((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y) |
858 |
| -((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> y)) |
859 |
| -((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y) |
860 |
| -((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> y)) |
| 818 | +((SHLQ|SHRQ|SARQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x y) |
| 819 | +((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y)) |
| 820 | +((SHLQ|SHRQ|SARQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y) |
| 821 | +((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y)) |
| 822 | + |
| 823 | +((SHLL|SHRL|SARL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL) x y) |
| 824 | +((SHLL|SHRL|SARL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL) x (NEGQ <t> y)) |
| 825 | +((SHLL|SHRL|SARL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y) |
| 826 | +((SHLL|SHRL|SARL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGQ <t> y)) |
| 827 | + |
| 828 | +((SHLQ|SHRQ|SARQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x y) |
| 829 | +((SHLQ|SHRQ|SARQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x (NEGL <t> y)) |
| 830 | +((SHLQ|SHRQ|SARQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y) |
| 831 | +((SHLQ|SHRQ|SARQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGL <t> y)) |
| 832 | + |
| 833 | +((SHLL|SHRL|SARL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL) x y) |
| 834 | +((SHLL|SHRL|SARL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL) x (NEGL <t> y)) |
| 835 | +((SHLL|SHRL|SARL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y) |
| 836 | +((SHLL|SHRL|SARL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGL <t> y)) |
861 | 837 |
|
862 | 838 | // rotate left negative = rotate right
|
863 | 839 | (ROLQ x (NEG(Q|L) y)) => (RORQ x y)
|
|
2231 | 2207 | && clobber(x0, x1, sh)
|
2232 | 2208 | => @mergePoint(b,x0,x1) (MOVBEQload [i] {s} p1 mem)
|
2233 | 2209 |
|
2234 |
| -(SARX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SARX(Q|L)load [off] {sym} ptr x mem) |
2235 |
| -(SHLX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SHLX(Q|L)load [off] {sym} ptr x mem) |
2236 |
| -(SHRX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SHRX(Q|L)load [off] {sym} ptr x mem) |
| 2210 | +(SAR(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SARX(Q|L)load [off] {sym} ptr x mem) |
| 2211 | +(SHL(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHLX(Q|L)load [off] {sym} ptr x mem) |
| 2212 | +(SHR(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHRX(Q|L)load [off] {sym} ptr x mem) |
2237 | 2213 |
|
2238 | 2214 | ((SHL|SHR|SAR)XQload [off] {sym} ptr (MOVQconst [c]) mem) => ((SHL|SHR|SAR)Qconst [int8(c&63)] (MOVQload [off] {sym} ptr mem))
|
2239 | 2215 | ((SHL|SHR|SAR)XQload [off] {sym} ptr (MOVLconst [c]) mem) => ((SHL|SHR|SAR)Qconst [int8(c&63)] (MOVQload [off] {sym} ptr mem))
|
|
0 commit comments