Skip to content

Commit a621973

Browse files
wdvxdr1123 authored and 4a6f656c committed
cmd/compile: intrinsify Sub64 on riscv64
After this CL, the performance differences in the crypto/elliptic benchmarks on linux/riscv64 are:

name                 old time/op  new time/op  delta
ScalarBaseMult/P256  1.64ms ± 1%  1.60ms ± 1%   -2.36%  (p=0.008 n=5+5)
ScalarBaseMult/P224  1.53ms ± 1%  1.47ms ± 2%   -4.24%  (p=0.008 n=5+5)
ScalarBaseMult/P384  5.12ms ± 2%  5.03ms ± 2%     ~     (p=0.095 n=5+5)
ScalarBaseMult/P521  22.3ms ± 2%  13.8ms ± 1%  -37.89%  (p=0.008 n=5+5)
ScalarMult/P256      4.49ms ± 2%  4.26ms ± 2%   -5.13%  (p=0.008 n=5+5)
ScalarMult/P224      4.33ms ± 1%  4.09ms ± 1%   -5.59%  (p=0.008 n=5+5)
ScalarMult/P384      16.3ms ± 1%  15.5ms ± 2%   -4.78%  (p=0.008 n=5+5)
ScalarMult/P521       101ms ± 0%    47ms ± 2%  -53.36%  (p=0.008 n=5+5)

Change-Id: I31cf0506e27f9d85f576af1813630a19c20dda8a
Reviewed-on: https://go-review.googlesource.com/c/go/+/420095
Reviewed-by: Cherry Mui <[email protected]>
Reviewed-by: Joel Sing <[email protected]>
Reviewed-by: David Chase <[email protected]>
Run-TryBot: Wayne Zuo <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
1 parent 969f48a commit a621973

File tree

4 files changed

+50
-2
lines changed

4 files changed

+50
-2
lines changed

Diff for: src/cmd/compile/internal/ssa/gen/RISCV64.rules

+4
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@
5656
(Select1 (Add64carry x y c)) =>
5757
(OR (SLTU <typ.UInt64> s:(ADD <typ.UInt64> x y) x) (SLTU <typ.UInt64> (ADD <typ.UInt64> s c) s))
5858

59+
(Select0 (Sub64borrow x y c)) => (SUB (SUB <typ.UInt64> x y) c)
60+
(Select1 (Sub64borrow x y c)) =>
61+
(OR (SLTU <typ.UInt64> x s:(SUB <typ.UInt64> x y)) (SLTU <typ.UInt64> s (SUB <typ.UInt64> s c)))
62+
5963
// (x + y) / 2 => (x / 2) + (y / 2) + (x & y & 1)
6064
(Avg64u <t> x y) => (ADD (ADD <t> (SRLI <t> [1] x) (SRLI <t> [1] y)) (ANDI <t> [1] (AND <t> x y)))
6165

Diff for: src/cmd/compile/internal/ssa/rewriteRISCV64.go

+36
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: src/cmd/compile/internal/ssagen/ssa.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -4732,8 +4732,8 @@ func InitTables() {
47324732
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
47334733
return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
47344734
},
4735-
sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X)
4736-
alias("math/bits", "Sub", "math/bits", "Sub64", sys.ArchAMD64, sys.ArchARM64, sys.ArchS390X)
4735+
sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64)
4736+
alias("math/bits", "Sub", "math/bits", "Sub64", sys.ArchAMD64, sys.ArchARM64, sys.ArchS390X, sys.ArchRISCV64)
47374737
addF("math/bits", "Div64",
47384738
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
47394739
// check for divide-by-zero/overflow and panic with appropriate message

Diff for: test/codegen/mathbits.go

+8
Original file line numberDiff line numberDiff line change
@@ -621,6 +621,7 @@ func Sub(x, y, ci uint) (r, co uint) {
621621
// ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
622622
// ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
623623
// s390x:"SUBE"
624+
// riscv64: "SUB","SLTU"
624625
return bits.Sub(x, y, ci)
625626
}
626627

@@ -630,6 +631,7 @@ func SubC(x, ci uint) (r, co uint) {
630631
// ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
631632
// ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
632633
// s390x:"SUBE"
634+
// riscv64: "SUB","SLTU"
633635
return bits.Sub(x, 7, ci)
634636
}
635637

@@ -639,6 +641,7 @@ func SubZ(x, y uint) (r, co uint) {
639641
// ppc64:"SUBC", -"SUBE", "SUBZE", "NEG"
640642
// ppc64le:"SUBC", -"SUBE", "SUBZE", "NEG"
641643
// s390x:"SUBC"
644+
// riscv64: "SUB","SLTU"
642645
return bits.Sub(x, y, 0)
643646
}
644647

@@ -648,6 +651,7 @@ func SubR(x, y, ci uint) uint {
648651
// ppc64:"SUBC", "SUBE", -"SUBZE", -"NEG"
649652
// ppc64le:"SUBC", "SUBE", -"SUBZE", -"NEG"
650653
// s390x:"SUBE"
654+
// riscv64: "SUB",-"SLTU"
651655
r, _ := bits.Sub(x, y, ci)
652656
return r
653657
}
@@ -669,6 +673,7 @@ func Sub64(x, y, ci uint64) (r, co uint64) {
669673
// ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
670674
// ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
671675
// s390x:"SUBE"
676+
// riscv64: "SUB","SLTU"
672677
return bits.Sub64(x, y, ci)
673678
}
674679

@@ -678,6 +683,7 @@ func Sub64C(x, ci uint64) (r, co uint64) {
678683
// ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
679684
// ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
680685
// s390x:"SUBE"
686+
// riscv64: "SUB","SLTU"
681687
return bits.Sub64(x, 7, ci)
682688
}
683689

@@ -687,6 +693,7 @@ func Sub64Z(x, y uint64) (r, co uint64) {
687693
// ppc64:"SUBC", -"SUBE", "SUBZE", "NEG"
688694
// ppc64le:"SUBC", -"SUBE", "SUBZE", "NEG"
689695
// s390x:"SUBC"
696+
// riscv64: "SUB","SLTU"
690697
return bits.Sub64(x, y, 0)
691698
}
692699

@@ -696,6 +703,7 @@ func Sub64R(x, y, ci uint64) uint64 {
696703
// ppc64:"SUBC", "SUBE", -"SUBZE", -"NEG"
697704
// ppc64le:"SUBC", "SUBE", -"SUBZE", -"NEG"
698705
// s390x:"SUBE"
706+
// riscv64: "SUB",-"SLTU"
699707
r, _ := bits.Sub64(x, y, ci)
700708
return r
701709
}

0 commit comments

Comments
 (0)