Skip to content

Commit 50b1add

Browse files
committed
crypto/elliptic: precompute ScalarBaseMult doublings
name                      old time/op  new time/op  delta
pkg:crypto/ecdsa goos:darwin goarch:amd64
Sign/P224-16               250µs ± 2%    91µs ± 2%  -63.42%  (p=0.000 n=10+9)
Sign/P384-16               955µs ± 3%   311µs ± 2%  -67.48%  (p=0.000 n=10+10)
Sign/P521-16              2.74ms ± 2%  0.82ms ± 2%  -69.95%  (p=0.000 n=10+10)
Verify/P224-16             440µs ± 3%   282µs ± 5%  -35.94%  (p=0.000 n=9+10)
Verify/P384-16            1.72ms ± 2%  1.07ms ± 1%  -38.02%  (p=0.000 n=10+9)
Verify/P521-16            5.10ms ± 2%  3.18ms ± 3%  -37.70%  (p=0.000 n=10+10)
GenerateKey/P224-16        225µs ± 3%    67µs ± 4%  -70.42%  (p=0.000 n=9+10)
GenerateKey/P384-16        881µs ± 1%   241µs ± 2%  -72.67%  (p=0.000 n=10+10)
GenerateKey/P521-16       2.62ms ± 3%  0.69ms ± 3%  -73.78%  (p=0.000 n=10+9)
pkg:crypto/elliptic/internal/nistec goos:darwin goarch:amd64
ScalarMult/P224-16         219µs ± 4%   209µs ± 3%   -4.57%  (p=0.003 n=10+10)
ScalarMult/P384-16         838µs ± 2%   823µs ± 1%   -1.72%  (p=0.004 n=10+9)
ScalarMult/P521-16        2.48ms ± 2%  2.45ms ± 2%     ~     (p=0.052 n=10+10)
ScalarBaseMult/P224-16     214µs ± 4%    54µs ± 4%  -74.88%  (p=0.000 n=10+10)
ScalarBaseMult/P384-16     828µs ± 2%   196µs ± 3%  -76.38%  (p=0.000 n=10+10)
ScalarBaseMult/P521-16    2.50ms ± 3%  0.55ms ± 2%  -77.96%  (p=0.000 n=10+10)

Updates #52424
For #52182

Change-Id: I2be3c2b8cdeead512063ef489e43805f4ee71d0f
Reviewed-on: https://go-review.googlesource.com/c/go/+/404174
TryBot-Result: Gopher Robot <[email protected]>
Run-TryBot: Filippo Valsorda <[email protected]>
Reviewed-by: Fernando Lobato Meeser <[email protected]>
Reviewed-by: Roland Shoemaker <[email protected]>
1 parent 0b4f241 commit 50b1add

File tree

5 files changed

+465
-110
lines changed

5 files changed

+465
-110
lines changed

src/crypto/elliptic/internal/nistec/generate.go

+93-22
Original file line number | Diff line number | Diff line change
@@ -97,12 +97,15 @@ import (
9797
"crypto/elliptic/internal/fiat"
9898
"crypto/subtle"
9999
"errors"
100+
"sync"
100101
)
101102
102103
var {{.p}}B, _ = new({{.Element}}).SetBytes({{.B}})
103104
104105
var {{.p}}G, _ = New{{.P}}Point().SetBytes({{.G}})
105106
107+
// {{.p}}ElementLength is the length of an element of the base or scalar field,
108+
// which have the same byte length for all NIST P curves.
106109
const {{.p}}ElementLength = {{ .ElementLen }}
107110
108111
// {{.P}}Point is a {{.P}} point. The zero value is NOT valid.
@@ -329,54 +332,122 @@ func (q *{{.P}}Point) Select(p1, p2 *{{.P}}Point, cond int) *{{.P}}Point {
329332
return q
330333
}
331334
335+
// A {{.p}}Table holds the first 15 multiples of a point at offset -1, so [1]P
336+
// is at table[0], [15]P is at table[14], and [0]P is implicitly the identity
337+
// point.
338+
type {{.p}}Table [15]*{{.P}}Point
339+
340+
// Select selects the n-th multiple of the table base point into p. It works in
341+
// constant time by iterating over every entry of the table. n must be in [0, 15].
342+
func (table *{{.p}}Table) Select(p *{{.P}}Point, n uint8) {
343+
if n >= 16 {
344+
panic("nistec: internal error: {{.p}}Table called with out-of-bounds value")
345+
}
346+
p.Set(New{{.P}}Point())
347+
for i := uint8(1); i < 16; i++ {
348+
cond := subtle.ConstantTimeByteEq(i, n)
349+
p.Select(table[i-1], p, cond)
350+
}
351+
}
352+
332353
// ScalarMult sets p = scalar * q, and returns p.
333354
func (p *{{.P}}Point) ScalarMult(q *{{.P}}Point, scalar []byte) (*{{.P}}Point, error) {
334-
// table holds the first 16 multiples of q. The explicit new{{.P}}Point calls
335-
// get inlined, letting the allocations live on the stack.
336-
var table = [16]*{{.P}}Point{
337-
New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(),
338-
New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(),
355+
// Compute a {{.p}}Table for the base point q. The explicit New{{.P}}Point
356+
// calls get inlined, letting the allocations live on the stack.
357+
var table = {{.p}}Table{New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(),
339358
New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(),
340359
New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(),
341-
}
342-
for i := 1; i < 16; i++ {
343-
table[i].Add(table[i-1], q)
360+
New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point()}
361+
table[0].Set(q)
362+
for i := 1; i < 15; i += 2 {
363+
table[i].Double(table[i/2])
364+
table[i+1].Add(table[i], q)
344365
}
345366
346367
// Instead of doing the classic double-and-add chain, we do it with a
347368
// four-bit window: we double four times, and then add [0-15]P.
348369
t := New{{.P}}Point()
349370
p.Set(New{{.P}}Point())
350-
for _, byte := range scalar {
351-
p.Double(p)
352-
p.Double(p)
353-
p.Double(p)
354-
p.Double(p)
355-
356-
for i := uint8(0); i < 16; i++ {
357-
cond := subtle.ConstantTimeByteEq(byte>>4, i)
358-
t.Select(table[i], t, cond)
371+
for i, byte := range scalar {
372+
// No need to double on the first iteration, as p is the identity at
373+
// this point, and [N]∞ = ∞.
374+
if i != 0 {
375+
p.Double(p)
376+
p.Double(p)
377+
p.Double(p)
378+
p.Double(p)
359379
}
380+
381+
windowValue := byte >> 4
382+
table.Select(t, windowValue)
360383
p.Add(p, t)
361384
362385
p.Double(p)
363386
p.Double(p)
364387
p.Double(p)
365388
p.Double(p)
366389
367-
for i := uint8(0); i < 16; i++ {
368-
cond := subtle.ConstantTimeByteEq(byte&0b1111, i)
369-
t.Select(table[i], t, cond)
370-
}
390+
windowValue = byte & 0b1111
391+
table.Select(t, windowValue)
371392
p.Add(p, t)
372393
}
373394
374395
return p, nil
375396
}
376397
398+
var {{.p}}GeneratorTable *[{{.p}}ElementLength * 2]{{.p}}Table
399+
var {{.p}}GeneratorTableOnce sync.Once
400+
401+
// generatorTable returns a sequence of {{.p}}Tables. The first table contains
402+
// multiples of G. Each successive table is the previous table doubled four
403+
// times.
404+
func (p *{{.P}}Point) generatorTable() *[{{.p}}ElementLength * 2]{{.p}}Table {
405+
{{.p}}GeneratorTableOnce.Do(func() {
406+
{{.p}}GeneratorTable = new([{{.p}}ElementLength * 2]{{.p}}Table)
407+
base := New{{.P}}Generator()
408+
for i := 0; i < {{.p}}ElementLength*2; i++ {
409+
{{.p}}GeneratorTable[i][0] = New{{.P}}Point().Set(base)
410+
for j := 1; j < 15; j++ {
411+
{{.p}}GeneratorTable[i][j] = New{{.P}}Point().Add({{.p}}GeneratorTable[i][j-1], base)
412+
}
413+
base.Double(base)
414+
base.Double(base)
415+
base.Double(base)
416+
base.Double(base)
417+
}
418+
})
419+
return {{.p}}GeneratorTable
420+
}
421+
377422
// ScalarBaseMult sets p = scalar * B, where B is the canonical generator, and
378423
// returns p.
379424
func (p *{{.P}}Point) ScalarBaseMult(scalar []byte) (*{{.P}}Point, error) {
380-
return p.ScalarMult(New{{.P}}Generator(), scalar)
425+
if len(scalar) != {{.p}}ElementLength {
426+
return nil, errors.New("invalid scalar length")
427+
}
428+
tables := p.generatorTable()
429+
430+
// This is also a scalar multiplication with a four-bit window like in
431+
// ScalarMult, but in this case the doublings are precomputed. The value
432+
// [windowValue]G added at iteration k would normally get doubled
433+
// (totIterations-k)×4 times, but with a larger precomputation we can
434+
// instead add [2^((totIterations-k)×4)][windowValue]G and avoid the
435+
// doublings between iterations.
436+
t := New{{.P}}Point()
437+
p.Set(New{{.P}}Point())
438+
tableIndex := len(tables) - 1
439+
for _, byte := range scalar {
440+
windowValue := byte >> 4
441+
tables[tableIndex].Select(t, windowValue)
442+
p.Add(p, t)
443+
tableIndex--
444+
445+
windowValue = byte & 0b1111
446+
tables[tableIndex].Select(t, windowValue)
447+
p.Add(p, t)
448+
tableIndex--
449+
}
450+
451+
return p, nil
381452
}
382453
`

src/crypto/elliptic/internal/nistec/p224.go

+93-22
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)