@@ -97,12 +97,15 @@ import (
97
97
"crypto/elliptic/internal/fiat"
98
98
"crypto/subtle"
99
99
"errors"
100
+ "sync"
100
101
)
101
102
102
103
// The value below is built by decoding template-supplied bytes into a newly
// allocated element; the error returned by SetBytes is discarded.
// NOTE(review): presumably this is the curve's b coefficient and the encoding
// is always valid at generation time — confirm.
var {{.p}}B, _ = new({{.Element}}).SetBytes({{.B}})
103
104
104
105
// The value below is built by decoding template-supplied bytes into a new
// point; the error returned by SetBytes is discarded.
// NOTE(review): presumably this is the canonical generator — confirm.
var {{.p}}G, _ = New{{.P}}Point().SetBytes({{.G}})
105
106
107
+ // {{.p}}ElementLength is the length of an element of the base or scalar field,
108
+ // which have the same byte length for all NIST P curves.
106
109
const {{.p}}ElementLength = {{ .ElementLen }}
107
110
108
111
// {{.P}}Point is a {{.P}} point. The zero value is NOT valid.
@@ -329,54 +332,122 @@ func (q *{{.P}}Point) Select(p1, p2 *{{.P}}Point, cond int) *{{.P}}Point {
329
332
return q
330
333
}
331
334
335
+ // A {{.p}}Table holds the first 15 multiples of a point at offset -1, so [1]P
336
+ // is at table[0], [15]P is at table[14], and [0]P is implicitly the identity
337
+ // point.
// The entries are pointers, so the zero value of a table holds only nil
// entries; the code that builds a table assigns all 15 entries before any
// lookup is made.
338
+ type {{.p}}Table [15]*{{.P}}Point
339
+
340
+ // Select selects the n-th multiple of the table base point into p. It works in
340
+ // constant time by iterating over every entry of the table. n must be in [0, 15].
// For n == 0 no table entry matches, so p is left equal to a freshly created
// point, which stands for [0]P. Select panics if n >= 16.
341
+ func (table *{{.p}}Table) Select(p *{{.P}}Point, n uint8) {
342
+ if n >= 16 {
343
+ panic("nistec: internal error: {{.p}}Table called with out-of-bounds value")
344
+ }
345
// Start from a fresh point so that n == 0 yields [0]P.
+ p.Set(New{{.P}}Point())
346
// Visit all 15 entries regardless of n; i is the multiple stored at
// table[i-1].
+ for i := uint8(1); i < 16; i++ {
347
// cond is 1 when i == n and 0 otherwise.
+ cond := subtle.ConstantTimeByteEq(i, n)
348
// NOTE(review): relies on the point's Select taking its first argument
// when cond is 1 and its second argument otherwise — confirm.
+ p.Select(table[i-1], p, cond)
349
+ }
350
+ }
352
+
332
353
// ScalarMult sets p = scalar * q, and returns p.
// The scalar is consumed four bits at a time, from the high nibble of the
// first byte to the low nibble of the last byte, so scalar is read as a
// big-endian integer. The returned error is always nil.
333
354
func (p *{{.P}}Point) ScalarMult(q *{{.P}}Point, scalar []byte) (*{{.P}}Point, error) {
334
- // table holds the first 16 multiples of q. The explicit new{{.P}}Point calls
335
- // get inlined, letting the allocations live on the stack.
336
- var table = [16]*{{.P}}Point{
337
- New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(),
338
- New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(),
355
+ // Compute a {{.p}}Table for the base point q. The explicit New{{.P}}Point
356
+ // calls get inlined, letting the allocations live on the stack.
357
+ var table = {{.p}}Table{New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(),
339
358
New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(),
340
359
New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(),
341
- }
342
- for i := 1; i < 16; i++ {
343
- table[i].Add(table[i-1], q)
360
+ New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point()}
361
+ table[0].Set(q)
362
// table[i] holds [i+1]q. Each odd index i is filled by doubling
// table[i/2], which holds [(i+1)/2]q; the even index that follows adds q
// once more. This fills entries 1 through 14 with seven doublings and
// seven additions.
+ for i := 1; i < 15; i += 2 {
363
+ table[i].Double(table[i/2])
364
+ table[i+1].Add(table[i], q)
344
365
}
345
366
346
367
// Instead of doing the classic double-and-add chain, we do it with a
347
368
// four-bit window: we double four times, and then add [0-15]P.
348
369
// t receives the table entry chosen for each window; p is the accumulator
// and is reset to a fresh point before the loop.
t := New{{.P}}Point()
349
370
p.Set(New{{.P}}Point())
350
- for _, byte := range scalar {
351
- p.Double(p)
352
- p.Double(p)
353
- p.Double(p)
354
- p.Double(p)
355
-
356
- for i := uint8(0); i < 16; i++ {
357
- cond := subtle.ConstantTimeByteEq(byte>>4, i)
358
- t.Select(table[i], t, cond)
371
+ for i, byte := range scalar {
372
+ // No need to double on the first iteration, as p is the identity at
373
+ // this point, and [N]∞ = ∞.
374
+ if i != 0 {
375
+ p.Double(p)
376
+ p.Double(p)
377
+ p.Double(p)
378
+ p.Double(p)
359
379
}
380
+
381
// High nibble of the current byte.
+ windowValue := byte >> 4
382
+ table.Select(t, windowValue)
360
383
p.Add(p, t)
361
384
362
385
// Shift the accumulator by four bits before adding the low nibble.
p.Double(p)
363
386
p.Double(p)
364
387
p.Double(p)
365
388
p.Double(p)
366
389
367
- for i := uint8(0); i < 16; i++ {
368
- cond := subtle.ConstantTimeByteEq(byte&0b1111, i)
369
- t.Select(table[i], t, cond)
370
- }
390
// Low nibble of the current byte.
+ windowValue = byte & 0b1111
391
+ table.Select(t, windowValue)
371
392
p.Add(p, t)
372
393
}
373
394
374
395
return p, nil
375
396
}
376
397
398
// The two variables below hold the lazily built generator tables and the
// sync.Once that guards their one-time construction in generatorTable. The
// array has one table per four-bit window of a scalar of {{.p}}ElementLength
// bytes, hence the factor of two.
+ var {{.p}}GeneratorTable *[{{.p}}ElementLength * 2]{{.p}}Table
399
+ var {{.p}}GeneratorTableOnce sync.Once
400
+
401
+ // generatorTable returns a sequence of {{.p}}Tables. The first table contains
402
+ // multiples of G. Each successive table is the previous table doubled four
403
+ // times.
// In other words, table i holds the multiples 1 through 15 of [16^i]G. The
// tables are built on the first call only and the same pointer is returned on
// every call. The receiver p is not used.
404
+ func (p *{{.P}}Point) generatorTable() *[{{.p}}ElementLength * 2]{{.p}}Table {
405
+ {{.p}}GeneratorTableOnce.Do(func() {
406
+ {{.p}}GeneratorTable = new([{{.p}}ElementLength * 2]{{.p}}Table)
407
// base is the base point of the table being filled; it starts at the
// generator and is doubled four times after each table.
+ base := New{{.P}}Generator()
408
+ for i := 0; i < {{.p}}ElementLength*2; i++ {
409
// Entry 0 is a copy of base, so later doublings of base do not
// alter the stored entry.
+ {{.p}}GeneratorTable[i][0] = New{{.P}}Point().Set(base)
410
// Entry j is entry j-1 plus base, giving [j+1]base.
+ for j := 1; j < 15; j++ {
411
+ {{.p}}GeneratorTable[i][j] = New{{.P}}Point().Add({{.p}}GeneratorTable[i][j-1], base)
412
+ }
413
// Advance base to [16]base for the next table.
+ base.Double(base)
414
+ base.Double(base)
415
+ base.Double(base)
416
+ base.Double(base)
417
+ }
418
+ })
419
+ return {{.p}}GeneratorTable
420
+ }
421
+
377
422
// ScalarBaseMult sets p = scalar * B, where B is the canonical generator, and
378
423
// returns p.
// scalar must be exactly {{.p}}ElementLength bytes long; otherwise a nil
// point and an "invalid scalar length" error are returned, and p is not
// modified. As in ScalarMult, the scalar is consumed from the high nibble of
// the first byte to the low nibble of the last byte.
379
424
func (p *{{.P}}Point) ScalarBaseMult(scalar []byte) (*{{.P}}Point, error) {
380
- return p.ScalarMult(New{{.P}}Generator(), scalar)
425
// The length check also guarantees that the loop below performs exactly
// len(tables) lookups, so tableIndex never drops below zero while in use.
+ if len(scalar) != {{.p}}ElementLength {
426
+ return nil, errors.New("invalid scalar length")
427
+ }
428
+ tables := p.generatorTable()
429
+
430
+ // This is also a scalar multiplication with a four-bit window like in
431
+ // ScalarMult, but in this case the doublings are precomputed. The value
432
+ // [windowValue]G added at iteration k would normally get doubled
433
+ // (totIterations-k)×4 times, but with a larger precomputation we can
434
+ // instead add [2^((totIterations-k)×4)][windowValue]G and avoid the
435
+ // doublings between iterations.
436
+ t := New{{.P}}Point()
437
+ p.Set(New{{.P}}Point())
438
// The first nibble processed is the most significant one, so it uses the
// last table; tableIndex then walks down to table 0 for the final nibble.
+ tableIndex := len(tables) - 1
439
+ for _, byte := range scalar {
440
// High nibble of the current byte.
+ windowValue := byte >> 4
441
+ tables[tableIndex].Select(t, windowValue)
442
+ p.Add(p, t)
443
+ tableIndex--
444
+
445
// Low nibble of the current byte.
+ windowValue = byte & 0b1111
446
+ tables[tableIndex].Select(t, windowValue)
447
+ p.Add(p, t)
448
+ tableIndex--
449
+ }
450
+
451
+ return p, nil
381
452
}
382
453
`
0 commit comments