31
31
import sun .security .util .math .IntegerFieldModuloP ;
32
32
import java .math .BigInteger ;
33
33
import jdk .internal .vm .annotation .IntrinsicCandidate ;
34
+ import jdk .internal .vm .annotation .ForceInline ;
34
35
35
36
// Reference:
36
37
// - [1] Shay Gueron and Vlad Krasnov "Fast Prime Field Elliptic Curve
@@ -103,8 +104,8 @@ public ImmutableElement getElement(BigInteger v) {
103
104
setLimbsValuePositive (v , vLimbs );
104
105
105
106
// Convert to Montgomery domain
106
- int numAdds = mult (vLimbs , h , montLimbs );
107
- return new ImmutableElement (montLimbs , numAdds );
107
+ mult (vLimbs , h , montLimbs );
108
+ return new ImmutableElement (montLimbs , 0 );
108
109
}
109
110
110
111
@ Override
@@ -114,24 +115,6 @@ public SmallValue getSmallValue(int value) {
114
115
return super .getSmallValue (value );
115
116
}
116
117
117
- /*
118
- * This function is used by IntegerPolynomial.setProduct(SmallValue v) to
119
- * multiply by a small constant (i.e. (int) 1,2,3,4). Instead of doing a
120
- * montgomery conversion followed by a montgomery multiplication, just use
121
- * the spare top (64-BITS_PER_LIMB) bits to multiply by a constant. (See [1]
122
- * Section 4 )
123
- *
124
- * Will return an unreduced value
125
- */
126
- @ Override
127
- protected int multByInt (long [] a , long b ) {
128
- assert (b < (1 << BITS_PER_LIMB ));
129
- for (int i = 0 ; i < a .length ; i ++) {
130
- a [i ] *= b ;
131
- }
132
- return (int ) (b - 1 );
133
- }
134
-
135
118
@ Override
136
119
public ImmutableIntegerModuloP fromMontgomery (ImmutableIntegerModuloP n ) {
137
120
assert n .getField () == MontgomeryIntegerPolynomialP256 .ONE ;
@@ -163,19 +146,27 @@ private void halfLimbs(long[] a, long[] r) {
163
146
}
164
147
165
148
@ Override
166
- protected int square (long [] a , long [] r ) {
167
- return mult (a , a , r );
149
+ protected void square (long [] a , long [] r ) {
150
+ mult (a , a , r );
168
151
}
169
152
153
+
170
154
/**
171
155
* Unrolled Word-by-Word Montgomery Multiplication r = a * b * 2^-260 (mod P)
172
156
*
173
157
* See [1] Figure 5. "Algorithm 2: Word-by-Word Montgomery Multiplication
174
158
* for a Montgomery Friendly modulus p". Note: Step 6 is skipped in multImpl;
175
159
* mult() instead calls reducePositive() on the unreduced result.
176
160
*/
161
+ @ Override
162
+ protected void mult (long [] a , long [] b , long [] r ) {
163
+ multImpl (a , b , r );
164
+ reducePositive (r );
165
+ }
166
+
167
+ @ ForceInline
177
168
@ IntrinsicCandidate
178
- protected int mult (long [] a , long [] b , long [] r ) {
169
+ private void multImpl (long [] a , long [] b , long [] r ) {
179
170
long aa0 = a [0 ];
180
171
long aa1 = a [1 ];
181
172
long aa2 = a [2 ];
@@ -408,36 +399,16 @@ protected int mult(long[] a, long[] b, long[] r) {
408
399
d4 += n4 & LIMB_MASK ;
409
400
410
401
c5 += d1 + dd0 + (d0 >>> BITS_PER_LIMB );
411
- c6 += d2 + dd1 + (c5 >>> BITS_PER_LIMB );
412
- c7 += d3 + dd2 + (c6 >>> BITS_PER_LIMB );
413
- c8 += d4 + dd3 + (c7 >>> BITS_PER_LIMB );
414
- c9 = dd4 + (c8 >>> BITS_PER_LIMB );
415
-
416
- c5 &= LIMB_MASK ;
417
- c6 &= LIMB_MASK ;
418
- c7 &= LIMB_MASK ;
419
- c8 &= LIMB_MASK ;
420
-
421
- // At this point, the result could overflow by one modulus.
422
- c0 = c5 - modulus [0 ];
423
- c1 = c6 - modulus [1 ] + (c0 >> BITS_PER_LIMB );
424
- c0 &= LIMB_MASK ;
425
- c2 = c7 - modulus [2 ] + (c1 >> BITS_PER_LIMB );
426
- c1 &= LIMB_MASK ;
427
- c3 = c8 - modulus [3 ] + (c2 >> BITS_PER_LIMB );
428
- c2 &= LIMB_MASK ;
429
- c4 = c9 - modulus [4 ] + (c3 >> BITS_PER_LIMB );
430
- c3 &= LIMB_MASK ;
431
-
432
- long mask = c4 >> BITS_PER_LIMB ; // Signed shift!
433
-
434
- r [0 ] = ((c5 & mask ) | (c0 & ~mask ));
435
- r [1 ] = ((c6 & mask ) | (c1 & ~mask ));
436
- r [2 ] = ((c7 & mask ) | (c2 & ~mask ));
437
- r [3 ] = ((c8 & mask ) | (c3 & ~mask ));
438
- r [4 ] = ((c9 & mask ) | (c4 & ~mask ));
439
-
440
- return 0 ;
402
+ c6 += d2 + dd1 ;
403
+ c7 += d3 + dd2 ;
404
+ c8 += d4 + dd3 ;
405
+ c9 = dd4 ;
406
+
407
+ r [0 ] = c5 ;
408
+ r [1 ] = c6 ;
409
+ r [2 ] = c7 ;
410
+ r [3 ] = c8 ;
411
+ r [4 ] = c9 ;
441
412
}
442
413
443
414
@ Override
@@ -516,8 +487,8 @@ public ImmutableElement getElement(byte[] v, int offset, int length,
516
487
super .encode (v , offset , length , highByte , vLimbs );
517
488
518
489
// Convert to Montgomery domain
519
- int numAdds = mult (vLimbs , h , montLimbs );
520
- return new ImmutableElement (montLimbs , numAdds );
490
+ mult (vLimbs , h , montLimbs );
491
+ return new ImmutableElement (montLimbs , 0 );
521
492
}
522
493
523
494
/*
@@ -556,4 +527,27 @@ protected void reduceIn(long[] limbs, long v, int i) {
556
527
limbs [i - 5 ] += (v << 4 ) & LIMB_MASK ;
557
528
limbs [i - 4 ] += v >> 48 ;
558
529
}
530
+
531
+ // Propagates carries in limbs a, then subtracts the modulus once if the value is not below it. Used when a could overflow by one modulus.
532
+ @ ForceInline
533
+ protected void reducePositive (long [] a ) {
534
+ long aa0 = a [0 ];
535
+ long aa1 = a [1 ] + (aa0 >>BITS_PER_LIMB );
536
+ long aa2 = a [2 ] + (aa1 >>BITS_PER_LIMB );
537
+ long aa3 = a [3 ] + (aa2 >>BITS_PER_LIMB );
538
+ long aa4 = a [4 ] + (aa3 >>BITS_PER_LIMB );
539
+
540
+ long c0 = a [0 ] - modulus [0 ];
541
+ long c1 = a [1 ] - modulus [1 ] + (c0 >> BITS_PER_LIMB );
542
+ long c2 = a [2 ] - modulus [2 ] + (c1 >> BITS_PER_LIMB );
543
+ long c3 = a [3 ] - modulus [3 ] + (c2 >> BITS_PER_LIMB );
544
+ long c4 = a [4 ] - modulus [4 ] + (c3 >> BITS_PER_LIMB );
545
+ long mask = c4 >> BITS_PER_LIMB ; // Signed shift!
546
+
547
+ a [0 ] = ((aa0 & mask ) | (c0 & ~mask )) & LIMB_MASK ;
548
+ a [1 ] = ((aa1 & mask ) | (c1 & ~mask )) & LIMB_MASK ;
549
+ a [2 ] = ((aa2 & mask ) | (c2 & ~mask )) & LIMB_MASK ;
550
+ a [3 ] = ((aa3 & mask ) | (c3 & ~mask )) & LIMB_MASK ;
551
+ a [4 ] = ((aa4 & mask ) | (c4 & ~mask ));
552
+ }
559
553
}
0 commit comments