Skip to content

Commit bc9c31d

Browse files
authored
Rollup merge of rust-lang#122884 - mzabaluev:pow-remove-exit-branch, r=Amanieu
Optimize integer `pow` by removing the exit branch. The branch at the end of the `pow` implementations is redundant with the multiplication code already present in the loop. By rotating the exit check, this branch can be largely removed, improving code size and reducing instruction cache misses. Testing on my machine (`x86_64`, 11th Gen Intel Core i5-1135G7 @ 2.40GHz), the `num::int_pow` benchmarks improve by some 40% for the unchecked operations and show some slight improvement for the checked operations as well.
2 parents 80eb5a8 + ac88b33 commit bc9c31d

File tree

3 files changed

+151
-89
lines changed

3 files changed

+151
-89
lines changed

library/core/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@
170170
#![feature(internal_impls_macro)]
171171
#![feature(ip)]
172172
#![feature(is_ascii_octdigit)]
173+
#![feature(is_val_statically_known)]
173174
#![feature(isqrt)]
174175
#![feature(link_cfg)]
175176
#![feature(offset_of_enum)]

library/core/src/num/int_macros.rs

+75-43
Original file line numberDiff line numberDiff line change
@@ -1496,18 +1496,17 @@ macro_rules! int_impl {
14961496
let mut base = self;
14971497
let mut acc: Self = 1;
14981498

1499-
while exp > 1 {
1499+
loop {
15001500
if (exp & 1) == 1 {
15011501
acc = try_opt!(acc.checked_mul(base));
1502+
// since exp!=0, finally the exp must be 1.
1503+
if exp == 1 {
1504+
return Some(acc);
1505+
}
15021506
}
15031507
exp /= 2;
15041508
base = try_opt!(base.checked_mul(base));
15051509
}
1506-
// since exp!=0, finally the exp must be 1.
1507-
// Deal with the final bit of the exponent separately, since
1508-
// squaring the base afterwards is not necessary and may cause a
1509-
// needless overflow.
1510-
acc.checked_mul(base)
15111510
}
15121511

15131512
/// Strict exponentiation. Computes `self.pow(exp)`, panicking if
@@ -1547,18 +1546,17 @@ macro_rules! int_impl {
15471546
let mut base = self;
15481547
let mut acc: Self = 1;
15491548

1550-
while exp > 1 {
1549+
loop {
15511550
if (exp & 1) == 1 {
15521551
acc = acc.strict_mul(base);
1552+
// since exp!=0, finally the exp must be 1.
1553+
if exp == 1 {
1554+
return acc;
1555+
}
15531556
}
15541557
exp /= 2;
15551558
base = base.strict_mul(base);
15561559
}
1557-
// since exp!=0, finally the exp must be 1.
1558-
// Deal with the final bit of the exponent separately, since
1559-
// squaring the base afterwards is not necessary and may cause a
1560-
// needless overflow.
1561-
acc.strict_mul(base)
15621560
}
15631561

15641562
/// Returns the square root of the number, rounded down.
@@ -2175,26 +2173,44 @@ macro_rules! int_impl {
21752173
#[must_use = "this returns the result of the operation, \
21762174
without modifying the original"]
21772175
#[inline]
2176+
#[rustc_allow_const_fn_unstable(is_val_statically_known)]
21782177
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
21792178
if exp == 0 {
21802179
return 1;
21812180
}
21822181
let mut base = self;
21832182
let mut acc: Self = 1;
21842183

2185-
while exp > 1 {
2186-
if (exp & 1) == 1 {
2187-
acc = acc.wrapping_mul(base);
2184+
if intrinsics::is_val_statically_known(exp) {
2185+
while exp > 1 {
2186+
if (exp & 1) == 1 {
2187+
acc = acc.wrapping_mul(base);
2188+
}
2189+
exp /= 2;
2190+
base = base.wrapping_mul(base);
21882191
}
2189-
exp /= 2;
2190-
base = base.wrapping_mul(base);
2191-
}
21922192

2193-
// since exp!=0, finally the exp must be 1.
2194-
// Deal with the final bit of the exponent separately, since
2195-
// squaring the base afterwards is not necessary and may cause a
2196-
// needless overflow.
2197-
acc.wrapping_mul(base)
2193+
// since exp!=0, finally the exp must be 1.
2194+
// Deal with the final bit of the exponent separately, since
2195+
// squaring the base afterwards is not necessary.
2196+
acc.wrapping_mul(base)
2197+
} else {
2198+
// This is faster than the above when the exponent is not known
2199+
// at compile time. We can't use the same code for the constant
2200+
// exponent case because LLVM is currently unable to unroll
2201+
// this loop.
2202+
loop {
2203+
if (exp & 1) == 1 {
2204+
acc = acc.wrapping_mul(base);
2205+
// since exp!=0, finally the exp must be 1.
2206+
if exp == 1 {
2207+
return acc;
2208+
}
2209+
}
2210+
exp /= 2;
2211+
base = base.wrapping_mul(base);
2212+
}
2213+
}
21982214
}
21992215

22002216
/// Calculates `self` + `rhs`.
@@ -2690,9 +2706,14 @@ macro_rules! int_impl {
26902706
// Scratch space for storing results of overflowing_mul.
26912707
let mut r;
26922708

2693-
while exp > 1 {
2709+
loop {
26942710
if (exp & 1) == 1 {
26952711
r = acc.overflowing_mul(base);
2712+
// since exp!=0, finally the exp must be 1.
2713+
if exp == 1 {
2714+
r.1 |= overflown;
2715+
return r;
2716+
}
26962717
acc = r.0;
26972718
overflown |= r.1;
26982719
}
@@ -2701,14 +2722,6 @@ macro_rules! int_impl {
27012722
base = r.0;
27022723
overflown |= r.1;
27032724
}
2704-
2705-
// since exp!=0, finally the exp must be 1.
2706-
// Deal with the final bit of the exponent separately, since
2707-
// squaring the base afterwards is not necessary and may cause a
2708-
// needless overflow.
2709-
r = acc.overflowing_mul(base);
2710-
r.1 |= overflown;
2711-
r
27122725
}
27132726

27142727
/// Raises self to the power of `exp`, using exponentiation by squaring.
@@ -2728,26 +2741,45 @@ macro_rules! int_impl {
27282741
without modifying the original"]
27292742
#[inline]
27302743
#[rustc_inherit_overflow_checks]
2744+
#[rustc_allow_const_fn_unstable(is_val_statically_known)]
27312745
pub const fn pow(self, mut exp: u32) -> Self {
27322746
if exp == 0 {
27332747
return 1;
27342748
}
27352749
let mut base = self;
27362750
let mut acc = 1;
27372751

2738-
while exp > 1 {
2739-
if (exp & 1) == 1 {
2740-
acc = acc * base;
2752+
if intrinsics::is_val_statically_known(exp) {
2753+
while exp > 1 {
2754+
if (exp & 1) == 1 {
2755+
acc = acc * base;
2756+
}
2757+
exp /= 2;
2758+
base = base * base;
27412759
}
2742-
exp /= 2;
2743-
base = base * base;
2744-
}
27452760

2746-
// since exp!=0, finally the exp must be 1.
2747-
// Deal with the final bit of the exponent separately, since
2748-
// squaring the base afterwards is not necessary and may cause a
2749-
// needless overflow.
2750-
acc * base
2761+
// since exp!=0, finally the exp must be 1.
2762+
// Deal with the final bit of the exponent separately, since
2763+
// squaring the base afterwards is not necessary and may cause a
2764+
// needless overflow.
2765+
acc * base
2766+
} else {
2767+
// This is faster than the above when the exponent is not known
2768+
// at compile time. We can't use the same code for the constant
2769+
// exponent case because LLVM is currently unable to unroll
2770+
// this loop.
2771+
loop {
2772+
if (exp & 1) == 1 {
2773+
acc = acc * base;
2774+
// since exp!=0, finally the exp must be 1.
2775+
if exp == 1 {
2776+
return acc;
2777+
}
2778+
}
2779+
exp /= 2;
2780+
base = base * base;
2781+
}
2782+
}
27512783
}
27522784

27532785
/// Returns the square root of the number, rounded down.

library/core/src/num/uint_macros.rs

+75-46
Original file line numberDiff line numberDiff line change
@@ -1622,20 +1622,17 @@ macro_rules! uint_impl {
16221622
let mut base = self;
16231623
let mut acc: Self = 1;
16241624

1625-
while exp > 1 {
1625+
loop {
16261626
if (exp & 1) == 1 {
16271627
acc = try_opt!(acc.checked_mul(base));
1628+
// since exp!=0, finally the exp must be 1.
1629+
if exp == 1 {
1630+
return Some(acc);
1631+
}
16281632
}
16291633
exp /= 2;
16301634
base = try_opt!(base.checked_mul(base));
16311635
}
1632-
1633-
// since exp!=0, finally the exp must be 1.
1634-
// Deal with the final bit of the exponent separately, since
1635-
// squaring the base afterwards is not necessary and may cause a
1636-
// needless overflow.
1637-
1638-
acc.checked_mul(base)
16391636
}
16401637

16411638
/// Strict exponentiation. Computes `self.pow(exp)`, panicking if
@@ -1675,18 +1672,17 @@ macro_rules! uint_impl {
16751672
let mut base = self;
16761673
let mut acc: Self = 1;
16771674

1678-
while exp > 1 {
1675+
loop {
16791676
if (exp & 1) == 1 {
16801677
acc = acc.strict_mul(base);
1678+
// since exp!=0, finally the exp must be 1.
1679+
if exp == 1 {
1680+
return acc;
1681+
}
16811682
}
16821683
exp /= 2;
16831684
base = base.strict_mul(base);
16841685
}
1685-
// since exp!=0, finally the exp must be 1.
1686-
// Deal with the final bit of the exponent separately, since
1687-
// squaring the base afterwards is not necessary and may cause a
1688-
// needless overflow.
1689-
acc.strict_mul(base)
16901686
}
16911687

16921688
/// Saturating integer addition. Computes `self + rhs`, saturating at
@@ -2138,26 +2134,44 @@ macro_rules! uint_impl {
21382134
#[must_use = "this returns the result of the operation, \
21392135
without modifying the original"]
21402136
#[inline]
2137+
#[rustc_allow_const_fn_unstable(is_val_statically_known)]
21412138
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
21422139
if exp == 0 {
21432140
return 1;
21442141
}
21452142
let mut base = self;
21462143
let mut acc: Self = 1;
21472144

2148-
while exp > 1 {
2149-
if (exp & 1) == 1 {
2150-
acc = acc.wrapping_mul(base);
2145+
if intrinsics::is_val_statically_known(exp) {
2146+
while exp > 1 {
2147+
if (exp & 1) == 1 {
2148+
acc = acc.wrapping_mul(base);
2149+
}
2150+
exp /= 2;
2151+
base = base.wrapping_mul(base);
21512152
}
2152-
exp /= 2;
2153-
base = base.wrapping_mul(base);
2154-
}
21552153

2156-
// since exp!=0, finally the exp must be 1.
2157-
// Deal with the final bit of the exponent separately, since
2158-
// squaring the base afterwards is not necessary and may cause a
2159-
// needless overflow.
2160-
acc.wrapping_mul(base)
2154+
// since exp!=0, finally the exp must be 1.
2155+
// Deal with the final bit of the exponent separately, since
2156+
// squaring the base afterwards is not necessary.
2157+
acc.wrapping_mul(base)
2158+
} else {
2159+
// This is faster than the above when the exponent is not known
2160+
// at compile time. We can't use the same code for the constant
2161+
// exponent case because LLVM is currently unable to unroll
2162+
// this loop.
2163+
loop {
2164+
if (exp & 1) == 1 {
2165+
acc = acc.wrapping_mul(base);
2166+
// since exp!=0, finally the exp must be 1.
2167+
if exp == 1 {
2168+
return acc;
2169+
}
2170+
}
2171+
exp /= 2;
2172+
base = base.wrapping_mul(base);
2173+
}
2174+
}
21612175
}
21622176

21632177
/// Calculates `self` + `rhs`.
@@ -2603,9 +2617,14 @@ macro_rules! uint_impl {
26032617
// Scratch space for storing results of overflowing_mul.
26042618
let mut r;
26052619

2606-
while exp > 1 {
2620+
loop {
26072621
if (exp & 1) == 1 {
26082622
r = acc.overflowing_mul(base);
2623+
// since exp!=0, finally the exp must be 1.
2624+
if exp == 1 {
2625+
r.1 |= overflown;
2626+
return r;
2627+
}
26092628
acc = r.0;
26102629
overflown |= r.1;
26112630
}
@@ -2614,15 +2633,6 @@ macro_rules! uint_impl {
26142633
base = r.0;
26152634
overflown |= r.1;
26162635
}
2617-
2618-
// since exp!=0, finally the exp must be 1.
2619-
// Deal with the final bit of the exponent separately, since
2620-
// squaring the base afterwards is not necessary and may cause a
2621-
// needless overflow.
2622-
r = acc.overflowing_mul(base);
2623-
r.1 |= overflown;
2624-
2625-
r
26262636
}
26272637

26282638
/// Raises self to the power of `exp`, using exponentiation by squaring.
@@ -2640,26 +2650,45 @@ macro_rules! uint_impl {
26402650
without modifying the original"]
26412651
#[inline]
26422652
#[rustc_inherit_overflow_checks]
2653+
#[rustc_allow_const_fn_unstable(is_val_statically_known)]
26432654
pub const fn pow(self, mut exp: u32) -> Self {
26442655
if exp == 0 {
26452656
return 1;
26462657
}
26472658
let mut base = self;
26482659
let mut acc = 1;
26492660

2650-
while exp > 1 {
2651-
if (exp & 1) == 1 {
2652-
acc = acc * base;
2661+
if intrinsics::is_val_statically_known(exp) {
2662+
while exp > 1 {
2663+
if (exp & 1) == 1 {
2664+
acc = acc * base;
2665+
}
2666+
exp /= 2;
2667+
base = base * base;
26532668
}
2654-
exp /= 2;
2655-
base = base * base;
2656-
}
26572669

2658-
// since exp!=0, finally the exp must be 1.
2659-
// Deal with the final bit of the exponent separately, since
2660-
// squaring the base afterwards is not necessary and may cause a
2661-
// needless overflow.
2662-
acc * base
2670+
// since exp!=0, finally the exp must be 1.
2671+
// Deal with the final bit of the exponent separately, since
2672+
// squaring the base afterwards is not necessary and may cause a
2673+
// needless overflow.
2674+
acc * base
2675+
} else {
2676+
// This is faster than the above when the exponent is not known
2677+
// at compile time. We can't use the same code for the constant
2678+
// exponent case because LLVM is currently unable to unroll
2679+
// this loop.
2680+
loop {
2681+
if (exp & 1) == 1 {
2682+
acc = acc * base;
2683+
// since exp!=0, finally the exp must be 1.
2684+
if exp == 1 {
2685+
return acc;
2686+
}
2687+
}
2688+
exp /= 2;
2689+
base = base * base;
2690+
}
2691+
}
26632692
}
26642693

26652694
/// Returns the square root of the number, rounded down.

0 commit comments

Comments
 (0)