Skip to content

Commit 9dee590

Browse files
authored
Rearrange the pipeline of pow to gain efficiency
The final check of the `exp` parameter is unnecessary once the while-loop has executed at least once. The original implementation of the `pow` function uses one more comparison to handle the `exp == 0` case, which may break the CPU pipeline and generate slower code. The performance gap between the old and the new implementation may be small, but IMO the new one at least looks cleaner. --- bench prog: ``` #![feature(test)] extern crate test; #[macro_export]macro_rules! timing{ ($a:expr)=>{let time=std::time::Instant::now();{$a;}print!("{:?} ",time.elapsed())}; ($a:expr,$b:literal)=>{let time=std::time::Instant::now();let mut a=0;for _ in 0..$b{a^=$a;}print!("{:?} {} ",time.elapsed(),a)} } #[inline] pub fn pow_rust(x:i64, mut exp: u32) -> i64 { let mut base = x; let mut acc = 1; while exp > 1 { if (exp & 1) == 1 { acc = acc * base; } exp /= 2; base = base * base; } if exp == 1 { acc = acc * base; } acc } #[inline] pub fn pow_new(x:i64, mut exp: u32) -> i64 { if exp==0{ 1 }else{ let mut base = x; let mut acc = 1; while exp > 1 { if (exp & 1) == 1 { acc = acc * base; } exp >>= 1; base = base * base; } acc * base } } fn main(){ let a=2i64; let b=1_u32; println!(); timing!(test::black_box(a).pow(test::black_box(b)),100000000); timing!(pow_new(test::black_box(a),test::black_box(b)),100000000); timing!(pow_rust(test::black_box(a),test::black_box(b)),100000000); println!(); timing!(test::black_box(a).pow(test::black_box(b)),100000000); timing!(pow_new(test::black_box(a),test::black_box(b)),100000000); timing!(pow_rust(test::black_box(a),test::black_box(b)),100000000); println!(); timing!(test::black_box(a).pow(test::black_box(b)),100000000); timing!(pow_new(test::black_box(a),test::black_box(b)),100000000); timing!(pow_rust(test::black_box(a),test::black_box(b)),100000000); println!(); timing!(test::black_box(a).pow(test::black_box(b)),100000000); timing!(pow_new(test::black_box(a),test::black_box(b)),100000000); 
timing!(pow_rust(test::black_box(a),test::black_box(b)),100000000); println!(); timing!(test::black_box(a).pow(test::black_box(b)),100000000); timing!(pow_new(test::black_box(a),test::black_box(b)),100000000); timing!(pow_rust(test::black_box(a),test::black_box(b)),100000000); println!(); timing!(test::black_box(a).pow(test::black_box(b)),100000000); timing!(pow_new(test::black_box(a),test::black_box(b)),100000000); timing!(pow_rust(test::black_box(a),test::black_box(b)),100000000); println!(); timing!(test::black_box(a).pow(test::black_box(b)),100000000); timing!(pow_new(test::black_box(a),test::black_box(b)),100000000); timing!(pow_rust(test::black_box(a),test::black_box(b)),100000000); println!(); timing!(test::black_box(a).pow(test::black_box(b)),100000000); timing!(pow_new(test::black_box(a),test::black_box(b)),100000000); timing!(pow_rust(test::black_box(a),test::black_box(b)),100000000); println!(); } ``` bench in my laptop: ``` neutron@Neutron:/me/rust$ rc commit.rs rustc commit.rs && ./commit 3.978419716s 0 4.079765171s 0 3.964630622s 0 3.997127013s 0 4.260304804s 0 3.997638211s 0 3.963195544s 0 4.11657718s 0 4.176054164s 0 3.830128579s 0 3.980396122s 0 3.937258567s 0 3.986055948s 0 4.127804162s 0 4.018943411s 0 4.185568857s 0 4.217512517s 0 3.98313603s 0 3.863018225s 0 4.030447988s 0 3.694878237s 0 4.206987927s 0 4.137608047s 0 4.115564664s 0 neutron@Neutron:/me/rust$ rc commit.rs -O rustc commit.rs -O && ./commit 162.111993ms 0 165.107125ms 0 166.26924ms 0 175.20479ms 0 205.062565ms 0 176.278791ms 0 174.408975ms 0 166.526899ms 0 201.857604ms 0 146.190062ms 0 168.592821ms 0 154.61411ms 0 199.678912ms 0 168.411598ms 0 162.129996ms 0 147.420765ms 0 209.759326ms 0 154.807907ms 0 165.507134ms 0 188.476239ms 0 157.351524ms 0 121.320123ms 0 126.401229ms 0 114.86428ms 0 ```
1 parent d9e8d62 commit 9dee590

File tree

1 file changed

+43
-45
lines changed

1 file changed

+43
-45
lines changed

src/libcore/num/mod.rs

Lines changed: 43 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1103,6 +1103,9 @@ $EndFeature, "
11031103
without modifying the original"]
11041104
#[inline]
11051105
pub const fn checked_pow(self, mut exp: u32) -> Option<Self> {
1106+
if exp == 0 {
1107+
return Some(1);
1108+
}
11061109
let mut base = self;
11071110
let mut acc: Self = 1;
11081111

@@ -1113,15 +1116,11 @@ $EndFeature, "
11131116
exp /= 2;
11141117
base = try_opt!(base.checked_mul(base));
11151118
}
1116-
1119+
// Since exp != 0, exp must be exactly 1 at this point.
11171120
// Deal with the final bit of the exponent separately, since
11181121
// squaring the base afterwards is not necessary and may cause a
11191122
// needless overflow.
1120-
if exp == 1 {
1121-
acc = try_opt!(acc.checked_mul(base));
1122-
}
1123-
1124-
Some(acc)
1123+
Some(try_opt!(acc.checked_mul(base)))
11251124
}
11261125
}
11271126

@@ -1631,6 +1630,9 @@ $EndFeature, "
16311630
without modifying the original"]
16321631
#[inline]
16331632
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
1633+
if exp == 0 {
1634+
return 1;
1635+
}
16341636
let mut base = self;
16351637
let mut acc: Self = 1;
16361638

@@ -1641,15 +1643,12 @@ $EndFeature, "
16411643
exp /= 2;
16421644
base = base.wrapping_mul(base);
16431645
}
1644-
1646+
1647+
// Since exp != 0, exp must be exactly 1 at this point.
16451648
// Deal with the final bit of the exponent separately, since
16461649
// squaring the base afterwards is not necessary and may cause a
16471650
// needless overflow.
1648-
if exp == 1 {
1649-
acc = acc.wrapping_mul(base);
1650-
}
1651-
1652-
acc
1651+
acc.wrapping_mul(base)
16531652
}
16541653
}
16551654

@@ -1999,6 +1998,9 @@ $EndFeature, "
19991998
without modifying the original"]
20001999
#[inline]
20012000
pub const fn overflowing_pow(self, mut exp: u32) -> (Self, bool) {
2001+
if exp == 0 {
2002+
return (1,false);
2003+
}
20022004
let mut base = self;
20032005
let mut acc: Self = 1;
20042006
let mut overflown = false;
@@ -2016,17 +2018,14 @@ $EndFeature, "
20162018
base = r.0;
20172019
overflown |= r.1;
20182020
}
2019-
2021+
2022+
// Since exp != 0, exp must be exactly 1 at this point.
20202023
// Deal with the final bit of the exponent separately, since
20212024
// squaring the base afterwards is not necessary and may cause a
20222025
// needless overflow.
2023-
if exp == 1 {
2024-
r = acc.overflowing_mul(base);
2025-
acc = r.0;
2026-
overflown |= r.1;
2027-
}
2028-
2029-
(acc, overflown)
2026+
r = acc.overflowing_mul(base);
2027+
r.1 |= overflown;
2028+
r
20302029
}
20312030
}
20322031

@@ -2050,6 +2049,9 @@ $EndFeature, "
20502049
#[inline]
20512050
#[rustc_inherit_overflow_checks]
20522051
pub const fn pow(self, mut exp: u32) -> Self {
2052+
if exp == 0 {
2053+
return 1;
2054+
}
20532055
let mut base = self;
20542056
let mut acc = 1;
20552057

@@ -2061,14 +2063,11 @@ $EndFeature, "
20612063
base = base * base;
20622064
}
20632065

2066+
// Since exp != 0, exp must be exactly 1 at this point.
20642067
// Deal with the final bit of the exponent separately, since
20652068
// squaring the base afterwards is not necessary and may cause a
20662069
// needless overflow.
2067-
if exp == 1 {
2068-
acc = acc * base;
2069-
}
2070-
2071-
acc
2070+
acc * base
20722071
}
20732072
}
20742073

@@ -3306,6 +3305,9 @@ assert_eq!(", stringify!($SelfT), "::MAX.checked_pow(2), None);", $EndFeature, "
33063305
without modifying the original"]
33073306
#[inline]
33083307
pub const fn checked_pow(self, mut exp: u32) -> Option<Self> {
3308+
if exp == 0 {
3309+
return Some(1);
3310+
}
33093311
let mut base = self;
33103312
let mut acc: Self = 1;
33113313

@@ -3317,14 +3319,12 @@ assert_eq!(", stringify!($SelfT), "::MAX.checked_pow(2), None);", $EndFeature, "
33173319
base = try_opt!(base.checked_mul(base));
33183320
}
33193321

3322+
// Since exp != 0, exp must be exactly 1 at this point.
33203323
// Deal with the final bit of the exponent separately, since
33213324
// squaring the base afterwards is not necessary and may cause a
33223325
// needless overflow.
3323-
if exp == 1 {
3324-
acc = try_opt!(acc.checked_mul(base));
3325-
}
33263326

3327-
Some(acc)
3327+
Some(try_opt!(acc.checked_mul(base)))
33283328
}
33293329
}
33303330

@@ -3715,6 +3715,9 @@ assert_eq!(3u8.wrapping_pow(6), 217);", $EndFeature, "
37153715
without modifying the original"]
37163716
#[inline]
37173717
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
3718+
if exp == 0 {
3719+
return 1;
3720+
}
37183721
let mut base = self;
37193722
let mut acc: Self = 1;
37203723

@@ -3726,14 +3729,11 @@ assert_eq!(3u8.wrapping_pow(6), 217);", $EndFeature, "
37263729
base = base.wrapping_mul(base);
37273730
}
37283731

3732+
// Since exp != 0, exp must be exactly 1 at this point.
37293733
// Deal with the final bit of the exponent separately, since
37303734
// squaring the base afterwards is not necessary and may cause a
37313735
// needless overflow.
3732-
if exp == 1 {
3733-
acc = acc.wrapping_mul(base);
3734-
}
3735-
3736-
acc
3736+
acc.wrapping_mul(base)
37373737
}
37383738
}
37393739

@@ -4058,16 +4058,14 @@ assert_eq!(3u8.overflowing_pow(6), (217, true));", $EndFeature, "
40584058
overflown |= r.1;
40594059
}
40604060

4061+
// Since exp != 0, exp must be exactly 1 at this point.
40614062
// Deal with the final bit of the exponent separately, since
40624063
// squaring the base afterwards is not necessary and may cause a
40634064
// needless overflow.
4064-
if exp == 1 {
4065-
r = acc.overflowing_mul(base);
4066-
acc = r.0;
4067-
overflown |= r.1;
4068-
}
4065+
r = acc.overflowing_mul(base);
4066+
r.1 |= overflown;
40694067

4070-
(acc, overflown)
4068+
r
40714069
}
40724070
}
40734071

@@ -4088,6 +4086,9 @@ Basic usage:
40884086
#[inline]
40894087
#[rustc_inherit_overflow_checks]
40904088
pub const fn pow(self, mut exp: u32) -> Self {
4089+
if exp == 0 {
4090+
return 1;
4091+
}
40914092
let mut base = self;
40924093
let mut acc = 1;
40934094

@@ -4099,14 +4100,11 @@ Basic usage:
40994100
base = base * base;
41004101
}
41014102

4103+
// Since exp != 0, exp must be exactly 1 at this point.
41024104
// Deal with the final bit of the exponent separately, since
41034105
// squaring the base afterwards is not necessary and may cause a
41044106
// needless overflow.
4105-
if exp == 1 {
4106-
acc = acc * base;
4107-
}
4108-
4109-
acc
4107+
acc * base
41104108
}
41114109
}
41124110

0 commit comments

Comments
 (0)