Skip to content

Commit 8d3a2e9

Browse files
Merge pull request #1003 from Mark-Simulacrum/std-dev
Refactor significance calculations
2 parents eed7816 + 21c3774 commit 8d3a2e9

File tree

3 files changed

+47
-14
lines changed

3 files changed

+47
-14
lines changed

site/src/api.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ pub mod comparison {
199199
pub profile: String,
200200
pub scenario: String,
201201
pub is_significant: bool,
202-
pub significance_threshold: f64,
202+
pub significance_factor: Option<f64>,
203203
pub is_dodgy: bool,
204204
pub historical_statistics: Option<Vec<f64>>,
205205
pub statistics: (f64, f64),

site/src/comparison.rs

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ pub async fn handle_compare(
114114
scenario: comparison.scenario.to_string(),
115115
is_dodgy: comparison.is_dodgy(),
116116
is_significant: comparison.is_significant(),
117-
significance_threshold: comparison.signifcance_threshold() * 100.0,
117+
significance_factor: comparison.significance_factor(),
118118
historical_statistics: comparison.variance.map(|v| v.data),
119119
statistics: comparison.results,
120120
})
@@ -658,7 +658,30 @@ impl BenchmarkVariance {
658658
deltas
659659
}
660660

661-
fn upper_fence(&self) -> f64 {
661+
// This is an absolute value indicating the noise barrier for changes on
662+
// this benchmark.
663+
//
664+
// A number line could be divded like this:
665+
//
666+
// ------o-------o----------
667+
// ^ ^ ^
668+
// | | |
669+
// | | |
670+
// | | ---- +significance_threshold
671+
// | |
672+
// | - not significant, includes zero
673+
// |
674+
// ---- -significance_threshold()
675+
fn significance_threshold(&self) -> f64 {
676+
let (q1, median, q3) = self.quartiles();
677+
678+
// Changes that are IQR_MULTIPLIER away from the median are considered
679+
// significant (i.e. outliers).
680+
median + (q3 - q1) * Self::IQR_MULTIPLIER
681+
}
682+
683+
// (q1, q2=median, q3)
684+
fn quartiles(&self) -> (f64, f64, f64) {
662685
let pcs = self.percent_changes();
663686
fn median(data: &[f64]) -> f64 {
664687
if data.len() % 2 == 0 {
@@ -674,12 +697,11 @@ impl BenchmarkVariance {
674697
} else {
675698
(len / 2 - 1, len / 2 + 1)
676699
};
677-
// significance is determined by the upper
678-
// interquartile range fence
679700
let q1 = median(&pcs[..=h1_end]);
701+
let q2 = median(&pcs[..]);
680702
let q3 = median(&pcs[h2_begin..]);
681-
let iqr = q3 - q1;
682-
q3 + (iqr * Self::IQR_MULTIPLIER)
703+
704+
(q1, q2, q3)
683705
}
684706

685707
fn calculate_description(&mut self) {
@@ -727,7 +749,9 @@ impl BenchmarkVariance {
727749
BenchmarkVarianceDescription::Noisy | BenchmarkVarianceDescription::HighlyVariable
728750
)
729751
} else {
730-
self.upper_fence() > 0.002
752+
// If changes are judged significant only exceeding 0.2%, then the
753+
// benchmark as a whole is dodgy.
754+
self.significance_threshold() * 100.0 > 0.2
731755
}
732756
}
733757
}
@@ -805,10 +829,11 @@ impl TestResultComparison {
805829
}
806830

807831
fn is_significant(&self) -> bool {
808-
self.relative_change().abs() > self.signifcance_threshold()
832+
self.relative_change().abs() >= self.significance_threshold()
809833
}
810834

811-
fn signifcance_threshold(&self) -> f64 {
835+
// Magnitude of change considered significant
836+
fn significance_threshold(&self) -> f64 {
812837
if !self.calc_new_sig {
813838
if self.is_dodgy() {
814839
Self::SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD_DODGY
@@ -818,14 +843,22 @@ impl TestResultComparison {
818843
} else {
819844
self.variance
820845
.as_ref()
821-
.map(|s| s.upper_fence())
846+
.map(|v| v.significance_threshold())
822847
.unwrap_or(Self::SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD)
823848
}
824849
}
825850

851+
/// This is a numeric magnitude of a particular change.
852+
fn significance_factor(&self) -> Option<f64> {
853+
let change = self.relative_change();
854+
let threshold = self.significance_threshold();
855+
// How many times the treshold this change is.
856+
Some(change.abs() / threshold)
857+
}
858+
826859
fn magnitude(&self) -> Magnitude {
827860
let change = self.relative_change().abs();
828-
let threshold = self.signifcance_threshold();
861+
let threshold = self.significance_threshold();
829862
let over_threshold = if change < threshold * 1.5 {
830863
Magnitude::VerySmall
831864
} else if change < threshold * 3.0 {

site/static/compare.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,7 @@ <h2>Comparing <span id="stat-header">{{stat}}</span> between <span id="before">{
453453
</a>
454454
</td>
455455
<td>
456-
{{ Math.abs(run.percent / run.significanceThreshold).toFixed(2) }}x
456+
{{ run.significance_factor ? run.significance_factor.toFixed(2) + "x" :"-" }}
457457
</td>
458458
</tr>
459459
</template>
@@ -571,7 +571,7 @@ <h2>Comparing <span id="stat-header">{{stat}}</span> between <span id="before">{
571571
datumB,
572572
percent,
573573
isDodgy,
574-
significanceThreshold: r.significance_threshold,
574+
significance_factor: r.significance_factor,
575575
isSignificant
576576
});
577577
}

0 commit comments

Comments
 (0)