Skip to content

Commit 7bfc0bd

Browse files
committed
Refactor handling of historical data
1 parent 21979c8 commit 7bfc0bd

File tree

1 file changed

+23
-83
lines changed

1 file changed

+23
-83
lines changed

site/src/comparison.rs

Lines changed: 23 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -606,7 +606,8 @@ async fn compare_given_commits(
606606
let statistics_for_a = statistics_from_series(&mut responses);
607607
let statistics_for_b = statistics_from_series(&mut responses);
608608

609-
let variances = BenchmarkVariances::calculate(ctxt, a.clone(), master_commits, stat).await?;
609+
let mut historical_data =
610+
HistoricalDataMap::calculate(ctxt, a.clone(), master_commits, stat).await?;
610611
let statistics = statistics_for_a
611612
.into_iter()
612613
.filter_map(|(test_case, a)| {
@@ -616,7 +617,7 @@ async fn compare_given_commits(
616617
benchmark: test_case.0,
617618
profile: test_case.1,
618619
scenario: test_case.2,
619-
variance: variances.data.get(&test_case).cloned(),
620+
historical_data: historical_data.data.remove(&test_case),
620621
results: (a, b),
621622
})
622623
})
@@ -815,14 +816,13 @@ impl Comparison {
815816
}
816817
}
817818

818-
/// A description of the amount of variance a certain benchmark is historically
819-
/// experiencing at a given point in time.
820-
pub struct BenchmarkVariances {
821-
/// Variance data on a per test case basis
822-
pub data: HashMap<(Benchmark, Profile, Scenario), BenchmarkVariance>,
819+
/// The historical data for a certain benchmark
820+
pub struct HistoricalDataMap {
821+
/// Historical data on a per test case basis
822+
pub data: HashMap<(Benchmark, Profile, Scenario), HistoricalData>,
823823
}
824824

825-
impl BenchmarkVariances {
825+
impl HistoricalDataMap {
826826
const NUM_PREVIOUS_COMMITS: usize = 100;
827827
const MIN_PREVIOUS_COMMITS: usize = 50;
828828

@@ -832,18 +832,18 @@ impl BenchmarkVariances {
832832
master_commits: &[collector::MasterCommit],
833833
stat: String,
834834
) -> Result<Self, BoxedError> {
835-
let mut variance_data = HashMap::new();
835+
let mut historical_data = HashMap::new();
836836

837837
let previous_commits = Arc::new(previous_commits(
838838
from,
839839
Self::NUM_PREVIOUS_COMMITS,
840840
master_commits,
841841
));
842842

843-
// Return early if we don't have enough data to calculate variance.
843+
// Return early if we don't have enough data for historical analysis
844844
if previous_commits.len() < Self::MIN_PREVIOUS_COMMITS {
845845
return Ok(Self {
846-
data: variance_data,
846+
data: historical_data,
847847
});
848848
}
849849

@@ -860,37 +860,25 @@ impl BenchmarkVariances {
860860

861861
for _ in previous_commits.iter() {
862862
for (test_case, stat) in statistics_from_series(&mut previous_commit_series) {
863-
variance_data.entry(test_case).or_default().push(stat);
863+
historical_data.entry(test_case).or_default().push(stat);
864864
}
865865
}
866866

867867
// Only retain test cases for which we have enough data to calculate variance.
868-
variance_data.retain(|_, v| v.data.len() >= Self::MIN_PREVIOUS_COMMITS);
869-
870-
for ((bench, _, _), results) in variance_data.iter_mut() {
871-
log::trace!("Calculating variance for: {}", bench);
872-
results.calculate_description();
873-
}
868+
historical_data.retain(|_, v| v.data.len() >= Self::MIN_PREVIOUS_COMMITS);
874869

875870
Ok(Self {
876-
data: variance_data,
871+
data: historical_data,
877872
})
878873
}
879874
}
880875

881876
#[derive(Debug, Default, Clone, Serialize)]
882-
pub struct BenchmarkVariance {
877+
pub struct HistoricalData {
883878
data: Vec<f64>,
884-
description: BenchmarkVarianceDescription,
885879
}
886880

887-
impl BenchmarkVariance {
888-
/// The ratio of change that we consider significant.
889-
const SIGNFICANT_DELTA_THRESHOLD: f64 = 0.01;
890-
/// The percentage of significant changes that we consider too high
891-
const SIGNFICANT_CHANGE_THRESHOLD: f64 = 5.0;
892-
/// The ratio of change that constitutes noisy data
893-
const NOISE_THRESHOLD: f64 = 0.001;
881+
impl HistoricalData {
894882
/// The multiple of the IQR above Q3 that signifies significance
895883
const IQR_MULTIPLIER: f64 = 3.0;
896884

@@ -954,36 +942,6 @@ impl BenchmarkVariance {
954942
(q1, q3)
955943
}
956944

957-
fn calculate_description(&mut self) {
958-
self.description = BenchmarkVarianceDescription::Normal;
959-
960-
let percent_changes = self.percent_changes();
961-
let non_significant = percent_changes
962-
.iter()
963-
.take_while(|&&c| c < Self::SIGNFICANT_DELTA_THRESHOLD)
964-
.collect::<Vec<_>>();
965-
966-
let percent_significant_changes = ((percent_changes.len() - non_significant.len()) as f64
967-
/ percent_changes.len() as f64)
968-
* 100.0;
969-
log::trace!(
970-
"Percent significant changes: {:.1}%",
971-
percent_significant_changes
972-
);
973-
974-
if percent_significant_changes > Self::SIGNFICANT_CHANGE_THRESHOLD {
975-
self.description = BenchmarkVarianceDescription::HighlyVariable;
976-
return;
977-
}
978-
979-
let delta_mean =
980-
non_significant.iter().cloned().sum::<f64>() / (non_significant.len() as f64);
981-
log::trace!("Ratio change: {:.4}", delta_mean);
982-
if delta_mean > Self::NOISE_THRESHOLD {
983-
self.description = BenchmarkVarianceDescription::Noisy;
984-
}
985-
}
986-
987945
// Absolute deltas between adjacent results
988946
fn deltas(&self) -> impl Iterator<Item = f64> + '_ {
989947
self.data
@@ -999,24 +957,6 @@ impl BenchmarkVariance {
999957
}
1000958
}
1001959

1002-
#[derive(Debug, Clone, Copy, Serialize)]
1003-
#[serde(tag = "type", content = "percent")]
1004-
pub enum BenchmarkVarianceDescription {
1005-
Normal,
1006-
/// A highly variable benchmark that produces many significant changes.
1007-
/// This might indicate a benchmark which is very sensitive to compiler changes.
1008-
HighlyVariable,
1009-
/// A noisy benchmark which is likely to see changes in performance simply between
1010-
/// compiler runs.
1011-
Noisy,
1012-
}
1013-
1014-
impl Default for BenchmarkVarianceDescription {
1015-
fn default() -> Self {
1016-
Self::Normal
1017-
}
1018-
}
1019-
1020960
/// Gets the previous commit
1021961
pub fn prev_commit<'a>(
1022962
artifact: &ArtifactId,
@@ -1048,14 +988,14 @@ pub struct TestResultComparison {
1048988
benchmark: Benchmark,
1049989
profile: Profile,
1050990
scenario: Scenario,
1051-
variance: Option<BenchmarkVariance>,
991+
historical_data: Option<HistoricalData>,
1052992
results: (f64, f64),
1053993
}
1054994

1055995
impl TestResultComparison {
1056996
/// The amount of relative change considered significant when
1057997
/// we cannot determine from historical data
1058-
const SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD: f64 = 0.002;
998+
const DEFAULT_SIGNIFICANCE_THRESHOLD: f64 = 0.002;
1059999

10601000
pub fn benchmark(&self) -> Benchmark {
10611001
self.benchmark
@@ -1077,10 +1017,10 @@ impl TestResultComparison {
10771017

10781018
/// Magnitude of change considered significant
10791019
fn significance_threshold(&self) -> f64 {
1080-
self.variance
1020+
self.historical_data
10811021
.as_ref()
1082-
.map(|v| v.significance_threshold())
1083-
.unwrap_or(Self::SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD)
1022+
.map(|d| d.significance_threshold())
1023+
.unwrap_or(Self::DEFAULT_SIGNIFICANCE_THRESHOLD)
10841024
}
10851025

10861026
/// This is a numeric magnitude of a particular change.
@@ -1139,7 +1079,7 @@ impl TestResultComparison {
11391079
}
11401080

11411081
fn is_dodgy(&self) -> bool {
1142-
self.variance
1082+
self.historical_data
11431083
.as_ref()
11441084
.map(|v| v.is_dodgy())
11451085
.unwrap_or(false)
@@ -1473,7 +1413,7 @@ mod tests {
14731413
benchmark: index.to_string().as_str().into(),
14741414
profile: Profile::Check,
14751415
scenario: Scenario::Empty,
1476-
variance: None,
1416+
historical_data: None,
14771417
results: (before, after),
14781418
});
14791419
}

0 commit comments

Comments
 (0)