diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 730e5501..7bbe190d 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -40,7 +40,9 @@ jobs:
- name: Prepare BSBM Benchmark
working-directory: bench
- run: cargo run --bin rdf-fusion-bench --profile codspeed -- prepare bsbm --dataset-size 1000
+ run: |
+ cargo run --bin rdf-fusion-bench --profile codspeed -- prepare bsbm-explore --dataset-size 1000
+ cargo run --bin rdf-fusion-bench --profile codspeed -- prepare bsbm-business-intelligence --dataset-size 1000
- name: Build the benchmark target(s)
# Limiting the number of jobs is an attempt to not violate GitHub's runner resources.
diff --git a/bench/Cargo.toml b/bench/Cargo.toml
index 4237b704..abc58ba5 100644
--- a/bench/Cargo.toml
+++ b/bench/Cargo.toml
@@ -38,5 +38,9 @@ codspeed-criterion-compat = { workspace = true, features = ["async_tokio"] }
workspace = true
[[bench]]
-name = "bsbm"
+name = "bsbm_explore"
+harness = false
+
+[[bench]]
+name = "bsbm_business_intelligence"
harness = false
\ No newline at end of file
diff --git a/bench/benches/bsbm_business_intelligence.rs b/bench/benches/bsbm_business_intelligence.rs
new file mode 100644
index 00000000..d83bf4e0
--- /dev/null
+++ b/bench/benches/bsbm_business_intelligence.rs
@@ -0,0 +1,420 @@
+//! Runs certain queries from the BSBM benchmark suite as part of the regular benchmark suite.
+//!
+//! The particular instance of a query (they are generated randomly) is picked arbitrarily. If we
+//! ever decide that queries in this file are not representative, we can easily change the query.
+//!
+//! The tests assume the presence of the benchmark data.
+
+use codspeed_criterion_compat::{criterion_group, criterion_main, Criterion};
+use futures::StreamExt;
+use rdf_fusion::io::RdfFormat;
+use rdf_fusion::store::Store;
+use rdf_fusion::{QueryOptions, QueryResults};
+use std::fs;
+use std::path::PathBuf;
+use tokio::runtime::{Builder, Runtime};
+
+fn bsbm_business_intelligence_q1(c: &mut Criterion) {
+ let runtime = create_runtime();
+ let store = runtime.block_on(load_bsbm_1000()).unwrap();
+
+ c.bench_function("BSBM Business Intelligence 1000 - Query 1", |b| {
+ b.to_async(&runtime).iter(|| async {
+ let result = store
+ .query_opt(
+ "
+ prefix bsbm:
+ prefix rev:
+
+ Select ?productType ?reviewCount
+ {
+ {
+ Select ?productType (count(?review) As ?reviewCount)
+ {
+ ?productType a bsbm:ProductType .
+ ?product a ?productType .
+ ?product bsbm:producer ?producer .
+ ?producer bsbm:country .
+ ?review bsbm:reviewFor ?product .
+ ?review rev:reviewer ?reviewer .
+ ?reviewer bsbm:country .
+ }
+ Group By ?productType
+ }
+ }
+ Order By desc(?reviewCount) ?productType
+ Limit 10
+ ",
+ QueryOptions::default(),
+ )
+ .await
+ .unwrap();
+ assert_number_of_results(result, 10).await;
+ });
+ });
+}
+
+fn bsbm_business_intelligence_q2(c: &mut Criterion) {
+ let runtime = create_runtime();
+ let store = runtime.block_on(load_bsbm_1000()).unwrap();
+
+ c.bench_function("BSBM Business Intelligence 1000 - Query 2", |b| {
+ b.to_async(&runtime).iter(|| async {
+ let result = store.query_opt("
+ prefix bsbm:
+
+ SELECT ?otherProduct ?sameFeatures
+ {
+ ?otherProduct a bsbm:Product .
+ FILTER(?otherProduct != )
+ {
+ SELECT ?otherProduct (count(?otherFeature) As ?sameFeatures)
+ {
+ bsbm:productFeature ?feature .
+ ?otherProduct bsbm:productFeature ?otherFeature .
+ FILTER(?feature=?otherFeature)
+ }
+ Group By ?otherProduct
+ }
+ }
+ Order By desc(?sameFeatures) ?otherProduct
+ Limit 10
+ ", QueryOptions::default()).await.unwrap();
+ assert_number_of_results(result, 10).await;
+ });
+ });
+}
+
+fn bsbm_business_intelligence_q3(c: &mut Criterion) {
+ let runtime = create_runtime();
+ let store = runtime.block_on(load_bsbm_1000()).unwrap();
+
+ c.bench_function("BSBM Business Intelligence 1000 - Query 3", |b| {
+ b.to_async(&runtime).iter(|| async {
+ let result = store.query_opt("
+ prefix bsbm:
+ prefix bsbm-inst:
+ prefix rev: prefix dc:
+ prefix xsd:
+ Select ?product (xsd:float(?monthCount)/?monthBeforeCount As ?ratio)
+ {
+ {
+ Select ?product (count(?review) As ?monthCount)
+ {
+ ?review bsbm:reviewFor ?product .
+ ?review dc:date ?date .
+ Filter(?date >= \"2007-10-10\"^^ && ?date < \"2007-11-07\"^^)\
+ }
+ Group By ?product
+ }
+ {
+ Select ?product (count(?review) As ?monthBeforeCount)
+ {
+ ?review bsbm:reviewFor ?product .
+ ?review dc:date ?date .
+ Filter(?date >= \"2007-09-12\"^^ && ?date < \"2007-10-10\"^^)\
+ }
+ Group By ?product
+ Having (count(?review)>0)
+ }
+ }
+ Order By desc(xsd:float(?monthCount) / ?monthBeforeCount) ?product
+ Limit 10
+ ", QueryOptions::default()).await.unwrap();
+ assert_number_of_results(result, 10).await;
+ });
+ });
+}
+
+fn bsbm_business_intelligence_q4(c: &mut Criterion) {
+ let runtime = create_runtime();
+ let store = runtime.block_on(load_bsbm_1000()).unwrap();
+
+ c.bench_function("BSBM Business Intelligence 1000 - Query 4", |b| {
+ b.to_async(&runtime).iter(|| async {
+ let result = store.query_opt("
+ prefix bsbm:
+ prefix bsbm-inst:
+ prefix xsd:
+
+ Select ?feature (?withFeaturePrice/?withoutFeaturePrice As ?priceRatio)
+ {
+ {
+ Select ?feature (avg(xsd:float(xsd:string(?price))) As ?withFeaturePrice)
+ {
+ ?product a ;
+ bsbm:productFeature ?feature .
+ ?offer bsbm:product ?product ;
+ bsbm:price ?price .
+ }
+ Group By ?feature
+ }
+ {
+ Select ?feature (avg(xsd:float(xsd:string(?price))) As ?withoutFeaturePrice)
+ {
+ {
+ Select distinct ?feature
+ {
+ ?p a ;
+ bsbm:productFeature ?feature .
+ }
+ }
+ ?product a .
+ ?offer bsbm:product ?product ;
+ bsbm:price ?price .
+ FILTER NOT EXISTS { ?product bsbm:productFeature ?feature }
+ }
+ Group By ?feature
+ }
+ }
+ Order By desc(?withFeaturePrice/?withoutFeaturePrice) ?feature
+ Limit 10
+ ", QueryOptions::default()).await.unwrap();
+ assert_number_of_results(result, 10).await;
+ });
+ });
+}
+
+fn bsbm_business_intelligence_q5(c: &mut Criterion) {
+ let runtime = create_runtime();
+ let store = runtime.block_on(load_bsbm_1000()).unwrap();
+
+ c.bench_function("BSBM Business Intelligence 1000 - Query 5", |b| {
+ b.to_async(&runtime).iter(|| async {
+ let result = store.query_opt("
+ prefix bsbm:
+ prefix bsbm-inst:
+ prefix rev:
+ prefix xsd:
+
+ Select ?country ?product ?nrOfReviews ?avgPrice
+ {
+ {
+ Select ?country (max(?nrOfReviews) As ?maxReviews)
+ {
+ {
+ Select ?country ?product (count(?review) As ?nrOfReviews)
+ {
+ ?product a .
+ ?review bsbm:reviewFor ?product ;
+ rev:reviewer ?reviewer .
+ ?reviewer bsbm:country ?country .
+ }
+ Group By ?country ?product
+ }
+ }
+ Group By ?country
+ }
+ {
+ Select ?country ?product (avg(xsd:float(xsd:string(?price))) As ?avgPrice)
+ {
+ ?product a .
+ ?offer bsbm:product ?product .
+ ?offer bsbm:price ?price .
+ }
+ Group By ?country ?product
+ }
+ {
+ Select ?country ?product (count(?review) As ?nrOfReviews)
+ {
+ ?product a .
+ ?review bsbm:reviewFor ?product .
+ ?review rev:reviewer ?reviewer .
+ ?reviewer bsbm:country ?country .
+ }
+ Group By ?country ?product
+ }
+ FILTER(?nrOfReviews=?maxReviews)
+ }
+ Order By desc(?nrOfReviews) ?country ?product
+ ", QueryOptions::default()).await.unwrap();
+ assert_number_of_results(result, 12).await;
+ });
+ });
+}
+
+fn bsbm_business_intelligence_q6(c: &mut Criterion) {
+ let runtime = create_runtime();
+ let store = runtime.block_on(load_bsbm_1000()).unwrap();
+
+ c.bench_function("BSBM Business Intelligence 1000 - Query 6", |b| {
+ b.to_async(&runtime).iter(|| async {
+ let result = store.query_opt("
+ prefix bsbm:
+ prefix bsbm-inst:
+ prefix rev:
+ prefix xsd:
+
+ Select ?reviewer (avg(xsd:float(?score)) As ?reviewerAvgScore)
+ {
+ {
+ Select (avg(xsd:float(?score)) As ?avgScore)
+ {
+ ?product bsbm:producer .
+ ?review bsbm:reviewFor ?product .
+ { ?review bsbm:rating1 ?score . } UNION
+ { ?review bsbm:rating2 ?score . } UNION
+ { ?review bsbm:rating3 ?score . } UNION
+ { ?review bsbm:rating4 ?score . }
+ }
+ }
+ ?product bsbm:producer .
+ ?review bsbm:reviewFor ?product .
+ ?review rev:reviewer ?reviewer .
+ { ?review bsbm:rating1 ?score . } UNION
+ { ?review bsbm:rating2 ?score . } UNION
+ { ?review bsbm:rating3 ?score . } UNION
+ { ?review bsbm:rating4 ?score . }
+ }
+ Group By ?reviewer
+ Having (avg(xsd:float(?score)) > min(?avgScore) * 1.5)
+ ", QueryOptions::default()).await.unwrap();
+ assert_number_of_results(result, 12).await;
+ });
+ });
+}
+
+fn bsbm_business_intelligence_q7(c: &mut Criterion) {
+ let runtime = create_runtime();
+ let store = runtime.block_on(load_bsbm_1000()).unwrap();
+
+ c.bench_function("BSBM Business Intelligence 1000 - Query 7", |b| {
+ b.to_async(&runtime).iter(|| async {
+ let result = store.query_opt("
+ prefix bsbm:
+ prefix bsbm-inst:
+ prefix xsd:
+
+ Select ?product
+ {
+ {
+ Select ?product
+ {
+ {
+ Select ?product (count(?offer) As ?offerCount)
+ {
+ ?product a .
+ ?offer bsbm:product ?product .
+ }
+ Group By ?product
+ }
+ }
+ Order By desc(?offerCount)
+ Limit 1000
+ }
+ FILTER NOT EXISTS
+ {
+ ?offer bsbm:product ?product .
+ ?offer bsbm:vendor ?vendor .
+ ?vendor bsbm:country ?country .
+ FILTER(?country=)
+ }
+ }
+ ", QueryOptions::default()).await.unwrap();
+ assert_number_of_results(result, 16).await;
+ });
+ });
+}
+
+fn bsbm_business_intelligence_q8(c: &mut Criterion) {
+ let runtime = create_runtime();
+ let store = runtime.block_on(load_bsbm_1000()).unwrap();
+
+ c.bench_function("BSBM Business Intelligence 1000 - Query 8", |b| {
+ b.to_async(&runtime).iter(|| async {
+ let result = store.query_opt("
+ prefix bsbm:
+ prefix bsbm-inst:
+ prefix xsd:
+
+ Select ?vendor (xsd:float(?belowAvg)/?offerCount As ?cheapExpensiveRatio)
+ {
+ {
+ Select ?vendor (count(?offer) As ?belowAvg)
+ {
+ {
+ ?product a .
+ ?offer bsbm:product ?product .
+ ?offer bsbm:vendor ?vendor .
+ ?offer bsbm:price ?price .
+ {
+ Select ?product (avg(xsd:float(xsd:string(?price))) As ?avgPrice)
+ {
+ ?product a .
+ ?offer bsbm:product ?product .
+ ?offer bsbm:vendor ?vendor .
+ ?offer bsbm:price ?price .
+ }
+ Group By ?product
+ }
+ } .
+ FILTER (xsd:float(xsd:string(?price)) < ?avgPrice)
+ }
+ Group By ?vendor
+ }
+ {
+ Select ?vendor (count(?offer) As ?offerCount)
+ {
+ ?product a .
+ ?offer bsbm:product ?product .
+ ?offer bsbm:vendor ?vendor .
+ }
+ Group By ?vendor
+ }
+ }
+ Order by desc(xsd:float(?belowAvg)/?offerCount) ?vendor
+ limit 10
+ ", QueryOptions::default()).await.unwrap();
+ assert_number_of_results(result, 10).await;
+ });
+ });
+}
+
+criterion_group!(
+ bsbm_business_intelligence,
+ bsbm_business_intelligence_q1,
+ bsbm_business_intelligence_q2,
+ // bsbm_business_intelligence_q3, TODO failing
+ // bsbm_business_intelligence_q4, TODO failing
+ bsbm_business_intelligence_q5,
+ bsbm_business_intelligence_q6,
+ bsbm_business_intelligence_q7,
+ // bsbm_business_intelligence_q8, TODO failing
+);
+criterion_main!(bsbm_business_intelligence);
+
+fn create_runtime() -> Runtime {
+ Builder::new_current_thread().enable_all().build().unwrap()
+}
+
+async fn load_bsbm_1000() -> anyhow::Result {
+ let data_path = PathBuf::from("./data/dataset-1000.nt");
+ let data = fs::read(data_path)?;
+ let memory_store = Store::new();
+ memory_store
+ .load_from_reader(RdfFormat::NTriples, data.as_slice())
+ .await?;
+ Ok(memory_store)
+}
+
+async fn assert_number_of_results(result: QueryResults, n: usize) {
+ match result {
+ QueryResults::Solutions(mut solutions) => {
+ let mut count = 0;
+ while let Some(sol) = solutions.next().await {
+ sol.unwrap();
+ count += 1;
+ }
+ assert_eq!(count, n);
+ }
+ QueryResults::Graph(mut triples) => {
+ let mut count = 0;
+ while let Some(sol) = triples.next().await {
+ sol.unwrap();
+ count += 1;
+ }
+ assert_eq!(count, n);
+ }
+ _ => panic!("Unexpected QueryResults"),
+ }
+}
diff --git a/bench/benches/bsbm.rs b/bench/benches/bsbm_explore.rs
similarity index 100%
rename from bench/benches/bsbm.rs
rename to bench/benches/bsbm_explore.rs
diff --git a/bench/src/benchmarks/bsbm/business_intelligence/benchmark.rs b/bench/src/benchmarks/bsbm/business_intelligence/benchmark.rs
new file mode 100644
index 00000000..382e832e
--- /dev/null
+++ b/bench/src/benchmarks/bsbm/business_intelligence/benchmark.rs
@@ -0,0 +1,235 @@
+use crate::benchmarks::bsbm::business_intelligence::operation::{
+ list_raw_operations, BsbmBusinessIntelligenceOperation, BsbmBusinessIntelligenceRawOperation,
+};
+use crate::benchmarks::bsbm::business_intelligence::report::{
+ BusinessIntelligenceReport, BusinessIntelligenceReportBuilder,
+};
+use crate::benchmarks::bsbm::BsbmDatasetSize;
+use crate::benchmarks::{Benchmark, BenchmarkName};
+use crate::environment::{BenchmarkContext, RdfFusionBenchContext};
+use crate::prepare::{ArchiveType, FileDownloadAction, PrepRequirement};
+use crate::report::BenchmarkReport;
+use crate::runs::BenchmarkRun;
+use async_trait::async_trait;
+use futures::StreamExt;
+use rdf_fusion::io::RdfFormat;
+use rdf_fusion::store::Store;
+use rdf_fusion::{Query, QueryOptions, QueryResults};
+use reqwest::Url;
+use std::fs;
+use std::fs::File;
+use std::path::PathBuf;
+use tokio::time::Instant;
+
+/// The [Berlin SPARQL Benchmark](http://wbsg.informatik.uni-mannheim.de/bizer/berlinsparqlbenchmark/)
+/// is a widely adopted benchmark built around an e-commerce use case.
+///
+/// This version of the benchmark uses the [pre-prepared datasets](https://zenodo.org/records/12663333)
+/// from Oxigraph.
+pub struct BsbmBusinessIntelligenceBenchmark {
+ name: BenchmarkName,
+ dataset_size: BsbmDatasetSize,
+ max_query_count: Option,
+}
+
+impl BsbmBusinessIntelligenceBenchmark {
+ /// Creates a new [BsbmBusinessIntelligenceBenchmark] with the given sizes.
+ pub fn new(dataset_size: BsbmDatasetSize, max_query_count: Option) -> Self {
+ let name = BenchmarkName::BsbmBusinessIntelligence {
+ dataset_size,
+ max_query_count,
+ };
+ Self {
+ name,
+ dataset_size,
+ max_query_count,
+ }
+ }
+
+ /// The BSBM also generates many queries that are tailored to the generated data. This method
+ /// returns a list of queries that should be executed during this run.
+ fn list_operations(
+ &self,
+ env: &RdfFusionBenchContext,
+ ) -> anyhow::Result> {
+ println!("Loading queries ...");
+
+ let queries_path = env.join_data_dir(
+ PathBuf::from(format!("businessIntelligence-{}.csv", self.dataset_size)).as_path(),
+ )?;
+ let result = match self.max_query_count {
+ None => list_raw_operations(&queries_path)?
+ .filter_map(parse_query)
+ .collect(),
+ Some(max_query_count) => list_raw_operations(&queries_path)?
+ .filter_map(parse_query)
+ .take(usize::try_from(max_query_count)?)
+ .collect(),
+ };
+
+ println!("Queries loaded.");
+ Ok(result)
+ }
+
+ async fn prepare_store(&self, bench_context: &BenchmarkContext<'_>) -> anyhow::Result {
+ println!("Creating in-memory store and loading data ...");
+ let data_path = bench_context
+ .parent()
+ .join_data_dir(PathBuf::from(format!("dataset-{}.nt", self.dataset_size)).as_path())?;
+ let data = fs::read(data_path)?;
+ let memory_store = Store::new();
+ memory_store
+ .load_from_reader(RdfFormat::NTriples, data.as_slice())
+ .await?;
+ println!("Store created and data loaded.");
+ Ok(memory_store)
+ }
+}
+
+#[async_trait]
+impl Benchmark for BsbmBusinessIntelligenceBenchmark {
+ fn name(&self) -> BenchmarkName {
+ self.name
+ }
+
+ #[allow(clippy::expect_used)]
+ fn requirements(&self) -> Vec {
+ let dataset_size = self.dataset_size;
+ let download_bsbm_tools = PrepRequirement::FileDownload {
+ url: Url::parse("https://github.com/Tpt/bsbm-tools/archive/59d0a8a605b26f21506789fa1a713beb5abf1cab.zip")
+ .expect("parse dataset-name"),
+ file_name: PathBuf::from("bsbmtools"),
+ action: Some(FileDownloadAction::Unpack(ArchiveType::Zip)),
+ };
+ let generate_dataset = PrepRequirement::RunCommand {
+ workdir: PathBuf::from("./bsbmtools"),
+ program: "./generate".to_owned(),
+ args: vec![
+ "-fc".to_owned(),
+ "-pc".to_owned(),
+ format!("{}", dataset_size),
+ "-dir".to_owned(),
+ "../td_data".to_owned(),
+ "-fn".to_owned(),
+ format!("../dataset-{}", dataset_size),
+ ],
+ check_requirement: Box::new(move || {
+ let exists = File::open(format!("./data/dataset-{dataset_size}.nt")).is_ok();
+ Ok(exists)
+ }),
+ };
+ let download_pregenerated_queries = PrepRequirement::FileDownload {
+ url: Url::parse(
+ "https://zenodo.org/records/12663333/files/businessIntelligence-1000.csv.bz2",
+ )
+ .expect("parse dataset-name"),
+ file_name: PathBuf::from("businessIntelligence-1000.csv"),
+ action: Some(FileDownloadAction::Unpack(ArchiveType::Bz2)),
+ };
+
+ vec![
+ download_bsbm_tools,
+ generate_dataset,
+ download_pregenerated_queries,
+ ]
+ }
+
+ async fn execute(
+ &self,
+ bench_context: &BenchmarkContext<'_>,
+ ) -> anyhow::Result> {
+ let operations = self.list_operations(bench_context.parent())?;
+ let memory_store = self.prepare_store(bench_context).await?;
+ let report = execute_benchmark(bench_context, operations, &memory_store).await?;
+ Ok(Box::new(report))
+ }
+}
+
+fn parse_query(
+ query: BsbmBusinessIntelligenceRawOperation,
+) -> Option {
+ match query {
+ BsbmBusinessIntelligenceRawOperation::Query(name, query) => {
+ // TODO remove once describe is supported
+ if query.contains("DESCRIBE") {
+ None
+ } else {
+ Some(BsbmBusinessIntelligenceOperation::Query(
+ name,
+ Query::parse(&query.replace('#', ""), None).unwrap(),
+ ))
+ }
+ }
+ }
+}
+
+async fn execute_benchmark(
+ context: &BenchmarkContext<'_>,
+ operations: Vec,
+ memory_store: &Store,
+) -> anyhow::Result {
+ println!("Evaluating queries ...");
+
+ let mut report = BusinessIntelligenceReportBuilder::new();
+ let len = operations.len();
+ for (idx, operation) in operations.iter().enumerate() {
+ if idx % 25 == 0 {
+ println!("Progress: {idx}/{len}");
+ }
+
+ run_operation(context, &mut report, memory_store, operation).await?;
+ }
+ let report = report.build();
+
+ println!("Progress: {len}/{len}");
+ println!("All queries evaluated.");
+
+ Ok(report)
+}
+
+/// Executes a single [BsbmBusinessIntelligenceOperation], profiles the execution, and stores the
+/// results of the profiling in the `report`.
+async fn run_operation(
+ context: &BenchmarkContext<'_>,
+ report: &mut BusinessIntelligenceReportBuilder,
+ store: &Store,
+ operation: &BsbmBusinessIntelligenceOperation,
+) -> anyhow::Result<()> {
+ let guard = pprof::ProfilerGuardBuilder::default()
+ .frequency(1000)
+ .blocklist(&["libc", "libgcc", "pthread", "vdso"])
+ .build()?;
+ let start = Instant::now();
+
+ let options = QueryOptions;
+ let (name, explanation) = match operation {
+ BsbmBusinessIntelligenceOperation::Query(name, q) => {
+ let (result, explanation) = store.explain_query_opt(q.clone(), options.clone()).await?;
+ match result {
+ QueryResults::Boolean(_) => (),
+ QueryResults::Solutions(mut s) => {
+ while let Some(s) = s.next().await {
+ s?;
+ }
+ }
+ QueryResults::Graph(mut g) => {
+ while let Some(t) = g.next().await {
+ t?;
+ }
+ }
+ }
+ (*name, explanation)
+ }
+ };
+
+ let run = BenchmarkRun {
+ duration: start.elapsed(),
+ report: Some(guard.report().build()?),
+ };
+ report.add_run(name, run);
+ if context.parent().options().verbose_results {
+ report.add_explanation(explanation);
+ }
+
+ Ok(())
+}
diff --git a/bench/src/benchmarks/bsbm/business_intelligence/mod.rs b/bench/src/benchmarks/bsbm/business_intelligence/mod.rs
new file mode 100644
index 00000000..8aca3b65
--- /dev/null
+++ b/bench/src/benchmarks/bsbm/business_intelligence/mod.rs
@@ -0,0 +1,69 @@
+mod benchmark;
+mod operation;
+mod report;
+
+use clap::ValueEnum;
+use std::fmt::{Display, Formatter};
+
+pub use benchmark::BsbmBusinessIntelligenceBenchmark;
+
+pub(super) const BSBM_BUSINESS_INTELLIGENCE_QUERIES: [BsbmBusinessIntelligenceQueryName; 8] = [
+ BsbmBusinessIntelligenceQueryName::Q1,
+ BsbmBusinessIntelligenceQueryName::Q2,
+ BsbmBusinessIntelligenceQueryName::Q3,
+ BsbmBusinessIntelligenceQueryName::Q4,
+ BsbmBusinessIntelligenceQueryName::Q5,
+ BsbmBusinessIntelligenceQueryName::Q6,
+ BsbmBusinessIntelligenceQueryName::Q7,
+ BsbmBusinessIntelligenceQueryName::Q8,
+];
+
+/// The BSBM business intelligence query names.
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, ValueEnum)]
+pub(super) enum BsbmBusinessIntelligenceQueryName {
+ Q1,
+ Q2,
+ Q3,
+ Q4,
+ Q5,
+ Q6,
+ Q7,
+ Q8,
+}
+
+impl TryFrom for BsbmBusinessIntelligenceQueryName {
+ type Error = anyhow::Error;
+
+ fn try_from(value: u8) -> Result {
+ match value {
+ 1 => Ok(BsbmBusinessIntelligenceQueryName::Q1),
+ 2 => Ok(BsbmBusinessIntelligenceQueryName::Q2),
+ 3 => Ok(BsbmBusinessIntelligenceQueryName::Q3),
+ 4 => Ok(BsbmBusinessIntelligenceQueryName::Q4),
+ 5 => Ok(BsbmBusinessIntelligenceQueryName::Q5),
+ 6 => Ok(BsbmBusinessIntelligenceQueryName::Q6),
+ 7 => Ok(BsbmBusinessIntelligenceQueryName::Q7),
+ 8 => Ok(BsbmBusinessIntelligenceQueryName::Q8),
+ _ => Err(anyhow::anyhow!(
+ "Invalid BSBM Business Intelligence query name: {}",
+ value
+ )),
+ }
+ }
+}
+
+impl Display for BsbmBusinessIntelligenceQueryName {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ let string = match self {
+ BsbmBusinessIntelligenceQueryName::Q1 => "Q1",
+ BsbmBusinessIntelligenceQueryName::Q2 => "Q2",
+ BsbmBusinessIntelligenceQueryName::Q3 => "Q3",
+ BsbmBusinessIntelligenceQueryName::Q4 => "Q4",
+ BsbmBusinessIntelligenceQueryName::Q5 => "Q5",
+ BsbmBusinessIntelligenceQueryName::Q6 => "Q6",
+ BsbmBusinessIntelligenceQueryName::Q7 => "Q7",
+ BsbmBusinessIntelligenceQueryName::Q8 => "Q8",
+ };
+ write!(f, "{string}")
+ }
+}
diff --git a/bench/src/benchmarks/bsbm/business_intelligence/operation.rs b/bench/src/benchmarks/bsbm/business_intelligence/operation.rs
new file mode 100644
index 00000000..30629aa0
--- /dev/null
+++ b/bench/src/benchmarks/bsbm/business_intelligence/operation.rs
@@ -0,0 +1,42 @@
+use crate::benchmarks::bsbm::business_intelligence::BsbmBusinessIntelligenceQueryName;
+use rdf_fusion::Query;
+use std::fs;
+use std::path::Path;
+
+#[allow(clippy::panic)]
+#[allow(clippy::panic_in_result_fn)]
+#[allow(clippy::expect_used)]
+pub(super) fn list_raw_operations(
+ path: &Path,
+) -> anyhow::Result> {
+ let reader = fs::read(path)?;
+ let result = csv::Reader::from_reader(reader.as_slice())
+ .records()
+ .collect::, _>>()?
+ .into_iter()
+ .map(|record| {
+ let query_id = record[0].parse::().expect("Can't parse query id");
+ let query_name =
+ BsbmBusinessIntelligenceQueryName::try_from(query_id).expect("Invalid query id");
+
+ match &record[1] {
+ "query" => {
+ BsbmBusinessIntelligenceRawOperation::Query(query_name, record[2].into())
+ }
+ _ => panic!("Unexpected operation kind {}", &record[1]),
+ }
+ });
+ Ok(result)
+}
+
+#[allow(dead_code)]
+#[derive(Clone)]
+pub(super) enum BsbmBusinessIntelligenceRawOperation {
+ Query(BsbmBusinessIntelligenceQueryName, String),
+}
+
+#[allow(clippy::large_enum_variant, clippy::allow_attributes)]
+#[derive(Clone)]
+pub(super) enum BsbmBusinessIntelligenceOperation {
+ Query(BsbmBusinessIntelligenceQueryName, Query),
+}
diff --git a/bench/src/benchmarks/bsbm/business_intelligence/report.rs b/bench/src/benchmarks/bsbm/business_intelligence/report.rs
new file mode 100644
index 00000000..b16b965c
--- /dev/null
+++ b/bench/src/benchmarks/bsbm/business_intelligence/report.rs
@@ -0,0 +1,212 @@
+use crate::benchmarks::bsbm::business_intelligence::{
+ BsbmBusinessIntelligenceQueryName, BSBM_BUSINESS_INTELLIGENCE_QUERIES,
+};
+use crate::report::BenchmarkReport;
+use crate::runs::{BenchmarkRun, BenchmarkRuns};
+use crate::utils::write_flamegraph;
+use anyhow::{bail, Context};
+use datafusion::physical_plan::displayable;
+use prettytable::{row, Table};
+use rdf_fusion::QueryExplanation;
+use std::collections::HashMap;
+use std::fs;
+use std::io::Write;
+use std::path::Path;
+
+/// Stores the final report of executing a BSBM business intelligence benchmark.
+pub struct BusinessIntelligenceReport {
+ /// Stores all runs of the benchmark grouped by the query name.
+ /// A single query name can have multiple instances (with random variables) in BSBM.
+ runs: HashMap,
+ /// Query explanations for each run.
+ explanations: Vec,
+}
+
+impl BusinessIntelligenceReport {
+ /// Writes a tabular summary of the query execution time.
+ fn write_summary(&self, writer: &mut W) -> anyhow::Result<()> {
+ // Create the table
+ let mut table = Table::new();
+ table.add_row(row!["Query", "Samples", "Average Duration"]);
+ for query in BSBM_BUSINESS_INTELLIGENCE_QUERIES {
+ let summary = self
+ .runs
+ .get(&query)
+ .map(BenchmarkRuns::summarize)
+ .transpose()?;
+
+ let samples = summary
+ .as_ref()
+ .map_or_else(|| "-".to_owned(), |s| s.number_of_samples.to_string());
+ let average_duration = summary
+ .as_ref()
+ .map_or_else(|| "-".to_owned(), |s| format!("{:?}", s.avg_duration));
+
+ table.add_row(row![query.to_string(), samples, average_duration]);
+ }
+ table.print(writer)?;
+
+ Ok(())
+ }
+
+ /// Write aggregated flamegraph.
+ fn write_aggregated_flamegraphs(&self, output_directory: &Path) -> anyhow::Result<()> {
+ if !output_directory.is_dir() {
+ bail!(
+ "Output directory {} does not exist",
+ output_directory.display()
+ );
+ }
+
+ for query in BSBM_BUSINESS_INTELLIGENCE_QUERIES {
+ let frames = self
+ .runs
+ .get(&query)
+ .map(BenchmarkRuns::accumulate_profiles)
+ .transpose()?;
+ if let Some(frames) = frames {
+ let flamegraph_file = output_directory.join(format!("{query}.svg"));
+ let mut flamegraph_file =
+ fs::File::create(flamegraph_file).context("Cannot create flamegraph file")?;
+ write_flamegraph(&mut flamegraph_file, &frames)?;
+ }
+ }
+
+ Ok(())
+ }
+
+ fn write_query_results(&self, output_directory: &Path, index: usize) -> anyhow::Result<()> {
+ let query_i_path = output_directory.join(format!("query{index}"));
+ fs::create_dir_all(&query_i_path).context("Cannot create query directory")?;
+
+ let summary_file = query_i_path.join("0_summary.txt");
+ let initial_logical_plan_file = query_i_path.join("1_initial_logical_plan.txt");
+ let optimized_logical_plan_file = query_i_path.join("2_optimized_logical_plan.txt");
+ let execution_plan_file = query_i_path.join("3_execution_plan.txt");
+
+ let explanation = self
+ .explanations
+ .get(index)
+ .context("Cannot get explanation")?;
+
+ // Write the initial logical plan
+ fs::write(
+ &summary_file,
+ format!("Planning Time:{:?}", explanation.planning_time),
+ )
+ .with_context(|| {
+ format!(
+ "Failed to write summary plan to {}",
+ initial_logical_plan_file.display()
+ )
+ })?;
+
+ // Write the initial logical plan
+ let initial_logical_plan = explanation.initial_logical_plan.to_string();
+ fs::write(
+ &initial_logical_plan_file,
+ format!("Initial Logical Plan:\n\n{initial_logical_plan}"),
+ )
+ .with_context(|| {
+ format!(
+ "Failed to write initial logical plan to {}",
+ initial_logical_plan_file.display()
+ )
+ })?;
+
+ // Write the optimized logical plan
+ let optimized_logical_plan = explanation.optimized_logical_plan.to_string();
+ fs::write(
+ &optimized_logical_plan_file,
+ format!("Optimized Logical Plan:\n\n{optimized_logical_plan}"),
+ )
+ .with_context(|| {
+ format!(
+ "Failed to write optimized logical plan to {}",
+ optimized_logical_plan_file.display()
+ )
+ })?;
+
+ // Write the execution plan
+ let execution_plan = displayable(explanation.execution_plan.as_ref()).indent(false);
+ fs::write(
+ &execution_plan_file,
+ format!("Execution Plan:\n\n{execution_plan}"),
+ )
+ .with_context(|| {
+ format!(
+ "Failed to write execution plan to {}",
+ execution_plan_file.display()
+ )
+ })?;
+
+ Ok(())
+ }
+}
+
+impl BenchmarkReport for BusinessIntelligenceReport {
+ fn write_results(&self, output_dir: &Path) -> anyhow::Result<()> {
+ let summary_txt = output_dir.join("summary.txt");
+ let mut summary_file = fs::File::create(summary_txt)?;
+ self.write_summary(&mut summary_file)?;
+
+ let flamegraphs_dir = output_dir.join("flamegraphs");
+ fs::create_dir_all(&flamegraphs_dir)
+ .context("Cannot create flamegraphs directory before writing flamegraphs")?;
+ self.write_aggregated_flamegraphs(&flamegraphs_dir)?;
+
+ if !self.explanations.is_empty() {
+ let queries_path = output_dir.join("queries");
+ fs::create_dir_all(&queries_path).context("Cannot create queries directory")?;
+ for i in 0..self.explanations.len() {
+ self.write_query_results(&queries_path, i)?;
+ }
+ }
+
+ Ok(())
+ }
+}
+
+/// Builder for the [`BusinessIntelligenceReport`].
+///
+/// This should only be accessible to the benchmark code.
+pub(super) struct BusinessIntelligenceReportBuilder {
+ /// The inner report that is being built.
+ report: BusinessIntelligenceReport,
+}
+
+impl BusinessIntelligenceReportBuilder {
+ /// Creates a new builder.
+ pub(super) fn new() -> Self {
+ Self {
+ report: BusinessIntelligenceReport {
+ runs: HashMap::new(),
+ explanations: Vec::new(),
+ },
+ }
+ }
+
+ /// Adds a run to a particular query.
+ pub(super) fn add_run(&mut self, name: BsbmBusinessIntelligenceQueryName, run: BenchmarkRun) {
+ let runs = self.report.runs.entry(name).or_default();
+ runs.add_run(run);
+ }
+
+ /// Adds an explanation for a particular query.
+ ///
+ /// It is expected that the n-th call of this method is the explanation of the n-th query.
+ pub(super) fn add_explanation(&mut self, explanation: QueryExplanation) {
+ self.report.explanations.push(explanation)
+ }
+
+ /// Finalizes the report.
+ pub(super) fn build(self) -> BusinessIntelligenceReport {
+ self.report
+ }
+}
+
+impl Default for BusinessIntelligenceReportBuilder {
+ fn default() -> Self {
+ Self::new()
+ }
+}
diff --git a/bench/src/benchmarks/bsbm/explore/benchmark.rs b/bench/src/benchmarks/bsbm/explore/benchmark.rs
index 95c3366d..9e56085a 100644
--- a/bench/src/benchmarks/bsbm/explore/benchmark.rs
+++ b/bench/src/benchmarks/bsbm/explore/benchmark.rs
@@ -33,7 +33,7 @@ pub struct BsbmExploreBenchmark {
impl BsbmExploreBenchmark {
/// Creates a new [BsbmExploreBenchmark] with the given sizes.
pub fn new(dataset_size: BsbmDatasetSize, max_query_count: Option) -> Self {
- let name = BenchmarkName::Bsbm {
+ let name = BenchmarkName::BsbmExplore {
dataset_size,
max_query_count,
};
diff --git a/bench/src/benchmarks/bsbm/mod.rs b/bench/src/benchmarks/bsbm/mod.rs
index c6d522a8..6f5ebf93 100644
--- a/bench/src/benchmarks/bsbm/mod.rs
+++ b/bench/src/benchmarks/bsbm/mod.rs
@@ -1,8 +1,10 @@
use clap::ValueEnum;
use std::fmt::{Display, Formatter};
+mod business_intelligence;
mod explore;
+pub use business_intelligence::BsbmBusinessIntelligenceBenchmark;
pub use explore::BsbmExploreBenchmark;
/// Indicates the size of the dataset.
diff --git a/bench/src/benchmarks/name.rs b/bench/src/benchmarks/name.rs
index 92aaa3c6..f507da53 100644
--- a/bench/src/benchmarks/name.rs
+++ b/bench/src/benchmarks/name.rs
@@ -4,8 +4,17 @@ use std::fmt::{Display, Formatter};
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Subcommand)]
pub enum BenchmarkName {
- /// Runs a BSBM instance with already generated queries from Oxigraph.
- Bsbm {
+ /// Represents the BSBM explore benchmark.
+ BsbmExplore {
+ /// Indicates the scaling of the dataset.
+ #[arg(short, long, default_value = "1000")]
+ dataset_size: BsbmDatasetSize,
+ /// Provides an upper bound on the number of queries to be executed.
+ #[arg(short, long)]
+ max_query_count: Option,
+ },
+ /// Represents the BSBM business intelligence benchmark.
+ BsbmBusinessIntelligence {
/// Indicates the scaling of the dataset.
#[arg(short, long, default_value = "1000")]
dataset_size: BsbmDatasetSize,
@@ -19,12 +28,19 @@ impl BenchmarkName {
/// Returns a directory name for the benchmark.
pub fn dir_name(&self) -> String {
match self {
- BenchmarkName::Bsbm {
+ BenchmarkName::BsbmExplore {
+ dataset_size,
+ max_query_count,
+ } => match max_query_count {
+ Some(max_query_count) => format!("bsbm-explore-{dataset_size}-{max_query_count}"),
+ None => format!("bsbm-explore-{dataset_size}"),
+ },
+ BenchmarkName::BsbmBusinessIntelligence {
dataset_size,
max_query_count,
} => match max_query_count {
- Some(max_query_count) => format!("bsbm-{dataset_size}-{max_query_count}"),
- None => format!("bsbm-{dataset_size}"),
+ Some(max_query_count) => format!("bsbm-bi-{dataset_size}-{max_query_count}"),
+ None => format!("bsbm-bi-{dataset_size}"),
},
}
}
@@ -33,7 +49,7 @@ impl BenchmarkName {
impl Display for BenchmarkName {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
- BenchmarkName::Bsbm {
+ BenchmarkName::BsbmExplore {
dataset_size,
max_query_count,
} => match max_query_count {
@@ -43,6 +59,16 @@ impl Display for BenchmarkName {
),
None => write!(f, "BSBM Explore: dataset_size={dataset_size}"),
},
+ BenchmarkName::BsbmBusinessIntelligence {
+ dataset_size,
+ max_query_count,
+ } => match max_query_count {
+ Some(max_query_count) => write!(
+ f,
+ "BSBM Business Intelligence: dataset_size={dataset_size}, max_query_count={max_query_count}"
+ ),
+ None => write!(f, "BSBM Business Intelligence: dataset_size={dataset_size}"),
+ },
}
}
}
diff --git a/bench/src/lib.rs b/bench/src/lib.rs
index 8b865058..3fe40ec2 100644
--- a/bench/src/lib.rs
+++ b/bench/src/lib.rs
@@ -1,6 +1,6 @@
#![allow(clippy::print_stdout)]
-use crate::benchmarks::bsbm::BsbmExploreBenchmark;
+use crate::benchmarks::bsbm::{BsbmBusinessIntelligenceBenchmark, BsbmExploreBenchmark};
use crate::benchmarks::{Benchmark, BenchmarkName};
use crate::environment::RdfFusionBenchContext;
use clap::ValueEnum;
@@ -80,9 +80,16 @@ pub async fn execute_benchmark_operation(
fn create_benchmark_instance(benchmark: BenchmarkName) -> Box {
match benchmark {
- BenchmarkName::Bsbm {
+ BenchmarkName::BsbmExplore {
dataset_size,
max_query_count: query_size,
} => Box::new(BsbmExploreBenchmark::new(dataset_size, query_size)),
+ BenchmarkName::BsbmBusinessIntelligence {
+ dataset_size,
+ max_query_count: query_size,
+ } => Box::new(BsbmBusinessIntelligenceBenchmark::new(
+ dataset_size,
+ query_size,
+ )),
}
}