Skip to content

Commit 9436049

Browse files
authored
Merge pull request #1506 from quickwit-oss/multifastfieldbench
add benchmark for multivalue fast field
2 parents 2a6479b + 21c9a26 commit 9436049

File tree

1 file changed

+148
-0
lines changed
  • src/fastfield/multivalued

1 file changed

+148
-0
lines changed

src/fastfield/multivalued/mod.rs

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,3 +386,151 @@ mod tests {
386386
Ok(())
387387
}
388388
}
389+
390+
#[cfg(all(test, feature = "unstable"))]
391+
mod bench {
392+
use std::collections::HashMap;
393+
use std::path::Path;
394+
395+
use test::{self, Bencher};
396+
397+
use super::*;
398+
use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr};
399+
use crate::fastfield::{CompositeFastFieldSerializer, FastFieldsWriter};
400+
use crate::indexer::doc_id_mapping::DocIdMapping;
401+
use crate::schema::{Cardinality, NumericOptions, Schema};
402+
use crate::Document;
403+
404+
/// Builds the deterministic input data used by the benchmarks in this module.
///
/// Returns `num_docs` rows; every row holds the values `0, 1, .., vals_per_doc - 1`
/// converted to `u64`. Either argument may be zero, in which case the outer
/// vector (or every row) is simply empty.
fn multi_values(num_docs: usize, vals_per_doc: usize) -> Vec<Vec<u64>> {
    (0..num_docs)
        // Every document receives the same run of values; the doc index is unused.
        .map(|_| (0..vals_per_doc).map(|val| val as u64).collect())
        .collect()
}
416+
417+
#[bench]
418+
fn bench_multi_value_fflookup(b: &mut Bencher) {
419+
let num_docs = 100_000;
420+
421+
let path = Path::new("test");
422+
let directory: RamDirectory = RamDirectory::create();
423+
let field = {
424+
let options = NumericOptions::default().set_fast(Cardinality::MultiValues);
425+
let mut schema_builder = Schema::builder();
426+
let field = schema_builder.add_u64_field("field", options);
427+
let schema = schema_builder.build();
428+
429+
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
430+
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
431+
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
432+
for block in &multi_values(num_docs, 3) {
433+
let mut doc = Document::new();
434+
for val in block {
435+
doc.add_u64(field, *val);
436+
}
437+
fast_field_writers.add_document(&doc);
438+
}
439+
fast_field_writers
440+
.serialize(&mut serializer, &HashMap::new(), None)
441+
.unwrap();
442+
serializer.close().unwrap();
443+
field
444+
};
445+
let file = directory.open_read(&path).unwrap();
446+
{
447+
let fast_fields_composite = CompositeFile::open(&file).unwrap();
448+
let data_idx = fast_fields_composite
449+
.open_read_with_idx(field, 0)
450+
.unwrap()
451+
.read_bytes()
452+
.unwrap();
453+
let idx_reader = fastfield_codecs::open(data_idx).unwrap();
454+
455+
let data_vals = fast_fields_composite
456+
.open_read_with_idx(field, 1)
457+
.unwrap()
458+
.read_bytes()
459+
.unwrap();
460+
let vals_reader = fastfield_codecs::open(data_vals).unwrap();
461+
let fast_field_reader = MultiValuedFastFieldReader::open(idx_reader, vals_reader);
462+
b.iter(|| {
463+
let mut sum = 0u64;
464+
let mut data = Vec::with_capacity(10);
465+
for i in 0u32..num_docs as u32 {
466+
fast_field_reader.get_vals(i, &mut data);
467+
sum += data.iter().sum::<u64>();
468+
}
469+
sum
470+
});
471+
}
472+
}
473+
474+
#[bench]
475+
fn bench_multi_value_ff_creation(b: &mut Bencher) {
476+
// 3 million ff entries
477+
let num_docs = 1_000_000;
478+
let multi_values = multi_values(num_docs, 3);
479+
480+
b.iter(|| {
481+
let directory: RamDirectory = RamDirectory::create();
482+
let options = NumericOptions::default().set_fast(Cardinality::MultiValues);
483+
let mut schema_builder = Schema::builder();
484+
let field = schema_builder.add_u64_field("field", options);
485+
let schema = schema_builder.build();
486+
487+
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
488+
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
489+
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
490+
for block in &multi_values {
491+
let mut doc = Document::new();
492+
for val in block {
493+
doc.add_u64(field, *val);
494+
}
495+
fast_field_writers.add_document(&doc);
496+
}
497+
fast_field_writers
498+
.serialize(&mut serializer, &HashMap::new(), None)
499+
.unwrap();
500+
serializer.close().unwrap();
501+
});
502+
}
503+
504+
#[bench]
505+
fn bench_multi_value_ff_creation_with_sorting(b: &mut Bencher) {
506+
// 3 million ff entries
507+
let num_docs = 1_000_000;
508+
let multi_values = multi_values(num_docs, 3);
509+
510+
let doc_id_mapping =
511+
DocIdMapping::from_new_id_to_old_id((0..1_000_000).collect::<Vec<_>>());
512+
513+
b.iter(|| {
514+
let directory: RamDirectory = RamDirectory::create();
515+
let options = NumericOptions::default().set_fast(Cardinality::MultiValues);
516+
let mut schema_builder = Schema::builder();
517+
let field = schema_builder.add_u64_field("field", options);
518+
let schema = schema_builder.build();
519+
520+
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
521+
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
522+
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
523+
for block in &multi_values {
524+
let mut doc = Document::new();
525+
for val in block {
526+
doc.add_u64(field, *val);
527+
}
528+
fast_field_writers.add_document(&doc);
529+
}
530+
fast_field_writers
531+
.serialize(&mut serializer, &HashMap::new(), Some(&doc_id_mapping))
532+
.unwrap();
533+
serializer.close().unwrap();
534+
});
535+
}
536+
}

0 commit comments

Comments
 (0)