Skip to content

Commit 62d1115

Browse files
authored
Merge pull request #319 from meilisearch/dup-sort-custom-comparator
Add Dup Sort Custom Comparator support
2 parents 2988ff8 + 4306d61 commit 62d1115

File tree

9 files changed

+329
-71
lines changed

9 files changed

+329
-71
lines changed

examples/custom-dupsort-comparator.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
use std::cmp::Ordering;
2+
use std::error::Error;
3+
use std::fs;
4+
use std::path::Path;
5+
6+
use byteorder::BigEndian;
7+
use heed::{DatabaseFlags, EnvOpenOptions};
8+
use heed_traits::Comparator;
9+
use heed_types::{Str, U128};
10+
11+
enum DescendingIntCmp {}
12+
13+
impl Comparator for DescendingIntCmp {
14+
fn compare(a: &[u8], b: &[u8]) -> Ordering {
15+
a.cmp(b).reverse()
16+
}
17+
}
18+
19+
fn main() -> Result<(), Box<dyn Error>> {
20+
let env_path = Path::new("target").join("custom-dupsort-cmp.mdb");
21+
22+
let _ = fs::remove_dir_all(&env_path);
23+
24+
fs::create_dir_all(&env_path)?;
25+
let env = unsafe {
26+
EnvOpenOptions::new()
27+
.map_size(10 * 1024 * 1024) // 10MB
28+
.max_dbs(3)
29+
.open(env_path)?
30+
};
31+
32+
let mut wtxn = env.write_txn()?;
33+
let db = env
34+
.database_options()
35+
.types::<Str, U128<BigEndian>>()
36+
.flags(DatabaseFlags::DUP_SORT)
37+
.dup_sort_comparator::<DescendingIntCmp>()
38+
.create(&mut wtxn)?;
39+
wtxn.commit()?;
40+
41+
let mut wtxn = env.write_txn()?;
42+
43+
// We fill our database with entries.
44+
db.put(&mut wtxn, "1", &1)?;
45+
db.put(&mut wtxn, "1", &2)?;
46+
db.put(&mut wtxn, "1", &3)?;
47+
db.put(&mut wtxn, "2", &4)?;
48+
db.put(&mut wtxn, "1", &5)?;
49+
db.put(&mut wtxn, "0", &0)?;
50+
51+
// We check that the keys are in lexicographic order and the values of each key in descending order.
52+
let mut iter = db.iter(&wtxn)?;
53+
assert_eq!(iter.next().transpose()?, Some(("0", 0)));
54+
assert_eq!(iter.next().transpose()?, Some(("1", 5)));
55+
assert_eq!(iter.next().transpose()?, Some(("1", 3)));
56+
assert_eq!(iter.next().transpose()?, Some(("1", 2)));
57+
assert_eq!(iter.next().transpose()?, Some(("1", 1)));
58+
assert_eq!(iter.next().transpose()?, Some(("2", 4)));
59+
drop(iter);
60+
61+
Ok(())
62+
}

heed/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,10 @@ path = "../examples/cursor-append.rs"
131131
name = "custom-comparator"
132132
path = "../examples/custom-comparator.rs"
133133

134+
[[example]]
135+
name = "custom-dupsort-comparator"
136+
path = "../examples/custom-dupsort-comparator.rs"
137+
134138
[[example]]
135139
name = "multi-env"
136140
path = "../examples/multi-env.rs"

heed/src/cookbook.rs

Lines changed: 141 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
//! - [Create Custom and Prefix Codecs](#create-custom-and-prefix-codecs)
66
//! - [Change the Environment Size Dynamically](#change-the-environment-size-dynamically)
77
//! - [Advanced Multithreaded Access of Entries](#advanced-multithreaded-access-of-entries)
8+
//! - [Use Custom Key Comparator](#use-custom-key-comparator)
9+
//! - [Use Custom Dupsort Comparator](#use-custom-dupsort-comparator)
810
//!
911
//! # Decode Values on Demand
1012
//!
@@ -441,9 +443,146 @@
441443
//! unsafe impl Sync for ImmutableMap<'_> {}
442444
//! ```
443445
//!
446+
//! # Use Custom Key Comparator
447+
//!
448+
//! LMDB keys are sorted in lexicographic order by default. To change this behavior
449+
//! you can implement a custom [`Comparator`] and provide it when creating the database.
450+
//!
451+
//! Under the hood this translates into a [`mdb_set_compare`] call.
452+
//!
453+
//! ```
454+
//! use std::cmp::Ordering;
455+
//! use std::error::Error;
456+
//! use std::str;
457+
//!
458+
//! use heed::EnvOpenOptions;
459+
//! use heed_traits::Comparator;
460+
//! use heed_types::{Str, Unit};
461+
//!
462+
//! enum StringAsIntCmp {}
463+
//!
464+
//! // This function takes two strings which represent integers,
465+
//! // parses them into i32s and compares the parsed values.
466+
//! // Therefore "-1000" < "-100" must be true even without '0' padding.
467+
//! impl Comparator for StringAsIntCmp {
468+
//! fn compare(a: &[u8], b: &[u8]) -> Ordering {
469+
//! let a: i32 = str::from_utf8(a).unwrap().parse().unwrap();
470+
//! let b: i32 = str::from_utf8(b).unwrap().parse().unwrap();
471+
//! a.cmp(&b)
472+
//! }
473+
//! }
474+
//!
475+
//! fn main() -> Result<(), Box<dyn Error>> {
476+
//! let path = tempfile::tempdir()?;
477+
//!
478+
//! let env = unsafe {
479+
//! EnvOpenOptions::new()
480+
//! .map_size(10 * 1024 * 1024) // 10MB
481+
//! .max_dbs(3)
482+
//! .open(path)?
483+
//! };
484+
//!
485+
//! let mut wtxn = env.write_txn()?;
486+
//! let db = env
487+
//! .database_options()
488+
//! .types::<Str, Unit>()
489+
//! .key_comparator::<StringAsIntCmp>()
490+
//! .create(&mut wtxn)?;
491+
//! wtxn.commit()?;
492+
//!
493+
//! let mut wtxn = env.write_txn()?;
494+
//!
495+
//! // We fill our database with entries.
496+
//! db.put(&mut wtxn, "-100000", &())?;
497+
//! db.put(&mut wtxn, "-10000", &())?;
498+
//! db.put(&mut wtxn, "-1000", &())?;
499+
//! db.put(&mut wtxn, "-100", &())?;
500+
//! db.put(&mut wtxn, "100", &())?;
501+
//!
502+
//! // We check that the keys are in numeric order ("-100000" < "-10000" < "-1000"...)
503+
//! let mut iter = db.iter(&wtxn)?;
504+
//! assert_eq!(iter.next().transpose()?, Some(("-100000", ())));
505+
//! assert_eq!(iter.next().transpose()?, Some(("-10000", ())));
506+
//! assert_eq!(iter.next().transpose()?, Some(("-1000", ())));
507+
//! assert_eq!(iter.next().transpose()?, Some(("-100", ())));
508+
//! assert_eq!(iter.next().transpose()?, Some(("100", ())));
509+
//! drop(iter);
510+
//!
511+
//! Ok(())
512+
//! }
513+
//! ```
514+
//!
515+
//! # Use Custom Dupsort Comparator
516+
//!
517+
//! When using DUPSORT, LMDB sorts the values of the same key in lexicographic order by default.
518+
//! To change this behavior you can implement a custom [`Comparator`] and provide it when
519+
//! creating the database.
520+
//!
521+
//! Under the hood this translates into a [`mdb_set_dupsort`] call.
522+
//!
523+
//! ```
524+
//! use std::cmp::Ordering;
525+
//! use std::error::Error;
526+
//!
527+
//! use byteorder::BigEndian;
528+
//! use heed::{DatabaseFlags, EnvOpenOptions};
529+
//! use heed_traits::Comparator;
530+
//! use heed_types::{Str, U128};
531+
//!
532+
//! enum DescendingIntCmp {}
533+
//!
534+
//! impl Comparator for DescendingIntCmp {
535+
//! fn compare(a: &[u8], b: &[u8]) -> Ordering {
536+
//! a.cmp(&b).reverse()
537+
//! }
538+
//! }
539+
//!
540+
//! fn main() -> Result<(), Box<dyn Error>> {
541+
//! let path = tempfile::tempdir()?;
542+
//!
543+
//! let env = unsafe {
544+
//! EnvOpenOptions::new()
545+
//! .map_size(10 * 1024 * 1024) // 10MB
546+
//! .max_dbs(3)
547+
//! .open(path)?
548+
//! };
549+
//!
550+
//! let mut wtxn = env.write_txn()?;
551+
//! let db = env
552+
//! .database_options()
553+
//! .types::<Str, U128<BigEndian>>()
554+
//! .flags(DatabaseFlags::DUP_SORT)
555+
//! .dup_sort_comparator::<DescendingIntCmp>()
556+
//! .create(&mut wtxn)?;
557+
//! wtxn.commit()?;
558+
//!
559+
//! let mut wtxn = env.write_txn()?;
560+
//!
561+
//! // We fill our database with entries.
562+
//! db.put(&mut wtxn, "1", &1)?;
563+
//! db.put(&mut wtxn, "1", &2)?;
564+
//! db.put(&mut wtxn, "1", &3)?;
565+
//! db.put(&mut wtxn, "2", &4)?;
566+
//! db.put(&mut wtxn, "1", &5)?;
567+
//! db.put(&mut wtxn, "0", &0)?;
568+
//!
569+
//! // We check that the keys are in lexicographic order and the values of each key in descending order.
570+
//! let mut iter = db.iter(&wtxn)?;
571+
//! assert_eq!(iter.next().transpose()?, Some(("0", 0)));
572+
//! assert_eq!(iter.next().transpose()?, Some(("1", 5)));
573+
//! assert_eq!(iter.next().transpose()?, Some(("1", 3)));
574+
//! assert_eq!(iter.next().transpose()?, Some(("1", 2)));
575+
//! assert_eq!(iter.next().transpose()?, Some(("1", 1)));
576+
//! assert_eq!(iter.next().transpose()?, Some(("2", 4)));
577+
//! drop(iter);
578+
//!
579+
//! Ok(())
580+
//! }
581+
//! ```
582+
//!
444583
445584
// To let cargo generate doc links
446585
#![allow(unused_imports)]
447586

448-
use crate::envs::EnvOpenOptions;
449-
use crate::{BytesDecode, BytesEncode, Database};
587+
use crate::mdb::ffi::{mdb_set_compare, mdb_set_dupsort};
588+
use crate::{BytesDecode, BytesEncode, Comparator, Database, EnvOpenOptions};

heed/src/databases/database.rs

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,9 @@ use crate::*;
5555
/// # Ok(()) }
5656
/// ```
5757
#[derive(Debug)]
58-
pub struct DatabaseOpenOptions<'e, 'n, T, KC, DC, C = DefaultComparator> {
58+
pub struct DatabaseOpenOptions<'e, 'n, T, KC, DC, C = DefaultComparator, CDUP = DefaultComparator> {
5959
env: &'e Env<T>,
60-
types: marker::PhantomData<(KC, DC, C)>,
60+
types: marker::PhantomData<(KC, DC, C, CDUP)>,
6161
name: Option<&'n str>,
6262
flags: AllDatabaseFlags,
6363
}
@@ -74,7 +74,7 @@ impl<'e, T> DatabaseOpenOptions<'e, 'static, T, Unspecified, Unspecified> {
7474
}
7575
}
7676

77-
impl<'e, 'n, T, KC, DC, C> DatabaseOpenOptions<'e, 'n, T, KC, DC, C> {
77+
impl<'e, 'n, T, KC, DC, C, CDUP> DatabaseOpenOptions<'e, 'n, T, KC, DC, C, CDUP> {
7878
/// Change the type of the database.
7979
///
8080
/// The default types are [`Unspecified`] and require a call to [`Database::remap_types`]
@@ -91,7 +91,19 @@ impl<'e, 'n, T, KC, DC, C> DatabaseOpenOptions<'e, 'n, T, KC, DC, C> {
9191
/// Change the customized key compare function of the database.
9292
///
9393
/// By default no customized compare function will be set when opening a database.
94-
pub fn key_comparator<NC>(self) -> DatabaseOpenOptions<'e, 'n, T, KC, DC, NC> {
94+
pub fn key_comparator<NC>(self) -> DatabaseOpenOptions<'e, 'n, T, KC, DC, NC, CDUP> {
95+
DatabaseOpenOptions {
96+
env: self.env,
97+
types: Default::default(),
98+
name: self.name,
99+
flags: self.flags,
100+
}
101+
}
102+
103+
/// Change the customized dup sort compare function of the database.
104+
///
105+
/// By default no customized compare function will be set when opening a database.
106+
pub fn dup_sort_comparator<NCDUP>(self) -> DatabaseOpenOptions<'e, 'n, T, KC, DC, C, NCDUP> {
95107
DatabaseOpenOptions {
96108
env: self.env,
97109
types: Default::default(),
@@ -132,15 +144,16 @@ impl<'e, 'n, T, KC, DC, C> DatabaseOpenOptions<'e, 'n, T, KC, DC, C> {
132144
///
133145
/// If not done, you might raise `Io(Os { code: 22, kind: InvalidInput, message: "Invalid argument" })`
134146
/// known as `EINVAL`.
135-
pub fn open(&self, rtxn: &RoTxn) -> Result<Option<Database<KC, DC, C>>>
147+
pub fn open(&self, rtxn: &RoTxn) -> Result<Option<Database<KC, DC, C, CDUP>>>
136148
where
137149
KC: 'static,
138150
DC: 'static,
139151
C: Comparator + 'static,
152+
CDUP: Comparator + 'static,
140153
{
141154
assert_eq_env_txn!(self.env, rtxn);
142155

143-
match self.env.raw_init_database::<C>(rtxn.txn_ptr(), self.name, self.flags) {
156+
match self.env.raw_init_database::<C, CDUP>(rtxn.txn_ptr(), self.name, self.flags) {
144157
Ok(dbi) => Ok(Some(Database::new(self.env.env_mut_ptr().as_ptr() as _, dbi))),
145158
Err(Error::Mdb(e)) if e.not_found() => Ok(None),
146159
Err(e) => Err(e),
@@ -156,29 +169,30 @@ impl<'e, 'n, T, KC, DC, C> DatabaseOpenOptions<'e, 'n, T, KC, DC, C> {
156169
/// LMDB has an important restriction on the unnamed database when named ones are opened.
157170
/// The names of the named databases are stored as keys in the unnamed one and are immutable,
158171
/// and these keys can only be read and not written.
159-
pub fn create(&self, wtxn: &mut RwTxn) -> Result<Database<KC, DC, C>>
172+
pub fn create(&self, wtxn: &mut RwTxn) -> Result<Database<KC, DC, C, CDUP>>
160173
where
161174
KC: 'static,
162175
DC: 'static,
163176
C: Comparator + 'static,
177+
CDUP: Comparator + 'static,
164178
{
165179
assert_eq_env_txn!(self.env, wtxn);
166180

167181
let flags = self.flags | AllDatabaseFlags::CREATE;
168-
match self.env.raw_init_database::<C>(wtxn.txn_ptr(), self.name, flags) {
182+
match self.env.raw_init_database::<C, CDUP>(wtxn.txn_ptr(), self.name, flags) {
169183
Ok(dbi) => Ok(Database::new(self.env.env_mut_ptr().as_ptr() as _, dbi)),
170184
Err(e) => Err(e),
171185
}
172186
}
173187
}
174188

175-
impl<T, KC, DC, C> Clone for DatabaseOpenOptions<'_, '_, T, KC, DC, C> {
189+
impl<T, KC, DC, C, CDUP> Clone for DatabaseOpenOptions<'_, '_, T, KC, DC, C, CDUP> {
176190
fn clone(&self) -> Self {
177191
*self
178192
}
179193
}
180194

181-
impl<T, KC, DC, C> Copy for DatabaseOpenOptions<'_, '_, T, KC, DC, C> {}
195+
impl<T, KC, DC, C, CDUP> Copy for DatabaseOpenOptions<'_, '_, T, KC, DC, C, CDUP> {}
182196

183197
/// A typed database that accepts only the types it was created with.
184198
///
@@ -292,14 +306,14 @@ impl<T, KC, DC, C> Copy for DatabaseOpenOptions<'_, '_, T, KC, DC, C> {}
292306
/// wtxn.commit()?;
293307
/// # Ok(()) }
294308
/// ```
295-
pub struct Database<KC, DC, C = DefaultComparator> {
309+
pub struct Database<KC, DC, C = DefaultComparator, CDUP = DefaultComparator> {
296310
pub(crate) env_ident: usize,
297311
pub(crate) dbi: ffi::MDB_dbi,
298-
marker: marker::PhantomData<(KC, DC, C)>,
312+
marker: marker::PhantomData<(KC, DC, C, CDUP)>,
299313
}
300314

301-
impl<KC, DC, C> Database<KC, DC, C> {
302-
pub(crate) fn new(env_ident: usize, dbi: ffi::MDB_dbi) -> Database<KC, DC, C> {
315+
impl<KC, DC, C, CDUP> Database<KC, DC, C, CDUP> {
316+
pub(crate) fn new(env_ident: usize, dbi: ffi::MDB_dbi) -> Database<KC, DC, C, CDUP> {
303317
Database { env_ident, dbi, marker: std::marker::PhantomData }
304318
}
305319

@@ -2693,20 +2707,21 @@ impl<KC, DC, C> Database<KC, DC, C> {
26932707
}
26942708
}
26952709

2696-
impl<KC, DC, C> Clone for Database<KC, DC, C> {
2697-
fn clone(&self) -> Database<KC, DC, C> {
2710+
impl<KC, DC, C, CDUP> Clone for Database<KC, DC, C, CDUP> {
2711+
fn clone(&self) -> Database<KC, DC, C, CDUP> {
26982712
*self
26992713
}
27002714
}
27012715

2702-
impl<KC, DC, C> Copy for Database<KC, DC, C> {}
2716+
impl<KC, DC, C, CDUP> Copy for Database<KC, DC, C, CDUP> {}
27032717

2704-
impl<KC, DC, C> fmt::Debug for Database<KC, DC, C> {
2718+
impl<KC, DC, C, CDUP> fmt::Debug for Database<KC, DC, C, CDUP> {
27052719
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
27062720
f.debug_struct("Database")
27072721
.field("key_codec", &any::type_name::<KC>())
27082722
.field("data_codec", &any::type_name::<DC>())
2709-
.field("comparator", &any::type_name::<C>())
2723+
.field("key_comparator", &any::type_name::<C>())
2724+
.field("dup_sort_comparator", &any::type_name::<CDUP>())
27102725
.finish()
27112726
}
27122727
}

0 commit comments

Comments
 (0)