Skip to content

Commit 6519ff5

Browse files
committed
[ENH] Add impl to stop indexing when metadata index set to false
1 parent 20c1129 commit 6519ff5

File tree

4 files changed

+100
-0
lines changed

4 files changed

+100
-0
lines changed

rust/segment/src/blockfile_metadata.rs

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use chroma_types::F32_METADATA;
2121
use chroma_types::FULL_TEXT_PLS;
2222
use chroma_types::STRING_METADATA;
2323
use chroma_types::U32_METADATA;
24+
use chroma_types::{CollectionSchema, ValueType};
2425
use chroma_types::{MaterializedLogOperation, MetadataValue, Segment, SegmentUuid};
2526
use core::panic;
2627
use roaring::RoaringBitmap;
@@ -37,6 +38,7 @@ pub struct MetadataSegmentWriter<'me> {
3738
pub(crate) f32_metadata_index_writer: Option<MetadataIndexWriter<'me>>,
3839
pub(crate) u32_metadata_index_writer: Option<MetadataIndexWriter<'me>>,
3940
pub id: SegmentUuid,
41+
pub schema: Option<HashMap<String, HashMap<ValueType, CollectionSchema>>>,
4042
}
4143

4244
impl Debug for MetadataSegmentWriter<'_> {
@@ -102,6 +104,7 @@ impl<'me> MetadataSegmentWriter<'me> {
102104
database_id: &DatabaseUuid,
103105
segment: &Segment,
104106
blockfile_provider: &BlockfileProvider,
107+
schema: Option<HashMap<String, HashMap<ValueType, CollectionSchema>>>,
105108
) -> Result<MetadataSegmentWriter<'me>, MetadataSegmentError> {
106109
if segment.r#type != SegmentType::BlockfileMetadata {
107110
return Err(MetadataSegmentError::InvalidSegmentType);
@@ -316,6 +319,7 @@ impl<'me> MetadataSegmentWriter<'me> {
316319
f32_metadata_index_writer: Some(f32_metadata_index_writer),
317320
u32_metadata_index_writer: Some(u32_metadata_index_writer),
318321
id: segment.id,
322+
schema,
319323
})
320324
}
321325

@@ -325,6 +329,37 @@ impl<'me> MetadataSegmentWriter<'me> {
325329
key: &MetadataValue,
326330
offset_id: u32,
327331
) -> Result<(), MetadataIndexError> {
332+
let mut should_index = true;
333+
if let Some(schema) = &self.schema {
334+
let schema_key = prefix;
335+
if let Some(value_type_map) = schema.get(schema_key) {
336+
match key {
337+
MetadataValue::Str(_) => {
338+
if let Some(value_type_map) = value_type_map.get(&ValueType::String) {
339+
should_index = value_type_map.metadata_index;
340+
}
341+
}
342+
MetadataValue::Int(_) => {
343+
if let Some(value_type_map) = value_type_map.get(&ValueType::Int) {
344+
should_index = value_type_map.metadata_index;
345+
}
346+
}
347+
MetadataValue::Float(_) => {
348+
if let Some(value_type_map) = value_type_map.get(&ValueType::Float) {
349+
should_index = value_type_map.metadata_index;
350+
}
351+
}
352+
MetadataValue::Bool(_) => {
353+
if let Some(value_type_map) = value_type_map.get(&ValueType::Boolean) {
354+
should_index = value_type_map.metadata_index;
355+
}
356+
}
357+
}
358+
}
359+
}
360+
if !should_index {
361+
return Ok(());
362+
}
328363
match key {
329364
MetadataValue::Str(v) => {
330365
match &self.string_metadata_index_writer {
@@ -391,6 +426,37 @@ impl<'me> MetadataSegmentWriter<'me> {
391426
key: &MetadataValue,
392427
offset_id: u32,
393428
) -> Result<(), MetadataIndexError> {
429+
let mut should_index = true;
430+
if let Some(schema) = &self.schema {
431+
let schema_key = prefix;
432+
if let Some(value_type_map) = schema.get(schema_key) {
433+
match key {
434+
MetadataValue::Str(_) => {
435+
if let Some(value_type_map) = value_type_map.get(&ValueType::String) {
436+
should_index = value_type_map.metadata_index;
437+
}
438+
}
439+
MetadataValue::Int(_) => {
440+
if let Some(value_type_map) = value_type_map.get(&ValueType::Int) {
441+
should_index = value_type_map.metadata_index;
442+
}
443+
}
444+
MetadataValue::Float(_) => {
445+
if let Some(value_type_map) = value_type_map.get(&ValueType::Float) {
446+
should_index = value_type_map.metadata_index;
447+
}
448+
}
449+
MetadataValue::Bool(_) => {
450+
if let Some(value_type_map) = value_type_map.get(&ValueType::Boolean) {
451+
should_index = value_type_map.metadata_index;
452+
}
453+
}
454+
}
455+
}
456+
}
457+
if !should_index {
458+
return Ok(());
459+
}
394460
match key {
395461
MetadataValue::Str(v) => {
396462
match &self.string_metadata_index_writer {
@@ -1063,6 +1129,7 @@ mod test {
10631129
&database_id,
10641130
&metadata_segment,
10651131
&blockfile_provider,
1132+
None,
10661133
)
10671134
.await
10681135
.expect("Error creating segment writer");
@@ -1202,6 +1269,7 @@ mod test {
12021269
&database_id,
12031270
&metadata_segment,
12041271
&blockfile_provider,
1272+
None,
12051273
)
12061274
.await
12071275
.expect("Error creating segment writer");
@@ -1291,6 +1359,7 @@ mod test {
12911359
&database_id,
12921360
&metadata_segment,
12931361
&blockfile_provider,
1362+
None,
12941363
)
12951364
.await
12961365
.expect("Error creating segment writer");
@@ -1388,6 +1457,7 @@ mod test {
13881457
&database_id,
13891458
&metadata_segment,
13901459
&blockfile_provider,
1460+
None,
13911461
)
13921462
.await
13931463
.expect("Error creating segment writer");
@@ -1530,6 +1600,7 @@ mod test {
15301600
&database_id,
15311601
&metadata_segment,
15321602
&blockfile_provider,
1603+
None,
15331604
)
15341605
.await
15351606
.expect("Error creating segment writer");
@@ -1658,6 +1729,7 @@ mod test {
16581729
&database_id,
16591730
&metadata_segment,
16601731
&blockfile_provider,
1732+
None,
16611733
)
16621734
.await
16631735
.expect("Error creating segment writer");
@@ -1773,6 +1845,7 @@ mod test {
17731845
&database_id,
17741846
&metadata_segment,
17751847
&blockfile_provider,
1848+
None,
17761849
)
17771850
.await
17781851
.expect("Error creating segment writer");
@@ -1897,6 +1970,7 @@ mod test {
18971970
&database_id,
18981971
&metadata_segment,
18991972
&blockfile_provider,
1973+
None,
19001974
)
19011975
.await
19021976
.expect("Error creating segment writer");
@@ -2001,6 +2075,7 @@ mod test {
20012075
&database_id,
20022076
&metadata_segment,
20032077
&blockfile_provider,
2078+
None,
20042079
)
20052080
.await
20062081
.expect("Error creating segment writer");
@@ -2124,6 +2199,7 @@ mod test {
21242199
&database_id,
21252200
&metadata_segment,
21262201
&blockfile_provider,
2202+
None,
21272203
)
21282204
.await
21292205
.expect("Error creating segment writer");

rust/segment/src/test.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ impl TestDistributedSegment {
8080
&self.collection.database_id,
8181
&self.metadata_segment,
8282
&self.blockfile_provider,
83+
self.collection.config.schema.clone(),
8384
)
8485
.await
8586
.expect("Should be able to initialize metadata writer.");

rust/types/src/collection_configuration.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,28 @@ pub struct UpdateCollectionConfiguration {
484484
pub schema: Option<HashMap<String, HashMap<ValueType, CollectionSchema>>>,
485485
}
486486

487+
/// Lists every `(metadata key, value type)` pair whose `metadata_index` flag is
/// switched on by `update_schema` after having been switched off in `old_schema`.
///
/// A pair that has no entry in `old_schema` (including the case where
/// `old_schema` is `None`) is treated as if its previous flag was `true`, so it
/// is never reported.
///
/// The returned pairs follow the iteration order of the `update_schema` maps.
/// NOTE(review): presumably callers use the result to decide which entries need
/// a backfill — confirm against the call sites.
pub fn diff_metadata_index_enable(
    old_schema: &Option<HashMap<String, HashMap<ValueType, CollectionSchema>>>,
    update_schema: &HashMap<String, HashMap<ValueType, CollectionSchema>>,
) -> Vec<(String, ValueType)> {
    let previous_schema = old_schema.as_ref();

    update_schema
        .iter()
        .flat_map(|(key, updated_by_type)| {
            // Resolve the per-key portion of the old schema once per key.
            let previous_by_type = previous_schema.and_then(|schema| schema.get(key));

            updated_by_type
                .iter()
                .filter_map(move |(value_type, updated_entry)| {
                    // A missing old entry counts as "was indexed".
                    let was_indexed = previous_by_type
                        .and_then(|by_type| by_type.get(value_type))
                        .map_or(true, |previous_entry| previous_entry.metadata_index);

                    // Only a disabled -> enabled transition is reported.
                    (!was_indexed && updated_entry.metadata_index)
                        .then(|| (key.clone(), value_type.clone()))
                })
        })
        .collect()
}
508+
487509
#[cfg(test)]
488510
mod tests {
489511
use crate::hnsw_configuration::HnswConfiguration;

rust/worker/src/execution/orchestration/compact.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,7 @@ impl Handler<TaskResult<GetCollectionAndSegmentsOutput, GetCollectionAndSegments
682682
&collection.database_id,
683683
&metadata_segment,
684684
&self.blockfile_provider,
685+
collection.config.schema.clone(),
685686
)
686687
.await,
687688
ctx,

0 commit comments

Comments
 (0)