Skip to content

Commit 52e0714

Browse files
authored
chore: purge inverted index (#15354)
* purge inverted index
* purge inverted index info files
* add code comments
* add ee sql logic test
* revert default query node config
* chore: cleanup
* resolve merge conflicts
* evict inverted index cache
1 parent 6d07f34 commit 52e0714

File tree

10 files changed

+419
-20
lines changed

10 files changed

+419
-20
lines changed

src/query/storages/common/cache_manager/src/caches.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,20 @@ impl CachedObject<InvertedIndexFile, DefaultHashBuilder, InvertedIndexFileMeter>
155155
}
156156
}
157157

158+
impl CachedObject<IndexInfo> for IndexInfo {
159+
type Cache = InvertedIndexInfoCache;
160+
fn cache() -> Option<Self::Cache> {
161+
CacheManager::instance().get_inverted_index_info_cache()
162+
}
163+
}
164+
165+
impl CachedObject<InvertedIndexMeta> for InvertedIndexMeta {
166+
type Cache = InvertedIndexMetaCache;
167+
fn cache() -> Option<Self::Cache> {
168+
CacheManager::instance().get_inverted_index_meta_cache()
169+
}
170+
}
171+
158172
pub struct ColumnArrayMeter;
159173

160174
impl<K, V> Meter<K, Arc<(V, usize)>> for ColumnArrayMeter {

src/query/storages/fuse/src/io/locations.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -169,14 +169,14 @@ impl TableMetaLocationGenerator {
169169
}
170170

171171
pub fn gen_inverted_index_info_location(&self) -> String {
172+
let prefix = self.inverted_index_info_prefix();
172173
let uuid = Uuid::new_v4().simple().to_string();
173-
format!(
174-
"{}/{}/{}_v{}.mpk",
175-
&self.prefix,
176-
FUSE_TBL_INVERTED_INDEX_INFO_PREFIX,
177-
uuid,
178-
IndexInfo::VERSION,
179-
)
174+
format!("{}{}_v{}.mpk", prefix, uuid, IndexInfo::VERSION,)
175+
}
176+
177+
// inverted index info path, trailing slash "/" included.
178+
pub fn inverted_index_info_prefix(&self) -> String {
179+
format!("{}/{}/", &self.prefix, FUSE_TBL_INVERTED_INDEX_INFO_PREFIX)
180180
}
181181

182182
pub fn gen_inverted_index_location_from_block_location(

src/query/storages/fuse/src/io/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ pub use read::AggIndexReader;
2525
pub use read::BlockReader;
2626
pub use read::BloomBlockFilterReader;
2727
pub use read::CompactSegmentInfoReader;
28+
pub use read::InvertedIndexReader;
2829
pub use read::MergeIOReadResult;
2930
pub use read::MetaReaders;
3031
pub use read::NativeReaderExt;

src/query/storages/fuse/src/io/read/inverted_index/inverted_index_loader.rs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,17 @@ use crate::io::MetaReaders;
4141
type CachedReader =
4242
InMemoryCacheReader<InvertedIndexFile, InvertedIndexFileLoader, InvertedIndexFileMeter>;
4343

44+
const INDEX_COLUMN_NAMES: [&str; 8] = [
45+
"fast",
46+
"store",
47+
"fieldnorm",
48+
"pos",
49+
"idx",
50+
"term",
51+
"meta.json",
52+
".managed.json",
53+
];
54+
4455
/// Loads inverted index info data
4556
/// read data from cache, or populate cache items if possible
4657
#[minitrace::trace]
@@ -188,7 +199,7 @@ impl InvertedIndexFileReader {
188199
column_meta: &SingleColumnMeta,
189200
operator: Operator,
190201
) -> Self {
191-
let cache_key = format!("{index_path}-{name}");
202+
let cache_key = Self::cache_key_of_column(&index_path, &name);
192203

193204
let loader = InvertedIndexFileLoader {
194205
offset: column_meta.offset,
@@ -217,6 +228,17 @@ impl InvertedIndexFileReader {
217228
pub async fn read(&self) -> Result<Arc<InvertedIndexFile>> {
218229
self.cached_reader.read(&self.param).await
219230
}
231+
232+
fn cache_key_of_column(index_path: &str, index_column_name: &str) -> String {
233+
format!("{index_path}-{index_column_name}")
234+
}
235+
236+
pub(crate) fn cache_key_of_index_columns(index_path: &str) -> Vec<String> {
237+
INDEX_COLUMN_NAMES
238+
.iter()
239+
.map(|column_name| Self::cache_key_of_column(index_path, column_name))
240+
.collect()
241+
}
220242
}
221243

222244
/// Loader that fetch range of the target object with customized cache key

src/query/storages/fuse/src/io/read/inverted_index/inverted_index_reader.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use tantivy::Index;
2525
use tantivy::Score;
2626

2727
use crate::io::read::inverted_index::inverted_index_loader::load_inverted_index_directory;
28+
use crate::io::read::inverted_index::inverted_index_loader::InvertedIndexFileReader;
2829

2930
#[derive(Clone)]
3031
pub struct InvertedIndexReader {
@@ -107,4 +108,9 @@ impl InvertedIndexReader {
107108

108109
Ok(Some(matched_rows))
109110
}
111+
112+
// delegation of [InvertedIndexFileReader::cache_key_of_index_columns]
113+
pub fn cache_key_of_index_columns(index_path: &str) -> Vec<String> {
114+
InvertedIndexFileReader::cache_key_of_index_columns(index_path)
115+
}
110116
}

src/query/storages/fuse/src/io/snapshots.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,7 @@ impl SnapshotsIO {
375375
}
376376
},
377377
_ => {
378-
warn!("found not snapshot file in {:}, found: {:?}", prefix, de);
378+
warn!("non-file entry found in {:}, the entry: {:?}", prefix, de);
379379
continue;
380380
}
381381
}

src/query/storages/fuse/src/operations/common/processors/sink_commit.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -381,8 +381,8 @@ where F: SnapshotGenerator + Send + 'static
381381
let tbl = FuseTable::try_from_table(latest.as_ref())?;
382382

383383
warn!(
384-
"table detected, purging historical data. ({})",
385-
tbl.table_info.ident
384+
"purging historical data. table: {}, ident: {}",
385+
tbl.table_info.name, tbl.table_info.ident
386386
);
387387

388388
let keep_last_snapshot = true;

0 commit comments

Comments
 (0)