Skip to content

[enhance](inverted index) multi index on one column #50409

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
302 changes: 160 additions & 142 deletions be/src/olap/compaction.cpp

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions be/src/olap/comparison_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ class ComparisonPredicateBase : public ColumnPredicate {
if (iterator == nullptr) {
return Status::OK();
}
std::string column_name = name_with_type.first;

InvertedIndexQueryType query_type = InvertedIndexQueryType::UNKNOWN_QUERY;
switch (PT) {
Expand Down Expand Up @@ -104,7 +103,7 @@ class ComparisonPredicateBase : public ColumnPredicate {
std::unique_ptr<InvertedIndexQueryParamFactory> query_param = nullptr;
RETURN_IF_ERROR(
InvertedIndexQueryParamFactory::create_query_value<Type>(&_value, query_param));
RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, query_param->get_value(),
RETURN_IF_ERROR(iterator->read_from_inverted_index(name_with_type, query_param->get_value(),
query_type, num_rows, roaring));

// mask out null_bitmap, since NULL cmp VALUE will produce NULL
Expand Down
6 changes: 3 additions & 3 deletions be/src/olap/delta_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,9 +253,9 @@ void DeltaWriter::_request_slave_tablet_pull_rowset(const PNodeInfo& node_info)
auto cur_rowset = _rowset_builder->rowset();
auto tablet_schema = cur_rowset->rowset_meta()->tablet_schema();
if (!tablet_schema->skip_write_index_on_load()) {
for (auto& column : tablet_schema->columns()) {
const TabletIndex* index_meta = tablet_schema->inverted_index(*column);
if (index_meta) {
for (const auto& column : tablet_schema->columns()) {
auto index_metas = tablet_schema->inverted_indexs(*column);
for (const auto* index_meta : index_metas) {
indices_ids.emplace_back(index_meta->index_id(), index_meta->get_index_suffix());
}
}
Expand Down
3 changes: 1 addition & 2 deletions be/src/olap/in_list_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,6 @@ class InListPredicateBase : public ColumnPredicate {
if (iterator == nullptr) {
return Status::OK();
}
std::string column_name = name_with_type.first;
roaring::Roaring indices;
HybridSetBase::IteratorBase* iter = _values->begin();
while (iter->has_next()) {
Expand All @@ -199,7 +198,7 @@ class InListPredicateBase : public ColumnPredicate {
InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY;
std::shared_ptr<roaring::Roaring> index = std::make_shared<roaring::Roaring>();
RETURN_IF_ERROR(iterator->read_from_inverted_index(
column_name, query_param->get_value(), query_type, num_rows, index));
name_with_type, query_param->get_value(), query_type, num_rows, index));
indices |= *index;
iter->next();
}
Expand Down
10 changes: 5 additions & 5 deletions be/src/olap/match_predicate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ Status MatchPredicate::evaluate(const vectorized::IndexFieldNameAndTypePair& nam
"phrase queries require setting support_phrase = true");
}
auto type = name_with_type.second;
const std::string& name = name_with_type.first;
std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
auto inverted_index_query_type = _to_inverted_index_query_type(_match_type);
TypeDescriptor column_desc = type->get_type_as_type_descriptor();
Expand All @@ -67,7 +66,7 @@ Status MatchPredicate::evaluate(const vectorized::IndexFieldNameAndTypePair& nam
char* buffer = const_cast<char*>(_value.c_str());
match_value.replace(buffer, length); //is it safe?
RETURN_IF_ERROR(iterator->read_from_inverted_index(
name, &match_value, inverted_index_query_type, num_rows, roaring));
name_with_type, &match_value, inverted_index_query_type, num_rows, roaring));
} else if (column_desc.type == TYPE_ARRAY &&
is_numeric_type(
TabletColumn::get_field_type_by_type(column_desc.children[0].type))) {
Expand All @@ -76,7 +75,7 @@ Status MatchPredicate::evaluate(const vectorized::IndexFieldNameAndTypePair& nam
TabletColumn::get_field_type_by_type(column_desc.children[0].type));
RETURN_IF_ERROR(type_info->from_string(buf.data(), _value));
RETURN_IF_ERROR(iterator->read_from_inverted_index(
name, buf.data(), inverted_index_query_type, num_rows, roaring, true));
name_with_type, buf.data(), inverted_index_query_type, num_rows, roaring, true));
}

// mask out null_bitmap, since NULL cmp VALUE will produce NULL
Expand Down Expand Up @@ -125,8 +124,9 @@ InvertedIndexQueryType MatchPredicate::_to_inverted_index_query_type(MatchType m
bool MatchPredicate::_check_evaluate(InvertedIndexIterator* iterator) const {
if (_match_type == MatchType::MATCH_PHRASE || _match_type == MatchType::MATCH_PHRASE_PREFIX ||
_match_type == MatchType::MATCH_PHRASE_EDGE) {
if (iterator->get_inverted_index_reader_type() == InvertedIndexReaderType::FULLTEXT &&
get_parser_phrase_support_string_from_properties(iterator->get_index_properties()) ==
auto reader = iterator->get_reader(InvertedIndexReaderType::FULLTEXT);
if (reader &&
get_parser_phrase_support_string_from_properties(reader->get_index_properties()) ==
INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO) {
return true;
}
Expand Down
67 changes: 32 additions & 35 deletions be/src/olap/rowset/beta_rowset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ void BetaRowset::clear_inverted_index_cache() {

auto index_path_prefix = InvertedIndexDescriptor::get_index_file_path_prefix(*seg_path);
for (const auto& column : tablet_schema()->columns()) {
const TabletIndex* index_meta = tablet_schema()->inverted_index(*column);
if (index_meta) {
auto index_metas = tablet_schema()->inverted_indexs(*column);
for (const auto& index_meta : index_metas) {
auto inverted_index_file_cache_key =
InvertedIndexDescriptor::get_index_file_cache_key(
index_path_prefix, index_meta->index_id(),
Expand Down Expand Up @@ -237,9 +237,9 @@ Status BetaRowset::remove() {
}

if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) {
for (auto& column : _schema->columns()) {
const TabletIndex* index_meta = _schema->inverted_index(*column);
if (index_meta) {
for (const auto& column : _schema->columns()) {
auto index_metas = _schema->inverted_indexs(*column);
for (const auto& index_meta : index_metas) {
std::string inverted_index_file =
InvertedIndexDescriptor::get_index_file_path_v1(
InvertedIndexDescriptor::get_index_file_path_prefix(seg_path),
Expand Down Expand Up @@ -411,10 +411,9 @@ Status BetaRowset::copy_files_to(const std::string& dir, const RowsetId& new_row
auto src_path = local_segment_path(_tablet_path, rowset_id().to_string(), i);
RETURN_IF_ERROR(io::global_local_filesystem()->copy_path(src_path, dst_path));
if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) {
for (auto& column : _schema->columns()) {
// if (column.has_inverted_index()) {
const TabletIndex* index_meta = _schema->inverted_index(*column);
if (index_meta) {
for (const auto& column : _schema->columns()) {
auto index_metas = _schema->inverted_indexs(*column);
for (const auto& index_meta : index_metas) {
std::string inverted_index_src_file_path =
InvertedIndexDescriptor::get_index_file_path_v1(
InvertedIndexDescriptor::get_index_file_path_prefix(src_path),
Expand Down Expand Up @@ -470,10 +469,9 @@ Status BetaRowset::upload_to(const StorageResource& dest_fs, const RowsetId& new
dest_paths.emplace_back(remote_seg_path);
local_paths.emplace_back(local_seg_path);
if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) {
for (auto& column : _schema->columns()) {
// if (column.has_inverted_index()) {
const TabletIndex* index_meta = _schema->inverted_index(*column);
if (index_meta) {
for (const auto& column : _schema->columns()) {
auto index_metas = _schema->inverted_indexs(*column);
for (const auto& index_meta : index_metas) {
std::string remote_inverted_index_file =
InvertedIndexDescriptor::get_index_file_path_v1(
InvertedIndexDescriptor::get_index_file_path_prefix(
Expand Down Expand Up @@ -679,9 +677,9 @@ Status BetaRowset::calc_file_crc(uint32_t* crc_value, int64_t* file_count) {
auto seg_path = DORIS_TRY(segment_path(seg_id));
file_paths.emplace_back(seg_path);
if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) {
for (auto& column : _schema->columns()) {
const TabletIndex* index_meta = _schema->inverted_index(*column);
if (index_meta) {
for (const auto& column : _schema->columns()) {
auto index_metas = _schema->inverted_indexs(*column);
for (const auto& index_meta : index_metas) {
std::string inverted_index_file =
InvertedIndexDescriptor::get_index_file_path_v1(
InvertedIndexDescriptor::get_index_file_path_prefix(seg_path),
Expand Down Expand Up @@ -838,26 +836,25 @@ Status BetaRowset::show_nested_index_file(rapidjson::Value* rowset_value,
} else {
rapidjson::Value indices(rapidjson::kArrayType);
for (auto column : _rowset_meta->tablet_schema()->columns()) {
const auto* index_meta = _rowset_meta->tablet_schema()->inverted_index(*column);
if (index_meta == nullptr) {
continue;
}
rapidjson::Value index(rapidjson::kObjectType);
auto index_id = index_meta->index_id();
auto index_suffix = index_meta->get_index_suffix();
index.AddMember("index_id", rapidjson::Value(index_id).Move(), allocator);
index.AddMember("index_suffix", rapidjson::Value(index_suffix.c_str(), allocator),
allocator);
auto path = InvertedIndexDescriptor::get_index_file_path_v1(index_file_path_prefix,
index_id, index_suffix);
auto st = add_file_info_to_json(path, index);
if (!st.ok()) {
return st;
}
auto index_metes = _rowset_meta->tablet_schema()->inverted_indexs(*column);
for (const auto& index_meta : index_metes) {
rapidjson::Value index(rapidjson::kObjectType);
auto index_id = index_meta->index_id();
auto index_suffix = index_meta->get_index_suffix();
index.AddMember("index_id", rapidjson::Value(index_id).Move(), allocator);
index.AddMember("index_suffix",
rapidjson::Value(index_suffix.c_str(), allocator), allocator);
auto path = InvertedIndexDescriptor::get_index_file_path_v1(
index_file_path_prefix, index_id, index_suffix);
auto st = add_file_info_to_json(path, index);
if (!st.ok()) {
return st;
}

auto status = process_files(*index_meta, indices, index);
if (!status.ok()) {
return status;
auto status = process_files(*index_meta, indices, index);
if (!status.ok()) {
return status;
}
}
}
segment.AddMember("indices", indices, allocator);
Expand Down
4 changes: 2 additions & 2 deletions be/src/olap/rowset/beta_rowset_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -560,8 +560,8 @@ Status BetaRowsetWriter::_rename_compacted_indices(int64_t begin, int64_t end, u
}
// rename remaining inverted index files
for (auto column : _context.tablet_schema->columns()) {
if (const auto& index_info = _context.tablet_schema->inverted_index(*column);
index_info != nullptr) {
auto index_infos = _context.tablet_schema->inverted_indexs(*column);
for (const auto& index_info : index_infos) {
auto index_id = index_info->index_id();
if (_context.tablet_schema->get_inverted_index_storage_format() ==
InvertedIndexStorageFormatPB::V1) {
Expand Down
3 changes: 2 additions & 1 deletion be/src/olap/rowset/segcompaction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,8 @@ Status SegcompactionWorker::_delete_original_segments(uint32_t begin, uint32_t e
}
// Delete inverted index files
for (auto&& column : schema->columns()) {
if (const auto* index_info = schema->inverted_index(*column); index_info != nullptr) {
auto index_infos = schema->inverted_indexs(*column);
for (const auto& index_info : index_infos) {
auto index_id = index_info->index_id();
if (schema->get_inverted_index_storage_format() ==
InvertedIndexStorageFormatPB::V1) {
Expand Down
Loading
Loading