diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 0088228be4c2c9..15a05a62689bc0 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -737,10 +737,10 @@ Status Compaction::do_inverted_index_compaction() { Status status = Status::OK(); for (auto&& column_uniq_id : ctx.columns_to_do_index_compaction) { auto col = _cur_tablet_schema->column_by_uid(column_uniq_id); - const auto* index_meta = _cur_tablet_schema->inverted_index(col); + auto index_metas = _cur_tablet_schema->inverted_indexs(col); DBUG_EXECUTE_IF("Compaction::do_inverted_index_compaction_can_not_find_index_meta", - { index_meta = nullptr; }) - if (index_meta == nullptr) { + { index_metas.clear(); }) + if (index_metas.empty()) { status = Status::Error( fmt::format("Can not find index_meta for col {}", col.name())); LOG(WARNING) << "failed to do index compaction, can not find index_meta for column" @@ -749,56 +749,60 @@ Status Compaction::do_inverted_index_compaction() { error_handler(-1, column_uniq_id); break; } - - std::vector dest_index_dirs(dest_segment_num); - try { - std::vector> src_idx_dirs(src_segment_num); - for (int src_segment_id = 0; src_segment_id < src_segment_num; src_segment_id++) { - auto res = inverted_index_file_readers[src_segment_id]->open(index_meta); - DBUG_EXECUTE_IF("Compaction::open_inverted_index_file_reader", { - res = ResultError(Status::Error( - "debug point: Compaction::open_index_file_reader error")); - }) - if (!res.has_value()) { - LOG(WARNING) << "failed to do index compaction, open inverted index file " - "reader failed" - << ". tablet=" << _tablet->tablet_id() - << ", column uniq id=" << column_uniq_id - << ", src_segment_id=" << src_segment_id; - throw Exception(ErrorCode::INVERTED_INDEX_COMPACTION_ERROR, res.error().msg()); + for (const auto& index_meta : index_metas) { + std::vector dest_index_dirs(dest_segment_num); + try { + std::vector> src_idx_dirs(src_segment_num); + for (int src_segment_id = 0; src_segment_id < src_segment_num; src_segment_id++) { + auto res = inverted_index_file_readers[src_segment_id]->open(index_meta); + DBUG_EXECUTE_IF("Compaction::open_inverted_index_file_reader", { + res = ResultError(Status::Error( + "debug point: Compaction::open_index_file_reader error")); + }) + if (!res.has_value()) { + LOG(WARNING) << "failed to do index compaction, open inverted index file " + "reader failed" + << ". tablet=" << _tablet->tablet_id() + << ", column uniq id=" << column_uniq_id + << ", src_segment_id=" << src_segment_id; + throw Exception(ErrorCode::INVERTED_INDEX_COMPACTION_ERROR, + res.error().msg()); + } + src_idx_dirs[src_segment_id] = std::move(res.value()); } - src_idx_dirs[src_segment_id] = std::move(res.value()); - } - for (int dest_segment_id = 0; dest_segment_id < dest_segment_num; dest_segment_id++) { - auto res = inverted_index_file_writers[dest_segment_id]->open(index_meta); - DBUG_EXECUTE_IF("Compaction::open_inverted_index_file_writer", { - res = ResultError(Status::Error( - "debug point: Compaction::open_inverted_index_file_writer error")); - }) - if (!res.has_value()) { - LOG(WARNING) << "failed to do index compaction, open inverted index file " - "writer failed" - << ". tablet=" << _tablet->tablet_id() - << ", column uniq id=" << column_uniq_id - << ", dest_segment_id=" << dest_segment_id; - throw Exception(ErrorCode::INVERTED_INDEX_COMPACTION_ERROR, res.error().msg()); + for (int dest_segment_id = 0; dest_segment_id < dest_segment_num; + dest_segment_id++) { + auto res = inverted_index_file_writers[dest_segment_id]->open(index_meta); + DBUG_EXECUTE_IF("Compaction::open_inverted_index_file_writer", { + res = ResultError(Status::Error( + "debug point: Compaction::open_inverted_index_file_writer error")); + }) + if (!res.has_value()) { + LOG(WARNING) << "failed to do index compaction, open inverted index file " + "writer failed" + << ". tablet=" << _tablet->tablet_id() + << ", column uniq id=" << column_uniq_id + << ", dest_segment_id=" << dest_segment_id; + throw Exception(ErrorCode::INVERTED_INDEX_COMPACTION_ERROR, + res.error().msg()); + } + // Destination directories in dest_index_dirs do not need to be deconstructed, + // but their lifecycle must be managed by inverted_index_file_writers. + dest_index_dirs[dest_segment_id] = res.value().get(); } - // Destination directories in dest_index_dirs do not need to be deconstructed, - // but their lifecycle must be managed by inverted_index_file_writers. - dest_index_dirs[dest_segment_id] = res.value().get(); - } - auto st = compact_column(index_meta->index_id(), src_idx_dirs, dest_index_dirs, - index_tmp_path.native(), trans_vec, dest_segment_num_rows); - if (!st.ok()) { + auto st = compact_column(index_meta->index_id(), src_idx_dirs, dest_index_dirs, + index_tmp_path.native(), trans_vec, dest_segment_num_rows); + if (!st.ok()) { + error_handler(index_meta->index_id(), column_uniq_id); + status = Status::Error(st.msg()); + } + } catch (CLuceneError& e) { + error_handler(index_meta->index_id(), column_uniq_id); + status = Status::Error(e.what()); + } catch (const Exception& e) { error_handler(index_meta->index_id(), column_uniq_id); - status = Status::Error(st.msg()); + status = Status::Error(e.what()); } - } catch (CLuceneError& e) { - error_handler(index_meta->index_id(), column_uniq_id); - status = Status::Error(e.what()); - } catch (const Exception& e) { - error_handler(index_meta->index_id(), column_uniq_id); - status = Status::Error(e.what()); } } @@ -818,17 +822,19 @@ void Compaction::mark_skip_index_compaction( const std::function& error_handler) { for (auto&& column_uniq_id : context.columns_to_do_index_compaction) { auto col = _cur_tablet_schema->column_by_uid(column_uniq_id); - const auto* index_meta = _cur_tablet_schema->inverted_index(col); + auto index_metas = _cur_tablet_schema->inverted_indexs(col); DBUG_EXECUTE_IF("Compaction::mark_skip_index_compaction_can_not_find_index_meta", - { index_meta = nullptr; }) - if (index_meta == nullptr) { + { index_metas.clear(); }) + if (index_metas.empty()) { LOG(WARNING) << "mark skip index compaction, can not find index_meta for column" << ". tablet=" << _tablet->tablet_id() << ", column uniq id=" << column_uniq_id; error_handler(-1, column_uniq_id); continue; } - error_handler(index_meta->index_id(), column_uniq_id); + for (const auto& index_meta : index_metas) { + error_handler(index_meta->index_id(), column_uniq_id); + } } } @@ -857,24 +863,30 @@ void Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) { bool is_continue = false; std::optional> first_properties; for (const auto& rowset : _input_rowsets) { - const auto* tablet_index = rowset->tablet_schema()->inverted_index(col_unique_id); + auto tablet_indexs = rowset->tablet_schema()->inverted_indexs(col_unique_id); // no inverted index or index id is different from current index id - if (tablet_index == nullptr || tablet_index->index_id() != index->index_id()) { + auto it = std::find_if(tablet_indexs.begin(), tablet_indexs.end(), + [&index](const auto& tablet_index) { + return tablet_index->index_id() == index->index_id(); + }); + if (it != tablet_indexs.end()) { + const auto* tablet_index = *it; + auto properties = tablet_index->properties(); + if (!first_properties.has_value()) { + first_properties = properties; + } else { + DBUG_EXECUTE_IF( + "Compaction::do_inverted_index_compaction_index_properties_different", + { properties.emplace("dummy_key", "dummy_value"); }) + if (properties != first_properties.value()) { + is_continue = true; + break; + } + } + } else { is_continue = true; break; } - auto properties = tablet_index->properties(); - if (!first_properties.has_value()) { - first_properties = properties; - } else { - DBUG_EXECUTE_IF( - "Compaction::do_inverted_index_compaction_index_properties_different", - { properties.emplace("dummy_key", "dummy_value"); }) - if (properties != first_properties.value()) { - is_continue = true; - break; - } - } } if (is_continue) { continue; @@ -899,94 +911,100 @@ void Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) { return false; } - const auto* index_meta = rowset->tablet_schema()->inverted_index(col_unique_id); + auto index_metas = rowset->tablet_schema()->inverted_indexs(col_unique_id); DBUG_EXECUTE_IF("Compaction::construct_skip_inverted_index_index_meta_nullptr", - { index_meta = nullptr; }) - if (index_meta == nullptr) { + { index_metas.clear(); }) + if (index_metas.empty()) { LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] column_unique_id[" << col_unique_id << "] index meta is null, will skip index compaction"; return false; } - - for (auto i = 0; i < rowset->num_segments(); i++) { - // TODO: inverted_index_path - auto seg_path = rowset->segment_path(i); - DBUG_EXECUTE_IF("Compaction::construct_skip_inverted_index_seg_path_nullptr", { - seg_path = ResultError(Status::Error( - "construct_skip_inverted_index_seg_path_nullptr")); - }) - if (!seg_path) { - LOG(WARNING) << seg_path.error(); - return false; - } - - std::string index_file_path; - try { - auto inverted_index_file_reader = std::make_unique( - fs, - std::string {InvertedIndexDescriptor::get_index_file_path_prefix( - seg_path.value())}, - _cur_tablet_schema->get_inverted_index_storage_format(), - rowset->rowset_meta()->inverted_index_file_info(i)); - auto st = inverted_index_file_reader->init( - config::inverted_index_read_buffer_size); - index_file_path = inverted_index_file_reader->get_index_file_path(index_meta); - DBUG_EXECUTE_IF( - "Compaction::construct_skip_inverted_index_index_file_reader_init_" - "status_not_ok", - { - st = Status::Error( - "debug point: " - "construct_skip_inverted_index_index_file_reader_init_" - "status_" - "not_ok"); - }) - if (!st.ok()) { - LOG(WARNING) << "init index " << index_file_path << " error:" << st; + for (const auto& index_meta : index_metas) { + for (auto i = 0; i < rowset->num_segments(); i++) { + // TODO: inverted_index_path + auto seg_path = rowset->segment_path(i); + DBUG_EXECUTE_IF("Compaction::construct_skip_inverted_index_seg_path_nullptr", { + seg_path = ResultError(Status::Error( + "construct_skip_inverted_index_seg_path_nullptr")); + }) + if (!seg_path) { + LOG(WARNING) << seg_path.error(); return false; } - // check index meta - auto result = inverted_index_file_reader->open(index_meta); - DBUG_EXECUTE_IF( - "Compaction::construct_skip_inverted_index_index_file_reader_open_" - "error", - { - result = ResultError( - Status::Error( - "CLuceneError occur when open idx file")); - }) - if (!result.has_value()) { - LOG(WARNING) - << "open index " << index_file_path << " error:" << result.error(); - return false; - } - auto reader = std::move(result.value()); - std::vector files; - reader->list(&files); - reader->close(); - DBUG_EXECUTE_IF( - "Compaction::construct_skip_inverted_index_index_reader_close_error", - { _CLTHROWA(CL_ERR_IO, "debug point: reader close error"); }) - - DBUG_EXECUTE_IF("Compaction::construct_skip_inverted_index_index_files_count", - { files.clear(); }) - - // why is 3? - // slice type index file at least has 3 files: null_bitmap, segments_N, segments.gen - if (files.size() < 3) { + std::string index_file_path; + try { + auto inverted_index_file_reader = std::make_unique( + fs, + std::string {InvertedIndexDescriptor::get_index_file_path_prefix( + seg_path.value())}, + _cur_tablet_schema->get_inverted_index_storage_format(), + rowset->rowset_meta()->inverted_index_file_info(i)); + auto st = inverted_index_file_reader->init( + config::inverted_index_read_buffer_size); + index_file_path = + inverted_index_file_reader->get_index_file_path(index_meta); + DBUG_EXECUTE_IF( + "Compaction::construct_skip_inverted_index_index_file_reader_init_" + "status_not_ok", + { + st = Status::Error( + "debug point: " + "construct_skip_inverted_index_index_file_reader_init_" + "status_" + "not_ok"); + }) + if (!st.ok()) { + LOG(WARNING) << "init index " << index_file_path << " error:" << st; + return false; + } + + // check index meta + auto result = inverted_index_file_reader->open(index_meta); + DBUG_EXECUTE_IF( + "Compaction::construct_skip_inverted_index_index_file_reader_open_" + "error", + { + result = ResultError( + Status::Error( + "CLuceneError occur when open idx file")); + }) + if (!result.has_value()) { + LOG(WARNING) << "open index " << index_file_path + << " error:" << result.error(); + return false; + } + auto reader = std::move(result.value()); + std::vector files; + reader->list(&files); + reader->close(); + DBUG_EXECUTE_IF( + "Compaction::construct_skip_inverted_index_index_reader_close_" + "error", + { _CLTHROWA(CL_ERR_IO, "debug point: reader close error"); }) + + DBUG_EXECUTE_IF( + "Compaction::construct_skip_inverted_index_index_files_count", + { files.clear(); }) + + // why is 3? + // slice type index file at least has 3 files: null_bitmap, segments_N, segments.gen + if (files.size() < 3) { + LOG(WARNING) + << "tablet[" << _tablet->tablet_id() << "] column_unique_id[" + << col_unique_id << "]," << index_file_path + << " is corrupted, will skip index compaction"; + return false; + } + } catch (CLuceneError& err) { LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] column_unique_id[" - << col_unique_id << "]," << index_file_path - << " is corrupted, will skip index compaction"; + << col_unique_id << "] open index[" << index_file_path + << "], will skip index compaction, error:" << err.what(); return false; } - } catch (CLuceneError& err) { - LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] column_unique_id[" - << col_unique_id << "] open index[" << index_file_path - << "], will skip index compaction, error:" << err.what(); - return false; } } + return true; }; diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h index 2ce2ca57f3db7e..7d9d233b637730 100644 --- a/be/src/olap/comparison_predicate.h +++ b/be/src/olap/comparison_predicate.h @@ -73,7 +73,6 @@ class ComparisonPredicateBase : public ColumnPredicate { if (iterator == nullptr) { return Status::OK(); } - std::string column_name = name_with_type.first; InvertedIndexQueryType query_type = InvertedIndexQueryType::UNKNOWN_QUERY; switch (PT) { @@ -104,7 +103,7 @@ class ComparisonPredicateBase : public ColumnPredicate { std::unique_ptr query_param = nullptr; RETURN_IF_ERROR( InvertedIndexQueryParamFactory::create_query_value(&_value, query_param)); - RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, query_param->get_value(), + RETURN_IF_ERROR(iterator->read_from_inverted_index(name_with_type, query_param->get_value(), query_type, num_rows, roaring)); // mask out null_bitmap, since NULL cmp VALUE will produce NULL diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index 096ee2df2965d1..a34bb493d5bfeb 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -253,9 +253,9 @@ void DeltaWriter::_request_slave_tablet_pull_rowset(const PNodeInfo& node_info) auto cur_rowset = _rowset_builder->rowset(); auto tablet_schema = cur_rowset->rowset_meta()->tablet_schema(); if (!tablet_schema->skip_write_index_on_load()) { - for (auto& column : tablet_schema->columns()) { - const TabletIndex* index_meta = tablet_schema->inverted_index(*column); - if (index_meta) { + for (const auto& column : tablet_schema->columns()) { + auto index_metas = tablet_schema->inverted_indexs(*column); + for (const auto* index_meta : index_metas) { indices_ids.emplace_back(index_meta->index_id(), index_meta->get_index_suffix()); } } diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index 2dcdb24b636cec..7c3d799136a194 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -186,7 +186,6 @@ class InListPredicateBase : public ColumnPredicate { if (iterator == nullptr) { return Status::OK(); } - std::string column_name = name_with_type.first; roaring::Roaring indices; HybridSetBase::IteratorBase* iter = _values->begin(); while (iter->has_next()) { @@ -199,7 +198,7 @@ class InListPredicateBase : public ColumnPredicate { InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY; std::shared_ptr index = std::make_shared(); RETURN_IF_ERROR(iterator->read_from_inverted_index( - column_name, query_param->get_value(), query_type, num_rows, index)); + name_with_type, query_param->get_value(), query_type, num_rows, index)); indices |= *index; iter->next(); } diff --git a/be/src/olap/match_predicate.cpp b/be/src/olap/match_predicate.cpp index 683e38775f34c2..07858d2eb5ecfc 100644 --- a/be/src/olap/match_predicate.cpp +++ b/be/src/olap/match_predicate.cpp @@ -56,7 +56,6 @@ Status MatchPredicate::evaluate(const vectorized::IndexFieldNameAndTypePair& nam "phrase queries require setting support_phrase = true"); } auto type = name_with_type.second; - const std::string& name = name_with_type.first; std::shared_ptr roaring = std::make_shared(); auto inverted_index_query_type = _to_inverted_index_query_type(_match_type); TypeDescriptor column_desc = type->get_type_as_type_descriptor(); @@ -67,7 +66,7 @@ Status MatchPredicate::evaluate(const vectorized::IndexFieldNameAndTypePair& nam char* buffer = const_cast(_value.c_str()); match_value.replace(buffer, length); //is it safe? RETURN_IF_ERROR(iterator->read_from_inverted_index( - name, &match_value, inverted_index_query_type, num_rows, roaring)); + name_with_type, &match_value, inverted_index_query_type, num_rows, roaring)); } else if (column_desc.type == TYPE_ARRAY && is_numeric_type( TabletColumn::get_field_type_by_type(column_desc.children[0].type))) { @@ -76,7 +75,7 @@ Status MatchPredicate::evaluate(const vectorized::IndexFieldNameAndTypePair& nam TabletColumn::get_field_type_by_type(column_desc.children[0].type)); RETURN_IF_ERROR(type_info->from_string(buf.data(), _value)); RETURN_IF_ERROR(iterator->read_from_inverted_index( - name, buf.data(), inverted_index_query_type, num_rows, roaring, true)); + name_with_type, buf.data(), inverted_index_query_type, num_rows, roaring, true)); } // mask out null_bitmap, since NULL cmp VALUE will produce NULL @@ -125,8 +124,9 @@ InvertedIndexQueryType MatchPredicate::_to_inverted_index_query_type(MatchType m bool MatchPredicate::_check_evaluate(InvertedIndexIterator* iterator) const { if (_match_type == MatchType::MATCH_PHRASE || _match_type == MatchType::MATCH_PHRASE_PREFIX || _match_type == MatchType::MATCH_PHRASE_EDGE) { - if (iterator->get_inverted_index_reader_type() == InvertedIndexReaderType::FULLTEXT && - get_parser_phrase_support_string_from_properties(iterator->get_index_properties()) == + auto reader = iterator->get_reader(InvertedIndexReaderType::FULLTEXT); + if (reader && + get_parser_phrase_support_string_from_properties(reader->get_index_properties()) == INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO) { return true; } diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index b25b261c2bbafb..d4709be0321b17 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ -133,8 +133,8 @@ void BetaRowset::clear_inverted_index_cache() { auto index_path_prefix = InvertedIndexDescriptor::get_index_file_path_prefix(*seg_path); for (const auto& column : tablet_schema()->columns()) { - const TabletIndex* index_meta = tablet_schema()->inverted_index(*column); - if (index_meta) { + auto index_metas = tablet_schema()->inverted_indexs(*column); + for (const auto& index_meta : index_metas) { auto inverted_index_file_cache_key = InvertedIndexDescriptor::get_index_file_cache_key( index_path_prefix, index_meta->index_id(), @@ -237,9 +237,9 @@ Status BetaRowset::remove() { } if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - for (auto& column : _schema->columns()) { - const TabletIndex* index_meta = _schema->inverted_index(*column); - if (index_meta) { + for (const auto& column : _schema->columns()) { + auto index_metas = _schema->inverted_indexs(*column); + for (const auto& index_meta : index_metas) { std::string inverted_index_file = InvertedIndexDescriptor::get_index_file_path_v1( InvertedIndexDescriptor::get_index_file_path_prefix(seg_path), @@ -411,10 +411,9 @@ Status BetaRowset::copy_files_to(const std::string& dir, const RowsetId& new_row auto src_path = local_segment_path(_tablet_path, rowset_id().to_string(), i); RETURN_IF_ERROR(io::global_local_filesystem()->copy_path(src_path, dst_path)); if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - for (auto& column : _schema->columns()) { - // if (column.has_inverted_index()) { - const TabletIndex* index_meta = _schema->inverted_index(*column); - if (index_meta) { + for (const auto& column : _schema->columns()) { + auto index_metas = _schema->inverted_indexs(*column); + for (const auto& index_meta : index_metas) { std::string inverted_index_src_file_path = InvertedIndexDescriptor::get_index_file_path_v1( InvertedIndexDescriptor::get_index_file_path_prefix(src_path), @@ -470,10 +469,9 @@ Status BetaRowset::upload_to(const StorageResource& dest_fs, const RowsetId& new dest_paths.emplace_back(remote_seg_path); local_paths.emplace_back(local_seg_path); if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - for (auto& column : _schema->columns()) { - // if (column.has_inverted_index()) { - const TabletIndex* index_meta = _schema->inverted_index(*column); - if (index_meta) { + for (const auto& column : _schema->columns()) { + auto index_metas = _schema->inverted_indexs(*column); + for (const auto& index_meta : index_metas) { std::string remote_inverted_index_file = InvertedIndexDescriptor::get_index_file_path_v1( InvertedIndexDescriptor::get_index_file_path_prefix( @@ -679,9 +677,9 @@ Status BetaRowset::calc_file_crc(uint32_t* crc_value, int64_t* file_count) { auto seg_path = DORIS_TRY(segment_path(seg_id)); file_paths.emplace_back(seg_path); if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - for (auto& column : _schema->columns()) { - const TabletIndex* index_meta = _schema->inverted_index(*column); - if (index_meta) { + for (const auto& column : _schema->columns()) { + auto index_metas = _schema->inverted_indexs(*column); + for (const auto& index_meta : index_metas) { std::string inverted_index_file = InvertedIndexDescriptor::get_index_file_path_v1( InvertedIndexDescriptor::get_index_file_path_prefix(seg_path), @@ -838,26 +836,25 @@ Status BetaRowset::show_nested_index_file(rapidjson::Value* rowset_value, } else { rapidjson::Value indices(rapidjson::kArrayType); for (auto column : _rowset_meta->tablet_schema()->columns()) { - const auto* index_meta = _rowset_meta->tablet_schema()->inverted_index(*column); - if (index_meta == nullptr) { - continue; - } - rapidjson::Value index(rapidjson::kObjectType); - auto index_id = index_meta->index_id(); - auto index_suffix = index_meta->get_index_suffix(); - index.AddMember("index_id", rapidjson::Value(index_id).Move(), allocator); - index.AddMember("index_suffix", rapidjson::Value(index_suffix.c_str(), allocator), - allocator); - auto path = InvertedIndexDescriptor::get_index_file_path_v1(index_file_path_prefix, - index_id, index_suffix); - auto st = add_file_info_to_json(path, index); - if (!st.ok()) { - return st; - } + auto index_metes = _rowset_meta->tablet_schema()->inverted_indexs(*column); + for (const auto& index_meta : index_metes) { + rapidjson::Value index(rapidjson::kObjectType); + auto index_id = index_meta->index_id(); + auto index_suffix = index_meta->get_index_suffix(); + index.AddMember("index_id", rapidjson::Value(index_id).Move(), allocator); + index.AddMember("index_suffix", + rapidjson::Value(index_suffix.c_str(), allocator), allocator); + auto path = InvertedIndexDescriptor::get_index_file_path_v1( + index_file_path_prefix, index_id, index_suffix); + auto st = add_file_info_to_json(path, index); + if (!st.ok()) { + return st; + } - auto status = process_files(*index_meta, indices, index); - if (!status.ok()) { - return status; + auto status = process_files(*index_meta, indices, index); + if (!status.ok()) { + return status; + } } } segment.AddMember("indices", indices, allocator); diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 3c44f701d1fd1a..6bd196fc92b756 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -560,8 +560,8 @@ Status BetaRowsetWriter::_rename_compacted_indices(int64_t begin, int64_t end, u } // rename remaining inverted index files for (auto column : _context.tablet_schema->columns()) { - if (const auto& index_info = _context.tablet_schema->inverted_index(*column); - index_info != nullptr) { + auto index_infos = _context.tablet_schema->inverted_indexs(*column); + for (const auto& index_info : index_infos) { auto index_id = index_info->index_id(); if (_context.tablet_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { diff --git a/be/src/olap/rowset/segcompaction.cpp b/be/src/olap/rowset/segcompaction.cpp index bf12ce8cbbc366..b26789ee366867 100644 --- a/be/src/olap/rowset/segcompaction.cpp +++ b/be/src/olap/rowset/segcompaction.cpp @@ -179,7 +179,8 @@ Status SegcompactionWorker::_delete_original_segments(uint32_t begin, uint32_t e } // Delete inverted index files for (auto&& column : schema->columns()) { - if (const auto* index_info = schema->inverted_index(*column); index_info != nullptr) { + auto index_infos = schema->inverted_indexs(*column); + for (const auto& index_info : index_infos) { auto index_id = index_info->index_id(); if (schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 5c42268490467d..31536c44236e64 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -522,7 +522,7 @@ Status VariantColumnReader::init(const ColumnReaderOptions& opts, const SegmentF _subcolumn_readers = std::make_unique(); _statistics = std::make_unique(); const ColumnMetaPB& self_column_pb = footer.columns(column_id); - const auto* parent_index = opts.tablet_schema->inverted_index(self_column_pb.unique_id()); + const auto& parent_index = opts.tablet_schema->inverted_indexs(self_column_pb.unique_id()); for (const ColumnMetaPB& column_pb : footer.columns()) { // Find all columns belonging to the current variant column // 1. not the variant column @@ -589,23 +589,23 @@ Status VariantColumnReader::init(const ColumnReaderOptions& opts, const SegmentF _subcolumn_readers->add(relative_path, SubcolumnReader {std::move(reader), get_data_type_fn()}); TabletSchema::SubColumnInfo sub_column_info; + // if subcolumn has index, add index to _variant_subcolumns_indexes if (vectorized::schema_util::generate_sub_column_info( *opts.tablet_schema, self_column_pb.unique_id(), relative_path.get_path(), &sub_column_info) && - sub_column_info.index != nullptr) { - const auto* index_meta = sub_column_info.index.get(); - DCHECK(index_meta != nullptr); - auto subcolumn_index = std::make_unique(*index_meta); - _variant_subcolumns_indexes.emplace(path.get_path(), std::move(subcolumn_index)); - } else if (parent_index) { - const auto& suffix_path = path.get_path(); - auto it = _variant_subcolumns_indexes.find(suffix_path); - if (it == _variant_subcolumns_indexes.end()) { - auto subcolumn_index = std::make_unique(*parent_index); + !sub_column_info.indexes.empty()) { + _variant_subcolumns_indexes[path.get_path()] = std::move(sub_column_info.indexes); + } + // if parent column has index, add index to _variant_subcolumns_indexes + else if (!parent_index.empty() && + InvertedIndexColumnWriter::check_support_inverted_index( + (FieldType)column_pb.type())) { + for (const auto& index : parent_index) { + const auto& suffix_path = path.get_path(); + auto subcolumn_index = std::make_unique(*index); subcolumn_index->set_escaped_escaped_index_suffix_path(suffix_path); - _variant_subcolumns_indexes.emplace(suffix_path, std::move(subcolumn_index)); - } else { - DCHECK(false); + _variant_subcolumns_indexes[suffix_path].emplace_back( + std::move(subcolumn_index)); } } } @@ -622,9 +622,16 @@ Status VariantColumnReader::init(const ColumnReaderOptions& opts, const SegmentF return Status::OK(); } -TabletIndex* VariantColumnReader::find_subcolumn_tablet_index(const std::string& path) { +std::vector VariantColumnReader::find_subcolumn_tablet_indexes( + const std::string& path) { auto it = _variant_subcolumns_indexes.find(path); - return it == _variant_subcolumns_indexes.end() ? nullptr : it->second.get(); + std::vector indexes; + if (it != _variant_subcolumns_indexes.end()) { + for (const auto& index : it->second) { + indexes.push_back(index.get()); + } + } + return indexes; } std::vector VariantColumnReader::get_typed_paths() const { @@ -781,9 +788,10 @@ Status ColumnReader::new_inverted_index_iterator( RETURN_IF_ERROR(_ensure_inverted_index_loaded(std::move(index_file_reader), index_meta)); { std::shared_lock rlock(_load_index_lock); - if (_inverted_index) { - RETURN_IF_ERROR(_inverted_index->new_iterator(read_options.io_ctx, read_options.stats, - read_options.runtime_state, iterator)); + auto iter = _inverted_indexs.find(index_meta->index_id()); + if (iter != _inverted_indexs.end()) { + RETURN_IF_ERROR(iter->second->new_iterator(read_options.io_ctx, read_options.stats, + read_options.runtime_state, iterator)); } } return Status::OK(); @@ -1066,8 +1074,13 @@ Status ColumnReader::_load_inverted_index_index( std::shared_ptr index_file_reader, const TabletIndex* index_meta) { std::unique_lock wlock(_load_index_lock); - if (_inverted_index && index_meta && - _inverted_index->get_index_id() == index_meta->index_id()) { + if (index_meta == nullptr) { + return Status::Error( + "Failed to load inverted index: index metadata is null"); + } + + auto it = _inverted_indexs.find(index_meta->index_id()); + if (it != _inverted_indexs.end()) { return Status::OK(); } @@ -1080,17 +1093,18 @@ Status ColumnReader::_load_inverted_index_index( type = _type_info->type(); } + std::shared_ptr inverted_index; if (is_string_type(type)) { if (parser_type != InvertedIndexParserType::PARSER_NONE) { try { - _inverted_index = FullTextIndexReader::create_shared(index_meta, index_file_reader); + inverted_index = FullTextIndexReader::create_shared(index_meta, index_file_reader); } catch (const CLuceneError& e) { return Status::Error( "create FullTextIndexReader error: {}", e.what()); } } else { try { - _inverted_index = + inverted_index = StringTypeInvertedIndexReader::create_shared(index_meta, index_file_reader); } catch (const CLuceneError& e) { return Status::Error( @@ -1099,18 +1113,16 @@ Status ColumnReader::_load_inverted_index_index( } } else if (is_numeric_type(type)) { try { - _inverted_index = BkdIndexReader::create_shared(index_meta, index_file_reader); + inverted_index = BkdIndexReader::create_shared(index_meta, index_file_reader); } catch (const CLuceneError& e) { return Status::Error( "create BkdIndexReader error: {}", e.what()); } } else { - _inverted_index.reset(); + return Status::Error( + "Field type {} is not supported for inverted index", type); } - // TODO: move has null to inverted_index_reader's query function - //bool has_null = true; - //RETURN_IF_ERROR(index_file_reader->has_null(index_meta, &has_null)); - //_inverted_index->set_has_null(has_null); + _inverted_indexs[index_meta->index_id()] = inverted_index; return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index c0c6ca06882da4..e74302caa7c16d 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -42,6 +42,7 @@ #include "olap/rowset/segment_v2/page_pointer.h" #include "olap/rowset/segment_v2/parsed_page.h" // for ParsedPage #include "olap/rowset/segment_v2/stream_reader.h" +#include "olap/tablet_schema.h" #include "olap/types.h" #include "olap/utils.h" #include "util/once.h" @@ -300,7 +301,7 @@ class ColumnReader : public MetadataAdder { std::unique_ptr _zone_map_index; std::unique_ptr _ordinal_index; std::unique_ptr _bitmap_index; - std::shared_ptr _inverted_index; + std::unordered_map> _inverted_indexs; std::shared_ptr _bloom_filter_index; std::vector> _sub_readers; @@ -329,7 +330,7 @@ class VariantColumnReader : public ColumnReader { int64_t get_metadata_size() const override; - TabletIndex* find_subcolumn_tablet_index(const std::string&); + std::vector find_subcolumn_tablet_indexes(const std::string&); bool exist_in_sparse_column(const vectorized::PathInData& path) const; @@ -353,7 +354,8 @@ class VariantColumnReader : public ColumnReader { std::unique_ptr _subcolumn_readers; std::unique_ptr _sparse_column_reader; std::unique_ptr _statistics; - std::unordered_map> _variant_subcolumns_indexes; + // key: subcolumn path, value: subcolumn indexes + std::unordered_map _variant_subcolumns_indexes; }; // Base iterator to read one column data diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index bad3111d5916aa..ffc7385c03ee44 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -427,6 +427,7 @@ ScalarColumnWriter::ScalarColumnWriter(const ColumnWriterOptions& opts, DCHECK(opts.meta->has_compression()); DCHECK(opts.meta->has_is_nullable()); DCHECK(file_writer != nullptr); + _inverted_index_builders.resize(_opts.inverted_indexs.size()); } ScalarColumnWriter::~ScalarColumnWriter() { @@ -470,40 +471,44 @@ Status ScalarColumnWriter::init() { if (_opts.need_inverted_index) { do { - DBUG_EXECUTE_IF("column_writer.init", { - class InvertedIndexColumnWriterEmptyImpl final : public InvertedIndexColumnWriter { - public: - Status init() override { return Status::OK(); } - Status add_values(const std::string name, const void* values, - size_t count) override { - return Status::OK(); - } - Status add_array_values(size_t field_size, const CollectionValue* values, - size_t count) override { - return Status::OK(); - } - Status add_array_values(size_t field_size, const void* value_ptr, - const uint8_t* null_map, const uint8_t* offsets_ptr, - size_t count) override { - return Status::OK(); - } - Status add_nulls(uint32_t count) override { return Status::OK(); } - Status add_array_nulls(const uint8_t* null_map, size_t num_rows) override { - return Status::OK(); - } - Status finish() override { return Status::OK(); } - int64_t size() const override { return 0; } - void close_on_error() override {} - }; - - _inverted_index_builder = std::make_unique(); - - break; - }); - - RETURN_IF_ERROR(InvertedIndexColumnWriter::create(get_field(), &_inverted_index_builder, - _opts.inverted_index_file_writer, - _opts.inverted_index)); + for (size_t i = 0; i < _opts.inverted_indexs.size(); i++) { + DBUG_EXECUTE_IF("column_writer.init", { + class InvertedIndexColumnWriterEmptyImpl final + : public InvertedIndexColumnWriter { + public: + Status init() override { return Status::OK(); } + Status add_values(const std::string name, const void* values, + size_t count) override { + return Status::OK(); + } + Status add_array_values(size_t field_size, const CollectionValue* values, + size_t count) override { + return Status::OK(); + } + Status add_array_values(size_t field_size, const void* value_ptr, + const uint8_t* null_map, const uint8_t* offsets_ptr, + size_t count) override { + return Status::OK(); + } + Status add_nulls(uint32_t count) override { return Status::OK(); } + Status add_array_nulls(const uint8_t* null_map, size_t num_rows) override { + return Status::OK(); + } + Status finish() override { return Status::OK(); } + int64_t size() const override { return 0; } + void close_on_error() override {} + }; + + _inverted_index_builders[i] = + std::make_unique(); + + break; + }); + + RETURN_IF_ERROR(InvertedIndexColumnWriter::create( + get_field(), &_inverted_index_builders[i], _opts.inverted_index_file_writer, + _opts.inverted_indexs[i])); + } } while (false); } if (_opts.need_bloom_filter) { @@ -529,7 +534,9 @@ Status ScalarColumnWriter::append_nulls(size_t num_rows) { _bitmap_index_builder->add_nulls(num_rows); } if (_opts.need_inverted_index) { - RETURN_IF_ERROR(_inverted_index_builder->add_nulls(num_rows)); + for (const auto& builder : _inverted_index_builders) { + RETURN_IF_ERROR(builder->add_nulls(num_rows)); + } } if (_opts.need_bloom_filter) { _bloom_filter_index_builder->add_nulls(num_rows); @@ -565,8 +572,9 @@ Status ScalarColumnWriter::_internal_append_data_in_current_page(const uint8_t* _bitmap_index_builder->add_values(data, *num_written); } if (_opts.need_inverted_index) { - RETURN_IF_ERROR( - _inverted_index_builder->add_values(get_field()->name(), data, *num_written)); + for (const auto& builder : _inverted_index_builders) { + RETURN_IF_ERROR(builder->add_values(get_field()->name(), data, *num_written)); + } } if (_opts.need_bloom_filter) { RETURN_IF_ERROR(_bloom_filter_index_builder->add_values(data, *num_written)); @@ -658,7 +666,9 @@ Status ScalarColumnWriter::write_bitmap_index() { Status ScalarColumnWriter::write_inverted_index() { if (_opts.need_inverted_index) { - return _inverted_index_builder->finish(); + for (const auto& builder : _inverted_index_builders) { + RETURN_IF_ERROR(builder->finish()); + } } return Status::OK(); } @@ -914,11 +924,14 @@ Status ArrayColumnWriter::init() { } RETURN_IF_ERROR(_item_writer->init()); if (_opts.need_inverted_index) { + _inverted_index_builders.resize(_opts.inverted_indexs.size()); auto* writer = dynamic_cast(_item_writer.get()); if (writer != nullptr) { - RETURN_IF_ERROR(InvertedIndexColumnWriter::create(get_field(), &_inverted_index_builder, - _opts.inverted_index_file_writer, - _opts.inverted_index)); + for (size_t i = 0; i < _opts.inverted_indexs.size(); i++) { + RETURN_IF_ERROR(InvertedIndexColumnWriter::create( + get_field(), &_inverted_index_builders[i], _opts.inverted_index_file_writer, + _opts.inverted_indexs[i])); + } } } return Status::OK(); @@ -926,7 +939,9 @@ Status ArrayColumnWriter::init() { Status ArrayColumnWriter::write_inverted_index() { if (_opts.need_inverted_index) { - return _inverted_index_builder->finish(); + for (const auto& builder : _inverted_index_builders) { + RETURN_IF_ERROR(builder->finish()); + } } return Status::OK(); } @@ -951,9 +966,11 @@ Status ArrayColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) { // now only support nested type is scala if (writer != nullptr) { //NOTE: use array field name as index field, but item_writer size should be used when moving item_data_ptr - RETURN_IF_ERROR(_inverted_index_builder->add_array_values( - _item_writer->get_field()->size(), reinterpret_cast(data), - reinterpret_cast(nested_null_map), offsets_ptr, num_rows)); + for (const auto& builder : _inverted_index_builders) { + RETURN_IF_ERROR(builder->add_array_values( + _item_writer->get_field()->size(), reinterpret_cast(data), + reinterpret_cast(nested_null_map), offsets_ptr, num_rows)); + } } } @@ -972,7 +989,9 @@ Status ArrayColumnWriter::append_nullable(const uint8_t* null_map, const uint8_t RETURN_IF_ERROR(append_data(ptr, num_rows)); if (is_nullable()) { if (_opts.need_inverted_index) { - RETURN_IF_ERROR(_inverted_index_builder->add_array_nulls(null_map, num_rows)); + for (const auto& builder : _inverted_index_builders) { + RETURN_IF_ERROR(builder->add_array_nulls(null_map, num_rows)); + } } RETURN_IF_ERROR(_null_writer->append_data(&null_map, num_rows)); } @@ -1170,7 +1189,9 @@ Status MapColumnWriter::finish_current_page() { Status MapColumnWriter::write_inverted_index() { if (_opts.need_inverted_index) { - return _inverted_index_builder->finish(); + for (const auto& builder : _inverted_index_builders) { + RETURN_IF_ERROR(builder->finish()); + } } return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index 3139f00be631ab..f9b3cb62377812 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -67,8 +67,7 @@ struct ColumnWriterOptions { uint8_t gram_size; uint16_t gram_bf_size; BloomFilterOptions bf_options; - std::vector indexes; // unused - const TabletIndex* inverted_index = nullptr; + std::vector inverted_indexs; InvertedIndexFileWriter* inverted_index_file_writer; // variant column writer used SegmentFooterPB* footer = nullptr; @@ -290,7 +289,7 @@ class ScalarColumnWriter : public ColumnWriter { std::unique_ptr _ordinal_index_builder; std::unique_ptr _zone_map_index_builder; std::unique_ptr _bitmap_index_builder; - std::unique_ptr _inverted_index_builder; + std::vector> _inverted_index_builders; std::unique_ptr _bloom_filter_index_builder; // call before flush data page. @@ -419,7 +418,7 @@ class ArrayColumnWriter final : public ColumnWriter { std::unique_ptr _offset_writer; std::unique_ptr _null_writer; std::unique_ptr _item_writer; - std::unique_ptr _inverted_index_builder; + std::vector> _inverted_index_builders; ColumnWriterOptions _opts; }; @@ -473,7 +472,7 @@ class MapColumnWriter final : public ColumnWriter { // we need null writer to make sure a row is null or not std::unique_ptr _null_writer; std::unique_ptr _offsets_writer; - std::unique_ptr _inverted_index_builder; + std::vector> _inverted_index_builders; ColumnWriterOptions _opts; }; @@ -524,7 +523,7 @@ class VariantSubcolumnWriter : public ColumnWriter { const TabletColumn* _tablet_column = nullptr; ColumnWriterOptions _opts; std::unique_ptr _writer; - std::unique_ptr _index; + TabletIndexes _indexes; }; class VariantColumnWriter : public ColumnWriter { diff --git a/be/src/olap/rowset/segment_v2/inverted_index_query_type.h b/be/src/olap/rowset/segment_v2/inverted_index_query_type.h index f1a47ebdd0f2eb..4595ed64cdf1e5 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_query_type.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_query_type.h @@ -81,6 +81,10 @@ enum class InvertedIndexQueryType { MATCH_PHRASE_EDGE_QUERY = 10, }; +inline bool is_equal_query(InvertedIndexQueryType query_type) { + return query_type == InvertedIndexQueryType::EQUAL_QUERY; +} + inline bool is_range_query(InvertedIndexQueryType query_type) { return (query_type == InvertedIndexQueryType::GREATER_THAN_QUERY || query_type == InvertedIndexQueryType::GREATER_EQUAL_QUERY || diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp index 4574bf7c0cb657..66bd8f6e8934ce 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp @@ -50,6 +50,7 @@ #include "olap/rowset/segment_v2/inverted_index_fs_directory.h" #include "olap/rowset/segment_v2/inverted_index_searcher.h" #include "olap/types.h" +#include "runtime/primitive_type.h" #include "runtime/runtime_state.h" #include "util/faststring.h" #include "util/runtime_profile.h" @@ -295,8 +296,10 @@ Status InvertedIndexReader::match_index_search( Status FullTextIndexReader::new_iterator(const io::IOContext& io_ctx, OlapReaderStatistics* stats, RuntimeState* runtime_state, std::unique_ptr* iterator) { - *iterator = - InvertedIndexIterator::create_unique(io_ctx, stats, runtime_state, shared_from_this()); + if (*iterator == nullptr) { + *iterator = InvertedIndexIterator::create_unique(io_ctx, stats, runtime_state); + } + (*iterator)->add_reader(InvertedIndexReaderType::FULLTEXT, shared_from_this()); return Status::OK(); } @@ -391,8 +394,10 @@ InvertedIndexReaderType FullTextIndexReader::type() { Status StringTypeInvertedIndexReader::new_iterator( const io::IOContext& io_ctx, OlapReaderStatistics* stats, RuntimeState* runtime_state, std::unique_ptr* iterator) { - *iterator = - InvertedIndexIterator::create_unique(io_ctx, stats, runtime_state, shared_from_this()); + if (*iterator == nullptr) { + *iterator = InvertedIndexIterator::create_unique(io_ctx, stats, runtime_state); + } + (*iterator)->add_reader(InvertedIndexReaderType::STRING_TYPE, shared_from_this()); return Status::OK(); } @@ -529,8 +534,10 @@ InvertedIndexReaderType StringTypeInvertedIndexReader::type() { Status BkdIndexReader::new_iterator(const io::IOContext& io_ctx, OlapReaderStatistics* stats, RuntimeState* runtime_state, std::unique_ptr* iterator) { - *iterator = - InvertedIndexIterator::create_unique(io_ctx, stats, runtime_state, shared_from_this()); + if (*iterator == nullptr) { + *iterator = InvertedIndexIterator::create_unique(io_ctx, stats, runtime_state); + } + (*iterator)->add_reader(InvertedIndexReaderType::BKD, shared_from_this()); return Status::OK(); } @@ -1151,24 +1158,36 @@ lucene::util::bkd::relation InvertedIndexVisitor::compare(std::vector& bit_map, bool skip_try) { + const vectorized::IndexFieldNameAndTypePair& name_with_type, const void* query_value, + InvertedIndexQueryType query_type, uint32_t segment_num_rows, + std::shared_ptr& bit_map, bool skip_try) { DBUG_EXECUTE_IF("return_inverted_index_bypass", { return Status::Error("inverted index bypass"); }); - if (UNLIKELY(_reader == nullptr)) { + auto reader = DORIS_TRY(_select_best_reader(name_with_type.second, query_type)); + if (UNLIKELY(reader == nullptr)) { throw CLuceneError(CL_ERR_NullPointer, "bkd index reader is null", false); } - if (!skip_try && _reader->type() == InvertedIndexReaderType::BKD) { + if (!skip_try && reader->type() == InvertedIndexReaderType::BKD) { if (_runtime_state != nullptr && _runtime_state->query_options().inverted_index_skip_threshold > 0 && _runtime_state->query_options().inverted_index_skip_threshold < 100) { auto query_bkd_limit_percent = _runtime_state->query_options().inverted_index_skip_threshold; uint32_t hit_count = 0; - RETURN_IF_ERROR( - try_read_from_inverted_index(column_name, query_value, query_type, &hit_count)); + RETURN_IF_ERROR(try_read_from_inverted_index(reader, name_with_type.first, query_value, + query_type, &hit_count)); if (hit_count > segment_num_rows * query_bkd_limit_percent / 100) { return Status::Error( "hit count: {}, bkd inverted reached limit {}% , segment num " @@ -1179,8 +1198,8 @@ Status InvertedIndexIterator::read_from_inverted_index( } auto execute_query = [&]() { - return _reader->query(&_io_ctx, _stats, _runtime_state, column_name, query_value, - query_type, bit_map); + return reader->query(&_io_ctx, _stats, _runtime_state, name_with_type.first, query_value, + query_type, bit_map); }; if (_runtime_state->query_options().enable_profile) { @@ -1189,7 +1208,7 @@ Status InvertedIndexIterator::read_from_inverted_index( SCOPED_RAW_TIMER(&query_stats.exec_time); RETURN_IF_ERROR(execute_query()); } - query_stats.column_name = column_name; + query_stats.column_name = name_with_type.first; query_stats.hit_rows = bit_map->cardinality(); _stats->inverted_index_stats.stats.emplace_back(query_stats); } else { @@ -1199,7 +1218,8 @@ Status InvertedIndexIterator::read_from_inverted_index( return Status::OK(); } -Status InvertedIndexIterator::try_read_from_inverted_index(const std::string& column_name, +Status InvertedIndexIterator::try_read_from_inverted_index(const InvertedIndexReaderPtr& reader, + const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, uint32_t* count) { @@ -1209,18 +1229,84 @@ Status InvertedIndexIterator::try_read_from_inverted_index(const std::string& co query_type == InvertedIndexQueryType::LESS_EQUAL_QUERY || query_type == InvertedIndexQueryType::LESS_THAN_QUERY || query_type == InvertedIndexQueryType::EQUAL_QUERY) { - RETURN_IF_ERROR(_reader->try_query(&_io_ctx, _stats, _runtime_state, column_name, - query_value, query_type, count)); + RETURN_IF_ERROR(reader->try_query(&_io_ctx, _stats, _runtime_state, column_name, + query_value, query_type, count)); } return Status::OK(); } -InvertedIndexReaderType InvertedIndexIterator::get_inverted_index_reader_type() const { - return _reader->type(); +Status InvertedIndexIterator::read_null_bitmap(InvertedIndexQueryCacheHandle* cache_handle, + lucene::store::Directory* dir) { + auto reader = DORIS_TRY(_select_best_reader()); + return reader->read_null_bitmap(&_io_ctx, _stats, cache_handle, dir); +} + +Result InvertedIndexIterator::has_null() { + auto reader = DORIS_TRY(_select_best_reader()); + return reader->has_null(); +}; + +Result InvertedIndexIterator::_select_best_reader( + const vectorized::DataTypePtr& column_type, InvertedIndexQueryType query_type) { + if (_readers.empty()) { + return ResultError(Status::RuntimeError( + "No available inverted index readers. Check if index is properly initialized.")); + } + + // Only one BKD index allowed per numeric type + if (_readers.size() == 1) { + return _readers.begin()->second; + } + + const auto& column_desc = column_type->get_type_as_type_descriptor(); + const auto field_type = TabletColumn::get_field_type_by_type(column_desc.type); + + // Check for string types (including arrays of strings) + const bool is_string = + is_string_type(field_type) || + (column_desc.type == TYPE_ARRAY && + is_string_type(TabletColumn::get_field_type_by_type(column_desc.children[0].type))); + + InvertedIndexReaderType preferred_type = InvertedIndexReaderType::UNKNOWN; + // Handle string type columns + if (is_string) { + if (is_match_query(query_type)) { + preferred_type = InvertedIndexReaderType::FULLTEXT; + } else if (is_equal_query(query_type)) { + preferred_type = InvertedIndexReaderType::STRING_TYPE; + } + } + DBUG_EXECUTE_IF("inverted_index_reader._select_best_reader", { + auto type = DebugPoints::instance()->get_debug_param_or_default( + "inverted_index_reader._select_best_reader", "type", -1); + if ((int32_t)preferred_type != type) { + return ResultError(Status::RuntimeError( + "Inverted index reader type mismatch. Expected={}, Actual={}", + (int32_t)preferred_type, type)); + } + }) + + if (auto reader = get_reader(preferred_type)) { + return reader; + } + + return ResultError(Status::RuntimeError("Index query type not supported")); +} + +Result InvertedIndexIterator::_select_best_reader() { + if (_readers.empty()) { + return ResultError(Status::RuntimeError( + "No available inverted index readers. Check if index is properly initialized.")); + } + return _readers.begin()->second; } -const std::map& InvertedIndexIterator::get_index_properties() const { - return _reader->get_index_properties(); +InvertedIndexReaderPtr InvertedIndexIterator::get_reader(InvertedIndexReaderType type) { + auto iter = _readers.find(type); + if (iter == _readers.end()) { + return nullptr; + } + return iter->second; } template class InvertedIndexVisitor; diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h index 6ee293547431b7..87d7460a05aa43 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h @@ -449,35 +449,37 @@ class InvertedIndexIterator { public: InvertedIndexIterator(const io::IOContext& io_ctx, OlapReaderStatistics* stats, - RuntimeState* runtime_state, std::shared_ptr reader) - : _io_ctx(io_ctx), - _stats(stats), - _runtime_state(runtime_state), - _reader(std::move(reader)) {} - - Status read_from_inverted_index(const std::string& column_name, const void* query_value, - InvertedIndexQueryType query_type, uint32_t segment_num_rows, + RuntimeState* runtime_state); + + void add_reader(InvertedIndexReaderType type, const InvertedIndexReaderPtr& reader); + + Status read_from_inverted_index(const vectorized::IndexFieldNameAndTypePair& name_with_type, + const void* query_value, InvertedIndexQueryType query_type, + uint32_t segment_num_rows, + std::shared_ptr& bit_map, bool skip_try = false); - Status try_read_from_inverted_index(const std::string& column_name, const void* query_value, + + Status try_read_from_inverted_index(const InvertedIndexReaderPtr& reader, + const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, uint32_t* count); Status read_null_bitmap(InvertedIndexQueryCacheHandle* cache_handle, - lucene::store::Directory* dir = nullptr) { - return _reader->read_null_bitmap(&_io_ctx, _stats, cache_handle, dir); - } + lucene::store::Directory* dir = nullptr); - [[nodiscard]] InvertedIndexReaderType get_inverted_index_reader_type() const; - [[nodiscard]] const std::map& get_index_properties() const; - [[nodiscard]] bool has_null() { return _reader->has_null(); }; + [[nodiscard]] Result has_null(); - const InvertedIndexReaderPtr& reader() { return _reader; } + InvertedIndexReaderPtr get_reader(InvertedIndexReaderType type); private: + Result _select_best_reader(const vectorized::DataTypePtr& column_type, + InvertedIndexQueryType query_type); + Result _select_best_reader(); + io::IOContext _io_ctx; OlapReaderStatistics* _stats = nullptr; RuntimeState* _runtime_state = nullptr; - std::shared_ptr _reader; + std::unordered_map _readers; }; } // namespace segment_v2 diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp index 951188b496986a..292a53c7e5f3ba 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp @@ -72,11 +72,7 @@ const float MAXMBSortInHeap = 512.0 * 8; const int DIMS = 1; bool InvertedIndexColumnWriter::check_support_inverted_index(const TabletColumn& column) { - // bellow types are not supported in inverted index for extracted columns - static std::set invalid_types = {FieldType::OLAP_FIELD_TYPE_DOUBLE, - FieldType::OLAP_FIELD_TYPE_JSONB, - FieldType::OLAP_FIELD_TYPE_FLOAT}; - if (invalid_types.contains(column.type())) { + if (!check_support_inverted_index(column.type())) { return false; } if (column.is_variant_type()) { @@ -90,6 +86,14 @@ bool InvertedIndexColumnWriter::check_support_inverted_index(const TabletColumn& return true; } +bool InvertedIndexColumnWriter::check_support_inverted_index(FieldType type) { + // bellow types are not supported in inverted index for extracted columns + static std::set invalid_types = {FieldType::OLAP_FIELD_TYPE_DOUBLE, + FieldType::OLAP_FIELD_TYPE_JSONB, + FieldType::OLAP_FIELD_TYPE_FLOAT}; + return !invalid_types.contains(type); +} + template class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { public: diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.h b/be/src/olap/rowset/segment_v2/inverted_index_writer.h index a8f719ee1268c6..875a1c6c0b9c08 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_writer.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.h @@ -76,6 +76,8 @@ class InvertedIndexColumnWriter { // are generated from variant, but not all of them are supported static bool check_support_inverted_index(const TabletColumn& column); + static bool check_support_inverted_index(FieldType); + private: DISALLOW_COPY_AND_ASSIGN(InvertedIndexColumnWriter); }; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 80c71b78a9824a..fa8616d37ed8e6 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -776,8 +776,8 @@ bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred) { // UNTOKENIZED strings exceed ignore_above, they are written as null, causing range query errors if (PredicateTypeTraits::is_range(pred->type()) && _inverted_index_iterators[pred_column_id] != nullptr && - _inverted_index_iterators[pred_column_id]->get_inverted_index_reader_type() == - InvertedIndexReaderType::STRING_TYPE) { + _inverted_index_iterators[pred_column_id]->get_reader( + InvertedIndexReaderType::STRING_TYPE)) { return false; } @@ -854,9 +854,9 @@ bool SegmentIterator::_downgrade_without_index(Status res, bool need_remaining) } bool SegmentIterator::_column_has_fulltext_index(int32_t cid) { - bool has_fulltext_index = _inverted_index_iterators[cid] != nullptr && - _inverted_index_iterators[cid]->get_inverted_index_reader_type() == - InvertedIndexReaderType::FULLTEXT; + bool has_fulltext_index = + _inverted_index_iterators[cid] != nullptr && + _inverted_index_iterators[cid]->get_reader(InvertedIndexReaderType::FULLTEXT); return has_fulltext_index; } @@ -946,18 +946,13 @@ bool SegmentIterator::_need_read_data(ColumnId cid) { Status SegmentIterator::_apply_inverted_index() { std::vector remaining_predicates; - std::set no_need_to_pass_column_predicate_set; - for (auto pred : _col_predicates) { - if (no_need_to_pass_column_predicate_set.count(pred) > 0) { - continue; - } else { - bool continue_apply = true; - RETURN_IF_ERROR(_apply_inverted_index_on_column_predicate(pred, remaining_predicates, - &continue_apply)); - if (!continue_apply) { - break; - } + for (auto* pred : _col_predicates) { + bool continue_apply = true; + RETURN_IF_ERROR(_apply_inverted_index_on_column_predicate(pred, remaining_predicates, + &continue_apply)); + if (!continue_apply) { + break; } } @@ -1082,21 +1077,24 @@ Status SegmentIterator::_init_inverted_index_iterators() { // This is because the sub-column is created in create_materialized_variant_column. // We use this column to locate the metadata for the inverted index, which requires a unique_id and path. const auto& column = _opts.tablet_schema->column(cid); - const TabletIndex* index_meta = nullptr; + std::vector inverted_indexs; + // If the column is an extracted column, we need to find the sub-column in the parent column reader. if (column.is_extracted_column()) { if (_segment->_column_readers.find(column.parent_unique_id()) == _segment->_column_readers.end()) { continue; } auto* column_reader = _segment->_column_readers.at(column.parent_unique_id()).get(); - index_meta = assert_cast(column_reader) - ->find_subcolumn_tablet_index(column.suffix_path()); - } else { - index_meta = _segment->_tablet_schema->inverted_index(column.unique_id()); + inverted_indexs = assert_cast(column_reader) + ->find_subcolumn_tablet_indexes(column.suffix_path()); + } + // If the column is not an extracted column, we can directly get the inverted index metadata from the tablet schema. + else { + inverted_indexs = _segment->_tablet_schema->inverted_indexs(column); } - if (index_meta) { + for (const auto& inverted_index : inverted_indexs) { RETURN_IF_ERROR(_segment->new_inverted_index_iterator( - column, index_meta, _opts, &_inverted_index_iterators[cid])); + column, inverted_index, _opts, &_inverted_index_iterators[cid])); } } } diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 5d6763afd2a7de..d6e0318871595c 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -223,13 +223,16 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& co if (_opts.write_type == DataWriteType::TYPE_DIRECT && schema->skip_write_index_on_load()) { skip_inverted_index = true; } - // indexes for this column - if (const auto& index = schema->inverted_index(column); - index != nullptr && !skip_inverted_index) { - opts.inverted_index = index; - opts.need_inverted_index = true; - DCHECK(_inverted_index_file_writer != nullptr); + if (!skip_inverted_index) { + auto inverted_indexs = schema->inverted_indexs(column); + if (!inverted_indexs.empty()) { + for (const auto& index : inverted_indexs) { + opts.inverted_indexs.emplace_back(index); + } + opts.need_inverted_index = true; + DCHECK(_inverted_index_file_writer != nullptr); + } } opts.inverted_index_file_writer = _inverted_index_file_writer; diff --git a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp index c0edc586ac5bcb..1e50c0ea45bc90 100644 --- a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp +++ b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp @@ -75,8 +75,8 @@ Status _create_column_writer(uint32_t cid, const TabletColumn& column, const TabletSchemaSPtr& tablet_schema, InvertedIndexFileWriter* inverted_index_file_writer, std::unique_ptr* writer, - std::unique_ptr& subcolumn_index, - ColumnWriterOptions* opt, int64_t none_null_value_size) { + TabletIndexes& subcolumn_indexes, ColumnWriterOptions* opt, + int64_t none_null_value_size) { _init_column_meta(opt->meta, cid, column, opt->compression_type); // no need to record none null value size for typed column if (!column.path_info_ptr()->get_is_typed()) { @@ -86,27 +86,43 @@ Status _create_column_writer(uint32_t cid, const TabletColumn& column, opt->need_zone_map = tablet_schema->keys_type() != KeysType::AGG_KEYS; opt->need_bloom_filter = column.is_bf_column(); opt->need_bitmap_index = column.has_bitmap_index(); - const auto& index = tablet_schema->inverted_index(column.parent_unique_id()); + const auto& parent_index = tablet_schema->inverted_indexs(column.parent_unique_id()); VLOG_DEBUG << "column: " << column.name() << " need_inverted_index: " << opt->need_inverted_index << " need_bloom_filter: " << opt->need_bloom_filter << " need_bitmap_index: " << opt->need_bitmap_index; // init inverted index - // index denotes the index of the entire variant column + // parent_index denotes the index of the entire variant column // while subcolumn_index denotes the current subcolumn's index - if ((index != nullptr || subcolumn_index != nullptr) && - segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(column)) { - // inheriting the variant column's index when the subcolumn index is absent - if (subcolumn_index == nullptr) { - subcolumn_index = std::make_unique(*index); - subcolumn_index->set_escaped_escaped_index_suffix_path( - column.path_info_ptr()->get_path()); + if (segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(column)) { + auto init_opt_inverted_index = [&]() { + DCHECK(!subcolumn_indexes.empty()); + for (const auto& index : subcolumn_indexes) { + opt->inverted_indexs.push_back(index.get()); + } + opt->need_inverted_index = true; + DCHECK(inverted_index_file_writer != nullptr); + opt->inverted_index_file_writer = inverted_index_file_writer; + }; + + // the subcolumn index is already initialized + if (!subcolumn_indexes.empty()) { + init_opt_inverted_index(); + } + // the subcolumn index is not initialized, but the parent index is present + else if (!parent_index.empty()) { + for (const auto& index : parent_index) { + subcolumn_indexes.push_back(std::make_unique(*index)); + subcolumn_indexes.back()->set_escaped_escaped_index_suffix_path( + column.path_info_ptr()->get_path()); + } + init_opt_inverted_index(); + } + // no parent index and no subcolumn index + else { + opt->need_inverted_index = false; } - opt->inverted_index = subcolumn_index.get(); - opt->need_inverted_index = true; - DCHECK(inverted_index_file_writer != nullptr); - opt->inverted_index_file_writer = inverted_index_file_writer; } #define DISABLE_INDEX_IF_FIELD_TYPE(TYPE, type_name) \ @@ -352,7 +368,7 @@ Status VariantColumnWriterImpl::_process_subcolumns(vectorized::ColumnObject* pt vectorized::ColumnPtr current_column; if (auto current_path = entry->path.get_path(); _subcolumns_info.find(current_path) != _subcolumns_info.end()) { - tablet_column = _subcolumns_info[current_path].column; + tablet_column = std::move(_subcolumns_info[current_path].column); vectorized::DataTypePtr storage_type = vectorized::DataTypeFactory::instance().create_data_type(tablet_column); vectorized::DataTypePtr finalized_type = entry->data.get_least_common_type(); @@ -361,9 +377,8 @@ Status VariantColumnWriterImpl::_process_subcolumns(vectorized::ColumnObject* pt RETURN_IF_ERROR(vectorized::schema_util::cast_column( {current_column, finalized_type, ""}, storage_type, ¤t_column)); } - if (auto index = _subcolumns_info[current_path].index; index != nullptr) { - _subcolumns_indexes[current_column_id] = std::make_unique(*index); - } + _subcolumns_indexes[current_column_id] = + std::move(_subcolumns_info[current_path].indexes); const auto& null_data = assert_cast(*current_column) .get_null_map_data(); none_null_value_size = @@ -720,8 +735,8 @@ Status VariantSubcolumnWriter::finalize() { << " is_bf_column: " << parent_column.is_bf_column() << " " << flush_column.is_bf_column(); RETURN_IF_ERROR(_create_column_writer(0, flush_column, _opts.rowset_ctx->tablet_schema, - _opts.inverted_index_file_writer, &_writer, _index, &opts, - none_null_value_size)); + _opts.inverted_index_file_writer, &_writer, _indexes, + &opts, none_null_value_size)); _opts = opts; auto olap_data_convertor = std::make_unique(); int column_id = 0; diff --git a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.h b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.h index cfb17b9ac65e3f..7fd8e8d253ce74 100644 --- a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.h +++ b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.h @@ -94,7 +94,7 @@ class VariantColumnWriterImpl { VariantStatistics _statistics; // hold the references of subcolumns indexes - std::vector> _subcolumns_indexes; + std::vector _subcolumns_indexes; // hold the references of subcolumns info std::unordered_map _subcolumns_info; diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index 6e239e689966e5..937aa7e42b82d4 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -221,11 +221,15 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo tablet_schema->skip_write_index_on_load()) { skip_inverted_index = true; } - if (const auto& index = tablet_schema->inverted_index(column); - index != nullptr && !skip_inverted_index) { - opts.inverted_index = index; - opts.need_inverted_index = true; - DCHECK(_inverted_index_file_writer != nullptr); + if (!skip_inverted_index) { + auto inverted_indexs = tablet_schema->inverted_indexs(column); + if (!inverted_indexs.empty()) { + for (const auto& index : inverted_indexs) { + opts.inverted_indexs.emplace_back(index); + } + opts.need_inverted_index = true; + DCHECK(_inverted_index_file_writer != nullptr); + } } opts.inverted_index_file_writer = _inverted_index_file_writer; diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 5e5f4b47580c94..be0bd6f24d24ec 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -922,25 +922,33 @@ void TabletColumn::append_sparse_column(TabletColumn column) { } void TabletSchema::append_index(TabletIndex&& index) { + int32_t index_pos = _indexes.size(); _indexes.push_back(std::make_shared(index)); - if (auto field_pattern = _indexes.back()->field_pattern(); - !field_pattern.empty() && !_indexes.back()->col_unique_ids().empty()) { - auto& pattern_to_index_map = - _index_by_unique_id_with_pattern[_indexes.back()->col_unique_ids()[0]]; - pattern_to_index_map[field_pattern] = _indexes.back(); + for (int32_t id : _indexes.back()->col_unique_ids()) { + if (auto field_pattern = _indexes.back()->field_pattern(); !field_pattern.empty()) { + auto& pattern_to_index_map = _index_by_unique_id_with_pattern[id]; + pattern_to_index_map[field_pattern].emplace_back(_indexes.back()); + } else { + IndexKey key = std::make_tuple(_indexes.back()->index_type(), id, + _indexes.back()->get_index_suffix()); + _col_id_suffix_to_index[key].push_back(index_pos); + } } } void TabletSchema::update_index(const TabletColumn& col, const IndexType& index_type, - TabletIndex&& index) { + std::vector&& indexes) { int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id(); const std::string& suffix_path = escape_for_path_name(col.suffix_path()); - for (auto& _indexe : _indexes) { - for (int32_t id : _indexe->col_unique_ids()) { - if (_indexe->index_type() == index_type && id == col_unique_id && - _indexe->get_index_suffix() == suffix_path) { - _indexe = std::make_shared(std::move(index)); - break; + IndexKey key(index_type, col_unique_id, suffix_path); + auto iter = _col_id_suffix_to_index.find(key); + if (iter != _col_id_suffix_to_index.end()) { + if (iter->second.size() == indexes.size()) { + for (size_t i = 0; i < iter->second.size(); ++i) { + int32_t pos = iter->second[i]; + if (pos >= 0 && pos < _indexes.size()) { + _indexes[pos] = std::make_shared(std::move(indexes[i])); + } } } } @@ -961,17 +969,28 @@ void TabletSchema::clear_index_cache_handlers() { void TabletSchema::clear_index() { clear_index_cache_handlers(); _indexes.clear(); + _index_by_unique_id_with_pattern.clear(); + _col_id_suffix_to_index.clear(); } void TabletSchema::remove_index(int64_t index_id) { - std::vector indexes; - for (auto index : _indexes) { - if (index->index_id() == index_id) { - continue; + std::vector new_indexes; + for (auto& index : _indexes) { + if (index->index_id() != index_id) { + new_indexes.emplace_back(std::move(index)); } - indexes.emplace_back(std::move(index)); } - _indexes = std::move(indexes); + + _col_id_suffix_to_index.clear(); + for (size_t new_pos = 0; new_pos < new_indexes.size(); ++new_pos) { + const auto& index = new_indexes[new_pos]; + for (int32_t col_uid : index->col_unique_ids()) { + IndexKey key = std::make_tuple(index->index_type(), col_uid, index->get_index_suffix()); + _col_id_suffix_to_index[key].push_back(new_pos); + } + } + + _indexes = std::move(new_indexes); } void TabletSchema::clear_columns() { @@ -1005,6 +1024,8 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac _field_name_to_index.clear(); _field_id_to_index.clear(); _cluster_key_uids.clear(); + _index_by_unique_id_with_pattern.clear(); + _col_id_suffix_to_index.clear(); clear_column_cache_handlers(); clear_index_cache_handlers(); for (const auto& i : schema.cluster_key_uids()) { @@ -1055,12 +1076,17 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac index = std::make_shared(); index->init_from_pb(index_pb); } + int32_t index_pos = _indexes.size(); _indexes.emplace_back(std::move(index)); - if (auto field_pattern = _indexes.back()->field_pattern(); - !field_pattern.empty() && !_indexes.back()->col_unique_ids().empty()) { - auto& pattern_to_index_map = - _index_by_unique_id_with_pattern[_indexes.back()->col_unique_ids()[0]]; - pattern_to_index_map[field_pattern] = _indexes.back(); + for (int32_t col_uid : _indexes.back()->col_unique_ids()) { + if (auto field_pattern = _indexes.back()->field_pattern(); !field_pattern.empty()) { + auto& pattern_to_index_map = _index_by_unique_id_with_pattern[col_uid]; + pattern_to_index_map[field_pattern].emplace_back(_indexes.back()); + } else { + IndexKey key = std::make_tuple(_indexes.back()->index_type(), col_uid, + _indexes.back()->get_index_suffix()); + _col_id_suffix_to_index[key].push_back(index_pos); + } } } _num_short_key_columns = schema.num_short_key_columns(); @@ -1186,6 +1212,8 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version _indexes.clear(); _field_name_to_index.clear(); _field_id_to_index.clear(); + _index_by_unique_id_with_pattern.clear(); + _col_id_suffix_to_index.clear(); _delete_sign_idx = -1; _sequence_col_idx = -1; _version_col_idx = -1; @@ -1224,11 +1252,18 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version } for (const auto& i : index->indexes) { + int32_t index_pos = _indexes.size(); _indexes.emplace_back(std::make_shared(*i)); - if (auto field_pattern = i->field_pattern(); - !field_pattern.empty() && !i->col_unique_ids().empty()) { - auto& pattern_to_index_map = _index_by_unique_id_with_pattern[i->col_unique_ids()[0]]; - pattern_to_index_map[field_pattern] = _indexes.back(); + + for (int32_t col_uid : _indexes.back()->col_unique_ids()) { + if (auto field_pattern = _indexes.back()->field_pattern(); !field_pattern.empty()) { + auto& pattern_to_index_map = _index_by_unique_id_with_pattern[col_uid]; + pattern_to_index_map[field_pattern].emplace_back(_indexes.back()); + } else { + IndexKey key = std::make_tuple(_indexes.back()->index_type(), col_uid, + _indexes.back()->get_index_suffix()); + _col_id_suffix_to_index[key].push_back(index_pos); + } } } @@ -1423,7 +1458,15 @@ void TabletSchema::update_indexes_from_thrift(const std::vectorcol_unique_ids().empty()) { auto& pattern_to_index_map = _index_by_unique_id_with_pattern[index->col_unique_ids()[0]]; - pattern_to_index_map[field_pattern] = index; + pattern_to_index_map[field_pattern].emplace_back(index); + } + } + std::unordered_map, IndexKeyHash> col_id_suffix_to_index; + for (size_t i = 0; i < _indexes.size(); i++) { + for (int32_t col_uid : _indexes[i]->col_unique_ids()) { + IndexKey key = std::make_tuple(_indexes[i]->index_type(), col_uid, + _indexes[i]->get_index_suffix()); + col_id_suffix_to_index[key].push_back(i); } } } @@ -1478,62 +1521,65 @@ bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const { return false; } -const TabletIndex* TabletSchema::inverted_index(int32_t col_unique_id, - const std::string& suffix_path) const { +std::vector TabletSchema::inverted_indexs( + int32_t col_unique_id, const std::string& suffix_path) const { + std::vector result; const std::string escaped_suffix = escape_for_path_name(suffix_path); - for (const auto& _index : _indexes) { - if (_index->index_type() == IndexType::INVERTED) { - for (int32_t id : _index->col_unique_ids()) { - if (id == col_unique_id && _index->get_index_suffix() == escaped_suffix && - _index->field_pattern().empty()) { - return _index.get(); - } + auto it = _col_id_suffix_to_index.find( + std::make_tuple(IndexType::INVERTED, col_unique_id, escaped_suffix)); + if (it != _col_id_suffix_to_index.end()) { + for (int32_t pos : it->second) { + if (pos >= 0 && pos < _indexes.size()) { + result.push_back(_indexes[pos].get()); } } } - return nullptr; + return result; } -TabletIndexPtr TabletSchema::inverted_index_by_field_pattern( +std::vector TabletSchema::inverted_index_by_field_pattern( int32_t col_unique_id, const std::string& field_pattern) const { auto id_to_pattern_map = _index_by_unique_id_with_pattern.find(col_unique_id); if (id_to_pattern_map == _index_by_unique_id_with_pattern.end()) { - return nullptr; + return {}; } auto pattern_to_index_map = id_to_pattern_map->second.find(field_pattern); if (pattern_to_index_map == id_to_pattern_map->second.end()) { - return nullptr; + return {}; } return pattern_to_index_map->second; } -const TabletIndex* TabletSchema::inverted_index(const TabletColumn& col) const { +std::vector TabletSchema::inverted_indexs(const TabletColumn& col) const { // Some columns(Float, Double, JSONB ...) from the variant do not support inverted index if (!segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col)) { - return nullptr; + return {}; } // TODO use more efficient impl // Use parent id if unique not assigned, this could happend when accessing subcolumns of variants int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id(); - if (const TabletIndex* index = - inverted_index(col_unique_id, escape_for_path_name(col.suffix_path())); - index != nullptr) { - return index; + std::vector result; + if (result = inverted_indexs(col_unique_id, escape_for_path_name(col.suffix_path())); + !result.empty()) { + return result; } // variant's typed column has it's own index else if (col.is_extracted_column()) { std::string relative_path = col.path_info_ptr()->copy_pop_front().get_path(); if (_path_set_info_map.find(col_unique_id) == _path_set_info_map.end()) { - return nullptr; + return result; } const auto& path_set_info = _path_set_info_map.at(col_unique_id); if (path_set_info.typed_path_set.find(relative_path) == path_set_info.typed_path_set.end()) { - return nullptr; + return result; + } + for (const auto& index : path_set_info.typed_path_set.at(relative_path).indexes) { + result.push_back(index.get()); } - return path_set_info.typed_path_set.at(relative_path).index.get(); + return result; } - return nullptr; + return result; } bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const { @@ -1552,14 +1598,12 @@ bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const { } const TabletIndex* TabletSchema::get_ngram_bf_index(int32_t col_unique_id) const { - // TODO use more efficient impl - for (const auto& _index : _indexes) { - if (_index->index_type() == IndexType::NGRAM_BF) { - for (int32_t id : _index->col_unique_ids()) { - if (id == col_unique_id) { - return _index.get(); - } - } + // Get the ngram bf index for the given column unique id + IndexKey index_key(IndexType::NGRAM_BF, col_unique_id, ""); + auto it = _col_id_suffix_to_index.find(index_key); + if (it != _col_id_suffix_to_index.end()) { + if (!it->second.empty()) { + return _indexes[it->second[0]].get(); } } return nullptr; diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 56bc1c06c96c4e..ffd98d0390f06a 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -324,9 +324,12 @@ class TabletIndex : public MetadataAdder { IndexType _index_type; std::vector _col_unique_ids; std::map _properties; + + friend class TabletSchemaMultiIndexTest; }; using TabletIndexPtr = std::shared_ptr; +using TabletIndexes = std::vector>; class TabletSchema : public MetadataAdder { public: @@ -356,7 +359,8 @@ class TabletSchema : public MetadataAdder { void to_schema_pb(TabletSchemaPB* tablet_meta_pb) const; void append_column(TabletColumn column, ColumnType col_type = ColumnType::NORMAL); void append_index(TabletIndex&& index); - void update_index(const TabletColumn& column, const IndexType& index_type, TabletIndex&& index); + void update_index(const TabletColumn& column, const IndexType& index_type, + std::vector&& indexs); void remove_index(int64_t index_id); void clear_index(); // Must make sure the row column is always the last column @@ -472,14 +476,14 @@ class TabletSchema : public MetadataAdder { bool has_inverted_index_with_index_id(int64_t index_id) const; // Check whether this column supports inverted index // Some columns (Float, Double, JSONB ...) from the variant do not support index, but they are listed in TabletIndex. - const TabletIndex* inverted_index(const TabletColumn& col) const; + std::vector inverted_indexs(const TabletColumn& col) const; // Regardless of whether this column supports inverted index // TabletIndex information will be returned as long as it exists. - const TabletIndex* inverted_index(int32_t col_unique_id, - const std::string& suffix_path = "") const; - TabletIndexPtr inverted_index_by_field_pattern(int32_t col_unique_id, - const std::string& field_pattern) const; + std::vector inverted_indexs(int32_t col_unique_id, + const std::string& suffix_path = "") const; + std::vector inverted_index_by_field_pattern( + int32_t col_unique_id, const std::string& field_pattern) const; bool has_ngram_bf_index(int32_t col_unique_id) const; const TabletIndex* get_ngram_bf_index(int32_t col_unique_id) const; void update_indexes_from_thrift(const std::vector& indexes); @@ -591,9 +595,10 @@ class TabletSchema : public MetadataAdder { struct SubColumnInfo { TabletColumn column; - TabletIndexPtr index; + TabletIndexes indexes; }; + // all path in path_set_info are relative to the parent column struct PathsSetInfo { std::unordered_map typed_path_set; // typed columns PathSet sub_path_set; // extracted columns @@ -630,6 +635,23 @@ class TabletSchema : public MetadataAdder { std::unordered_map _field_id_to_index; std::unordered_map _field_path_to_index; + + // index_type/col_unique_id/suffix -> idxs in _indexes + using IndexKey = std::tuple; + struct IndexKeyHash { + size_t operator()(const IndexKey& t) const { + std::size_t seed = 0; + seed = doris::HashUtil::hash((const char*)&std::get<0>(t), sizeof(std::get<0>(t)), + seed); + seed = doris::HashUtil::hash((const char*)&std::get<1>(t), sizeof(std::get<1>(t)), + seed); + seed = doris::HashUtil::hash((const char*)std::get<2>(t).c_str(), std::get<2>(t).size(), + seed); + return seed; + } + }; + std::unordered_map, IndexKeyHash> _col_id_suffix_to_index; + size_t _num_columns = 0; size_t _num_variant_columns = 0; size_t _num_key_columns = 0; @@ -673,7 +695,7 @@ class TabletSchema : public MetadataAdder { // key: field_pattern // value: index - using PatternToIndex = std::unordered_map; + using PatternToIndex = std::unordered_map>; std::unordered_map _index_by_unique_id_with_pattern; }; diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp index 817486679b273b..7f6e97d0980e01 100644 --- a/be/src/olap/task/index_builder.cpp +++ b/be/src/olap/task/index_builder.cpp @@ -114,34 +114,36 @@ Status IndexBuilder::update_inverted_index_info() { } } auto column = output_rs_tablet_schema->column(column_idx); - const auto* index_meta = output_rs_tablet_schema->inverted_index(column); - if (index_meta == nullptr) { + auto index_metas = output_rs_tablet_schema->inverted_indexs(column); + if (index_metas.empty()) { LOG(ERROR) << "failed to find column: " << column_name << " index_id: " << t_inverted_index.index_id; continue; } - if (output_rs_tablet_schema->get_inverted_index_storage_format() == - InvertedIndexStorageFormatPB::V1) { - const auto& fs = io::global_local_filesystem(); - - for (int seg_id = 0; seg_id < num_segments; seg_id++) { - auto seg_path = - local_segment_path(_tablet->tablet_path(), - input_rowset->rowset_id().to_string(), seg_id); - auto index_path = InvertedIndexDescriptor::get_index_file_path_v1( - InvertedIndexDescriptor::get_index_file_path_prefix(seg_path), - index_meta->index_id(), index_meta->get_index_suffix()); - int64_t index_size = 0; - RETURN_IF_ERROR(fs->file_size(index_path, &index_size)); - VLOG_DEBUG << "inverted index file:" << index_path - << " size:" << index_size; - drop_index_size += index_size; + for (const auto& index_meta : index_metas) { + if (output_rs_tablet_schema->get_inverted_index_storage_format() == + InvertedIndexStorageFormatPB::V1) { + const auto& fs = io::global_local_filesystem(); + + for (int seg_id = 0; seg_id < num_segments; seg_id++) { + auto seg_path = local_segment_path( + _tablet->tablet_path(), input_rowset->rowset_id().to_string(), + seg_id); + auto index_path = InvertedIndexDescriptor::get_index_file_path_v1( + InvertedIndexDescriptor::get_index_file_path_prefix(seg_path), + index_meta->index_id(), index_meta->get_index_suffix()); + int64_t index_size = 0; + RETURN_IF_ERROR(fs->file_size(index_path, &index_size)); + VLOG_DEBUG << "inverted index file:" << index_path + << " size:" << index_size; + drop_index_size += index_size; + } } + _dropped_inverted_indexes.push_back(*index_meta); + // ATTN: DO NOT REMOVE INDEX AFTER OUTPUT_ROWSET_WRITER CREATED. + // remove dropped index_meta from output rowset tablet schema + output_rs_tablet_schema->remove_index(index_meta->index_id()); } - _dropped_inverted_indexes.push_back(*index_meta); - // ATTN: DO NOT REMOVE INDEX AFTER OUTPUT_ROWSET_WRITER CREATED. - // remove dropped index_meta from output rowset tablet schema - output_rs_tablet_schema->remove_index(index_meta->index_id()); } DBUG_EXECUTE_IF("index_builder.update_inverted_index_info.drop_index", { auto indexes_count = DebugPoints::instance()->get_debug_param_or_default( @@ -171,15 +173,17 @@ Status IndexBuilder::update_inverted_index_info() { continue; } const TabletColumn& col = output_rs_tablet_schema->column_by_uid(column_uid); - const TabletIndex* exist_index = output_rs_tablet_schema->inverted_index(col); - if (exist_index && exist_index->index_id() != index.index_id()) { - LOG(WARNING) << fmt::format( - "column: {} has a exist inverted index, but the index id not equal " - "request's index id, , exist index id: {}, request's index id: {}, " - "remove exist index in new output_rs_tablet_schema", - column_uid, exist_index->index_id(), index.index_id()); - without_index_uids.insert(exist_index->index_id()); - output_rs_tablet_schema->remove_index(exist_index->index_id()); + auto exist_indexs = output_rs_tablet_schema->inverted_indexs(col); + for (const auto& exist_index : exist_indexs) { + if (exist_index->index_id() != index.index_id()) { + LOG(WARNING) << fmt::format( + "column: {} has a exist inverted index, but the index id not equal " + "request's index id, , exist index id: {}, request's index id: {}, " + "remove exist index in new output_rs_tablet_schema", + column_uid, exist_index->index_id(), index.index_id()); + without_index_uids.insert(exist_index->index_id()); + output_rs_tablet_schema->remove_index(exist_index->index_id()); + } } output_rs_tablet_schema->append_index(std::move(index)); } @@ -425,28 +429,33 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta _olap_data_convertor->add_column_data_convertor(column); return_columns.emplace_back(column_idx); std::unique_ptr field(FieldFactory::create(column)); - const auto* index_meta = output_rowset_schema->inverted_index(column); - std::unique_ptr inverted_index_builder; - try { - RETURN_IF_ERROR(segment_v2::InvertedIndexColumnWriter::create( - field.get(), &inverted_index_builder, inverted_index_file_writer.get(), - index_meta)); - DBUG_EXECUTE_IF( - "IndexBuilder::handle_single_rowset_index_column_writer_create_error", { - _CLTHROWA(CL_ERR_IO, - "debug point: " - "handle_single_rowset_index_column_writer_create_error"); - }) - } catch (const std::exception& e) { - return Status::Error( - "CLuceneError occured: {}", e.what()); - } + auto index_metas = output_rowset_schema->inverted_indexs(column); + for (const auto& index_meta : index_metas) { + std::unique_ptr inverted_index_builder; + try { + RETURN_IF_ERROR(segment_v2::InvertedIndexColumnWriter::create( + field.get(), &inverted_index_builder, + inverted_index_file_writer.get(), index_meta)); + DBUG_EXECUTE_IF( + "IndexBuilder::handle_single_rowset_index_column_writer_create_" + "error", + { + _CLTHROWA(CL_ERR_IO, + "debug point: " + "handle_single_rowset_index_column_writer_create_" + "error"); + }) + } catch (const std::exception& e) { + return Status::Error( + "CLuceneError occured: {}", e.what()); + } - if (inverted_index_builder) { - auto writer_sign = std::make_pair(seg_ptr->id(), index_id); - _inverted_index_builders.insert( - std::make_pair(writer_sign, std::move(inverted_index_builder))); - inverted_index_writer_signs.emplace_back(writer_sign); + if (inverted_index_builder) { + auto writer_sign = std::make_pair(seg_ptr->id(), index_id); + _inverted_index_builders.insert( + std::make_pair(writer_sign, std::move(inverted_index_builder))); + inverted_index_writer_signs.emplace_back(writer_sign); + } } } diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index c2f1022c4a3191..de090b65367815 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -444,19 +444,20 @@ void inherit_column_attributes(const TabletColumn& source, TabletColumn& target, } // 2. inverted index - const auto* source_index_meta = (*target_schema)->inverted_index(source.unique_id()); - if (source_index_meta != nullptr) { - // add index meta + std::vector indexes_to_update; + auto source_indexes = (*target_schema)->inverted_indexs(source.unique_id()); + for (const auto& source_index_meta : source_indexes) { TabletIndex index_info = *source_index_meta; index_info.set_escaped_escaped_index_suffix_path(target.path_info_ptr()->get_path()); - const auto* target_index_meta = - (*target_schema) - ->inverted_index(target.parent_unique_id(), - target.path_info_ptr()->get_path()); - if (target_index_meta != nullptr) { - // already exist - (*target_schema)->update_index(target, IndexType::INVERTED, std::move(index_info)); - } else { + indexes_to_update.emplace_back(std::move(index_info)); + } + auto target_indexes = (*target_schema) + ->inverted_indexs(target.parent_unique_id(), + target.path_info_ptr()->get_path()); + if (!target_indexes.empty()) { + (*target_schema)->update_index(target, IndexType::INVERTED, std::move(indexes_to_update)); + } else { + for (auto& index_info : indexes_to_update) { (*target_schema)->append_index(std::move(index_info)); } } @@ -636,10 +637,20 @@ bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema* o return true; } - bool new_schema_has_inverted_index = new_schema->inverted_index(column_new); - bool old_schema_has_inverted_index = old_schema->inverted_index(column_old); + auto new_schema_inverted_indexs = new_schema->inverted_indexs(column_new); + auto old_schema_inverted_indexs = old_schema->inverted_indexs(column_old); + + if (new_schema_inverted_indexs.size() != old_schema_inverted_indexs.size()) { + return true; + } + + for (size_t i = 0; i < new_schema_inverted_indexs.size(); ++i) { + if (new_schema_inverted_indexs[i] != old_schema_inverted_indexs[i]) { + return true; + } + } - return new_schema_has_inverted_index != old_schema_has_inverted_index; + return false; } TabletColumn create_sparse_column(const TabletColumn& variant) { @@ -1181,20 +1192,27 @@ bool generate_sub_column_info(const TabletSchema& schema, int32_t col_unique_id, auto generate_index = [&](const std::string& pattern) { // 1. find subcolumn's index - if (const auto& index = schema.inverted_index_by_field_pattern(col_unique_id, pattern); - index != nullptr) { - sub_column_info->index = std::make_shared(*index); - sub_column_info->index->set_escaped_escaped_index_suffix_path( - sub_column_info->column.path_info_ptr()->get_path()); + if (const auto& indexes = schema.inverted_index_by_field_pattern(col_unique_id, pattern); + !indexes.empty()) { + for (const auto& index : indexes) { + auto index_ptr = std::make_shared(*index); + index_ptr->set_escaped_escaped_index_suffix_path( + sub_column_info->column.path_info_ptr()->get_path()); + sub_column_info->indexes.emplace_back(std::move(index_ptr)); + } } // 2. find parent column's index - else if (const auto* parent_index = schema.inverted_index(col_unique_id); - parent_index != nullptr) { - sub_column_info->index = std::make_shared(*parent_index); - sub_column_info->index->set_escaped_escaped_index_suffix_path( - sub_column_info->column.path_info_ptr()->get_path()); + else if (const auto parent_index = schema.inverted_indexs(col_unique_id); + !parent_index.empty() && + InvertedIndexColumnWriter::check_support_inverted_index(sub_column_info->column)) { + for (const auto& index : parent_index) { + auto index_ptr = std::make_shared(*index); + index_ptr->set_escaped_escaped_index_suffix_path( + sub_column_info->column.path_info_ptr()->get_path()); + sub_column_info->indexes.emplace_back(std::move(index_ptr)); + } } else { - sub_column_info->index = nullptr; + sub_column_info->indexes.clear(); } }; diff --git a/be/src/vec/functions/array/function_array_index.h b/be/src/vec/functions/array/function_array_index.h index 7f08bfdd4b4dda..23dda3bdc086ef 100644 --- a/be/src/vec/functions/array/function_array_index.h +++ b/be/src/vec/functions/array/function_array_index.h @@ -128,8 +128,7 @@ class FunctionArrayIndex : public IFunction { if (iter == nullptr) { return Status::OK(); } - if (iter->get_inverted_index_reader_type() == - segment_v2::InvertedIndexReaderType::FULLTEXT) { + if (iter->get_reader(segment_v2::InvertedIndexReaderType::FULLTEXT)) { // parser is not none we can not make sure the result is correct in expr combination // for example, filter: !array_index(array, 'tall:120cm, weight: 35kg') // here we have rows [tall:120cm, weight: 35kg, hobbies: reading book] which be tokenized @@ -158,7 +157,7 @@ class FunctionArrayIndex : public IFunction { RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value(param_type, ¶m_value, query_param)); RETURN_IF_ERROR(iter->read_from_inverted_index( - data_type_with_name.first, query_param->get_value(), + data_type_with_name, query_param->get_value(), segment_v2::InvertedIndexQueryType::EQUAL_QUERY, num_rows, roaring)); // here debug for check array_contains function really filter rows by inverted index correctly DBUG_EXECUTE_IF("array_func.array_contains", { diff --git a/be/src/vec/functions/array/function_arrays_overlap.h b/be/src/vec/functions/array/function_arrays_overlap.h index 8ac21bcd710f8d..5c6604b204213f 100644 --- a/be/src/vec/functions/array/function_arrays_overlap.h +++ b/be/src/vec/functions/array/function_arrays_overlap.h @@ -145,8 +145,7 @@ class FunctionArraysOverlap : public IFunction { return Status::OK(); } auto data_type_with_name = data_type_with_names[0]; - if (iter->get_inverted_index_reader_type() == - segment_v2::InvertedIndexReaderType::FULLTEXT) { + if (iter->get_reader(segment_v2::InvertedIndexReaderType::FULLTEXT)) { return Status::Error( "Inverted index evaluate skipped, FULLTEXT reader can not support " "array_overlap"); @@ -193,7 +192,7 @@ class FunctionArraysOverlap : public IFunction { RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value( nested_param_type, &nested_query_val, query_param)); RETURN_IF_ERROR(iter->read_from_inverted_index( - data_type_with_name.first, query_param->get_value(), + data_type_with_name, query_param->get_value(), segment_v2::InvertedIndexQueryType::EQUAL_QUERY, num_rows, single_res)); *roaring |= *single_res; } diff --git a/be/src/vec/functions/function_ip.h b/be/src/vec/functions/function_ip.h index 5fed82d6e10a44..38d17590527857 100644 --- a/be/src/vec/functions/function_ip.h +++ b/be/src/vec/functions/function_ip.h @@ -659,7 +659,7 @@ class FunctionIsIPAddressInRange : public IFunction { return Status::OK(); } - if (iter->get_inverted_index_reader_type() != segment_v2::InvertedIndexReaderType::BKD) { + if (iter->get_reader(segment_v2::InvertedIndexReaderType::BKD) == nullptr) { // Not support only bkd index return Status::Error( "Inverted index evaluate skipped, ip range reader can only support by bkd " @@ -717,13 +717,13 @@ class FunctionIsIPAddressInRange : public IFunction { RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value( param_type, &min_ip, query_param)); RETURN_IF_ERROR(iter->read_from_inverted_index( - data_type_with_name.first, query_param->get_value(), + data_type_with_name, query_param->get_value(), segment_v2::InvertedIndexQueryType::GREATER_EQUAL_QUERY, num_rows, res_roaring)); // <= max ip RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value( param_type, &max_ip, query_param)); RETURN_IF_ERROR(iter->read_from_inverted_index( - data_type_with_name.first, query_param->get_value(), + data_type_with_name, query_param->get_value(), segment_v2::InvertedIndexQueryType::LESS_EQUAL_QUERY, num_rows, max_roaring)); DBUG_EXECUTE_IF("ip.inverted_index_filtered", { diff --git a/be/src/vec/functions/function_multi_match.cpp b/be/src/vec/functions/function_multi_match.cpp index 8ab0cb2f2e542c..5e8ce268db6c3e 100644 --- a/be/src/vec/functions/function_multi_match.cpp +++ b/be/src/vec/functions/function_multi_match.cpp @@ -77,14 +77,19 @@ Status FunctionMultiMatch::evaluate_inverted_index( "arguments for multi_match must be string"); } // search - for (int i = 0; i < data_type_with_names.size(); i++) { - auto column_name = data_type_with_names[i].first; + for (size_t i = 0; i < data_type_with_names.size(); i++) { auto* iter = iterators[i]; + if (iter == nullptr) { + std::string error_msg = "Inverted index iterator is null for column '" + + data_type_with_names[i].first + + "' during multi_match execution"; + return Status::Error(error_msg); + } + auto single_result = std::make_shared(); - std::shared_ptr index = std::make_shared(); - RETURN_IF_ERROR(iter->read_from_inverted_index(column_name, &query_str, query_type, - num_rows, index)); - *roaring |= *index; + RETURN_IF_ERROR(iter->read_from_inverted_index(data_type_with_names[i], &query_str, + query_type, num_rows, single_result)); + *roaring |= *single_result; } segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap); bitmap_result = result; diff --git a/be/src/vec/functions/functions_comparison.h b/be/src/vec/functions/functions_comparison.h index 2da767fa7ef909..03ce1272057cc6 100644 --- a/be/src/vec/functions/functions_comparison.h +++ b/be/src/vec/functions/functions_comparison.h @@ -543,8 +543,7 @@ class FunctionComparison : public IFunction { if (iter == nullptr) { return Status::OK(); } - if (iter->get_inverted_index_reader_type() == - segment_v2::InvertedIndexReaderType::FULLTEXT) { + if (iter->get_reader(segment_v2::InvertedIndexReaderType::FULLTEXT)) { //NOT support comparison predicate when parser is FULLTEXT for expr inverted index evaluate. return Status::OK(); } @@ -565,12 +564,10 @@ class FunctionComparison : public IFunction { } if (segment_v2::is_range_query(query_type) && - iter->get_inverted_index_reader_type() == - segment_v2::InvertedIndexReaderType::STRING_TYPE) { + iter->get_reader(segment_v2::InvertedIndexReaderType::STRING_TYPE)) { // untokenized strings exceed ignore_above, they are written as null, causing range query errors return Status::OK(); } - std::string column_name = data_type_with_name.first; Field param_value; arguments[0].column->get(0, param_value); auto param_type = arguments[0].type->get_type_as_type_descriptor().type; @@ -580,8 +577,8 @@ class FunctionComparison : public IFunction { std::shared_ptr roaring = std::make_shared(); RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value( param_type, ¶m_value, query_param)); - RETURN_IF_ERROR(iter->read_from_inverted_index(column_name, query_param->get_value(), - query_type, num_rows, roaring)); + RETURN_IF_ERROR(iter->read_from_inverted_index( + data_type_with_name, query_param->get_value(), query_type, num_rows, roaring)); std::shared_ptr null_bitmap = std::make_shared(); if (iter->has_null()) { segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle; diff --git a/be/src/vec/functions/in.h b/be/src/vec/functions/in.h index 6f697ba7441df5..4875214d948b6c 100644 --- a/be/src/vec/functions/in.h +++ b/be/src/vec/functions/in.h @@ -150,8 +150,7 @@ class FunctionIn : public IFunction { if (iter == nullptr) { return Status::OK(); } - if (iter->get_inverted_index_reader_type() == - segment_v2::InvertedIndexReaderType::FULLTEXT) { + if (iter->get_reader(segment_v2::InvertedIndexReaderType::FULLTEXT)) { //NOT support in list when parser is FULLTEXT for expr inverted index evaluate. return Status::OK(); } @@ -160,7 +159,6 @@ class FunctionIn : public IFunction { RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle)); null_bitmap = null_bitmap_cache_handle.get_bitmap(); } - std::string column_name = data_type_with_name.first; for (const auto& arg : arguments) { Field param_value; arg.column->get(0, param_value); @@ -178,8 +176,8 @@ class FunctionIn : public IFunction { param_type, ¶m_value, query_param)); InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY; std::shared_ptr index = std::make_shared(); - RETURN_IF_ERROR(iter->read_from_inverted_index(column_name, query_param->get_value(), - query_type, num_rows, index)); + RETURN_IF_ERROR(iter->read_from_inverted_index( + data_type_with_name, query_param->get_value(), query_type, num_rows, index)); *roaring |= *index; } segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap); diff --git a/be/src/vec/functions/match.cpp b/be/src/vec/functions/match.cpp index 8ba4f2f8ebb9cd..cc6e33716f90c8 100644 --- a/be/src/vec/functions/match.cpp +++ b/be/src/vec/functions/match.cpp @@ -42,8 +42,9 @@ Status FunctionMatchBase::evaluate_inverted_index( if (function_name == MATCH_PHRASE_FUNCTION || function_name == MATCH_PHRASE_PREFIX_FUNCTION || function_name == MATCH_PHRASE_EDGE_FUNCTION) { - if (iter->get_inverted_index_reader_type() == InvertedIndexReaderType::FULLTEXT && - get_parser_phrase_support_string_from_properties(iter->get_index_properties()) == + auto reader = iter->get_reader(InvertedIndexReaderType::FULLTEXT); + if (reader && + get_parser_phrase_support_string_from_properties(reader->get_index_properties()) == INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO) { return Status::Error( "phrase queries require setting support_phrase = true"); @@ -67,7 +68,7 @@ Status FunctionMatchBase::evaluate_inverted_index( if (is_string_type(param_type)) { auto inverted_index_query_type = get_query_type_from_fn_name(); RETURN_IF_ERROR( - iter->read_from_inverted_index(data_type_with_name.first, query_param->get_value(), + iter->read_from_inverted_index(data_type_with_name, query_param->get_value(), inverted_index_query_type, num_rows, roaring)); } else { return Status::Error( diff --git a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp index 33b35e4ef440fe..a21afd7415eaba 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp @@ -113,6 +113,35 @@ class IndexCompactionTest : public ::testing::Test { EXPECT_TRUE(_tablet->init().ok()); } + void _build_multi_index_tablet() { + // tablet_schema + TabletSchemaPB schema_pb; + schema_pb.set_keys_type(KeysType::DUP_KEYS); + schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2); + + IndexCompactionUtils::construct_column(schema_pb.add_column(), schema_pb.add_index(), 10000, + "key_index", 0, "INT", "key"); + IndexCompactionUtils::construct_column(schema_pb.add_column(), schema_pb.add_index(), 10001, + "v1_index", 1, "STRING", "v1"); + + IndexCompactionUtils::construct_column(schema_pb.add_column(), 2, "STRING", "v2"); + IndexCompactionUtils::construct_index(schema_pb.add_index(), 10002, "v2_keyword_index", 2, + true); + IndexCompactionUtils::construct_index(schema_pb.add_index(), 10003, "v2_text_index", 2, + false); + + IndexCompactionUtils::construct_column(schema_pb.add_column(), schema_pb.add_index(), 10004, + "v3_index", 3, "INT", "v3"); + _tablet_schema = std::make_shared(); + _tablet_schema->init_from_pb(schema_pb); + + // tablet + TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema)); + + _tablet = std::make_shared(*_engine_ref, tablet_meta, _data_dir.get()); + EXPECT_TRUE(_tablet->init().ok()); + } + void _build_wiki_tablet(const KeysType& keys_type, const InvertedIndexStorageFormatPB& storage_format) { // tablet_schema @@ -876,6 +905,7 @@ TEST_F(IndexCompactionTest, test_col_unique_ids_empty) { testing::HasSubstr("No index with id 10001 found")); } +// Now it will support a column with 2 inverted indices TEST_F(IndexCompactionTest, test_tablet_index_id_not_equal) { _build_tablet(); // replace unique id from 2 to 1 in tablet index 10002 and rebuild tablet_schema @@ -896,7 +926,7 @@ TEST_F(IndexCompactionTest, test_tablet_index_id_not_equal) { data_files.push_back(data_file2); std::vector rowsets(data_files.size()); - auto custom_check_build_rowsets = [](const int32_t& size) { EXPECT_EQ(size, 3); }; + auto custom_check_build_rowsets = [](const int32_t& size) { EXPECT_EQ(size, 4); }; IndexCompactionUtils::build_rowsets( _data_dir, _tablet_schema, _tablet, _engine_ref, rowsets, data_files, _inc_id, custom_check_build_rowsets); @@ -923,9 +953,7 @@ TEST_F(IndexCompactionTest, test_tablet_index_id_not_equal) { // check index file // index 10002 cannot be found in idx file auto dir_idx_compaction = inverted_index_file_reader_index->_open(10002, ""); - EXPECT_TRUE(!dir_idx_compaction.has_value()) << dir_idx_compaction.error(); - EXPECT_THAT(dir_idx_compaction.error().to_string(), - testing::HasSubstr("No index with id 10002 found")); + EXPECT_TRUE(dir_idx_compaction.has_value()); } TEST_F(IndexCompactionTest, test_tablet_schema_tablet_index_is_null) { @@ -1590,4 +1618,50 @@ TEST_F(IndexCompactionTest, tes_wikipedia_mow_v2_multiple_src_lucene_segments) { _build_wiki_tablet(KeysType::UNIQUE_KEYS, InvertedIndexStorageFormatPB::V2); _run_normal_wiki_test(); } + +TEST_F(IndexCompactionTest, test_tablet_multi_index) { + _build_multi_index_tablet(); + + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok()); + std::string data_file1 = + _current_dir + "/be/test/olap/rowset/segment_v2/inverted_index/data/data1.csv"; + std::string data_file2 = + _current_dir + "/be/test/olap/rowset/segment_v2/inverted_index/data/data2.csv"; + std::vector data_files; + data_files.push_back(data_file1); + data_files.push_back(data_file2); + + std::vector rowsets(data_files.size()); + auto custom_check_build_rowsets = [](const int32_t& size) { EXPECT_EQ(size, 5); }; + IndexCompactionUtils::build_rowsets( + _data_dir, _tablet_schema, _tablet, _engine_ref, rowsets, data_files, _inc_id, + custom_check_build_rowsets); + + auto custom_check_index = [](const BaseCompaction& compaction, const RowsetWriterContext& ctx) { + EXPECT_EQ(compaction._cur_tablet_schema->inverted_indexes().size(), 5); + EXPECT_TRUE(ctx.columns_to_do_index_compaction.size() == 2); + EXPECT_TRUE(ctx.columns_to_do_index_compaction.contains(1)); + EXPECT_TRUE(ctx.columns_to_do_index_compaction.contains(2)); + EXPECT_TRUE(compaction._output_rowset->num_segments() == 1); + }; + + RowsetSharedPtr output_rowset_index; + auto st = IndexCompactionUtils::do_compaction(rowsets, _engine_ref, _tablet, true, + output_rowset_index, custom_check_index); + EXPECT_TRUE(st.ok()) << st.to_string(); + + const auto& seg_path = output_rowset_index->segment_path(0); + EXPECT_TRUE(seg_path.has_value()) << seg_path.error(); + auto inverted_index_file_reader_index = IndexCompactionUtils::init_index_file_reader( + output_rowset_index, seg_path.value(), + _tablet_schema->get_inverted_index_storage_format()); + + auto dir_idx_compaction_2 = inverted_index_file_reader_index->_open(10002, ""); + EXPECT_TRUE(dir_idx_compaction_2.has_value()); + + auto dir_idx_compaction_3 = inverted_index_file_reader_index->_open(10003, ""); + EXPECT_TRUE(dir_idx_compaction_3.has_value()); +} + } // namespace doris diff --git a/be/test/olap/rowset/segment_v2/inverted_index/compaction/util/index_compaction_utils.cpp b/be/test/olap/rowset/segment_v2/inverted_index/compaction/util/index_compaction_utils.cpp index 4f4a601e63cc4f..bf1b3a35e295c0 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/compaction/util/index_compaction_utils.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/compaction/util/index_compaction_utils.cpp @@ -574,7 +574,9 @@ class IndexCompactionUtils { for (const auto& [col_uid, query_data] : query_map) { const auto& column = tablet_schema->column_by_uid(col_uid); - const auto* index = tablet_schema->inverted_index(column); + auto indexs = tablet_schema->inverted_indexs(column); + EXPECT_EQ(indexs.size(), 1); + const auto* index = indexs[0]; EXPECT_TRUE(index != nullptr); if (col_uid == 0 || col_uid == 3) { diff --git a/be/test/olap/tablet_index_test.cpp b/be/test/olap/tablet_index_test.cpp index 7842f9af18d51d..a46ee18d6ef82a 100644 --- a/be/test/olap/tablet_index_test.cpp +++ b/be/test/olap/tablet_index_test.cpp @@ -61,11 +61,11 @@ TEST_F(TabletIndexTest, test_inverted_index) { EXPECT_TRUE(tablet_schema->has_inverted_index()); EXPECT_EQ(tablet_schema->inverted_indexes().size(), 2); - EXPECT_TRUE(tablet_schema->inverted_index(tablet_schema->column_by_uid(0)) != nullptr); - EXPECT_TRUE(tablet_schema->inverted_index(tablet_schema->column_by_uid(1)) != nullptr); - EXPECT_TRUE(tablet_schema->inverted_index(tablet_schema->column_by_uid(2)) == nullptr); - EXPECT_TRUE(tablet_schema->inverted_index(3) == nullptr); - EXPECT_TRUE(tablet_schema->inverted_index(4, "v1.a") == nullptr); + EXPECT_TRUE(!tablet_schema->inverted_indexs(tablet_schema->column_by_uid(0)).empty()); + EXPECT_TRUE(!tablet_schema->inverted_indexs(tablet_schema->column_by_uid(1)).empty()); + EXPECT_TRUE(tablet_schema->inverted_indexs(tablet_schema->column_by_uid(2)).empty()); + EXPECT_TRUE(tablet_schema->inverted_indexs(3).empty()); + EXPECT_TRUE(tablet_schema->inverted_indexs(4, "v1.a").empty()); } TEST_F(TabletIndexTest, test_schema_index_diff) { @@ -97,8 +97,8 @@ TEST_F(TabletIndexTest, test_schema_index_diff) { TabletSchemaSPtr old_tablet_schema = std::make_shared(); old_tablet_schema->init_from_pb(old_schema_pb); - EXPECT_FALSE(vectorized::schema_util::has_schema_index_diff(new_tablet_schema.get(), - old_tablet_schema.get(), 0, 0)); + EXPECT_TRUE(vectorized::schema_util::has_schema_index_diff(new_tablet_schema.get(), + old_tablet_schema.get(), 0, 0)); EXPECT_TRUE(vectorized::schema_util::has_schema_index_diff(new_tablet_schema.get(), old_tablet_schema.get(), 1, 1)); EXPECT_TRUE(vectorized::schema_util::has_schema_index_diff(new_tablet_schema.get(), diff --git a/be/test/olap/tablet_schema_index_test.cpp b/be/test/olap/tablet_schema_index_test.cpp new file mode 100644 index 00000000000000..e7cdafe70bb42e --- /dev/null +++ b/be/test/olap/tablet_schema_index_test.cpp @@ -0,0 +1,177 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "olap/tablet_schema.h" + +namespace doris { + +class TabletSchemaIndexTest : public testing::Test { +protected: + void SetUp() override { + // Setup common test data + _tablet_schema = std::make_shared(); + } + + TabletIndex create_test_index(int64_t index_id, IndexType type, + const std::vector& col_uids, + const std::string& suffix = "") { + TabletIndex index; + index._index_id = index_id; + index._index_type = type; + index._col_unique_ids = col_uids; + index.set_escaped_escaped_index_suffix_path(suffix); + return index; + } + + std::shared_ptr _tablet_schema; +}; + +TEST_F(TabletSchemaIndexTest, TestAddInvertedIndex) { + // Add inverted index with suffix + TabletIndex index = create_test_index(1, IndexType::INVERTED, {100}, "suffix1"); + _tablet_schema->append_index(std::move(index)); + + // Verify index mapping + auto found_indexs = _tablet_schema->inverted_indexs(100, "suffix1"); + ASSERT_NE(found_indexs.size(), 0); + EXPECT_EQ(found_indexs[0]->index_id(), 1); + EXPECT_EQ(found_indexs[0]->get_index_suffix(), "suffix1"); +} + +TEST_F(TabletSchemaIndexTest, TestRemoveIndex) { + // Add multiple indexes + _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, {100}, "suffix1")); + _tablet_schema->append_index(create_test_index(2, IndexType::INVERTED, {200}, "suffix2")); + + // Remove index 1 + _tablet_schema->remove_index(1); + + // Verify index 1 removed + EXPECT_EQ(_tablet_schema->inverted_indexs(100, "suffix1").size(), 0); + + // Verify index 2 still exists + auto found_indexs = _tablet_schema->inverted_indexs(200, "suffix2"); + ASSERT_NE(found_indexs.size(), 0); + EXPECT_EQ(found_indexs[0]->index_id(), 2); +} + +TEST_F(TabletSchemaIndexTest, TestUpdateIndex) { + // Add initial index + _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, {100}, "old_suffix")); + ASSERT_NE(_tablet_schema->inverted_indexs(100, "old_suffix").size(), 0); + + // Update index with new suffix + _tablet_schema->remove_index(1); + _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, {100}, "new_suffix")); + + // Verify update + EXPECT_EQ(_tablet_schema->inverted_indexs(100, "old_suffix").size(), 0); + auto found_indexs = _tablet_schema->inverted_indexs(100, "new_suffix"); + ASSERT_NE(found_indexs.size(), 0); + EXPECT_EQ(found_indexs[0]->get_index_suffix(), "new%5Fsuffix"); +} + +TEST_F(TabletSchemaIndexTest, TestMultipleColumnsIndex) { + // Add index with multiple columns + TabletIndex index = create_test_index(1, IndexType::INVERTED, {100, 200}, "multi_col"); + _tablet_schema->append_index(std::move(index)); + + // Verify both columns mapped + auto indexs1 = _tablet_schema->inverted_indexs(100, "multi_col"); + auto indexs2 = _tablet_schema->inverted_indexs(200, "multi_col"); + ASSERT_NE(indexs1.size(), 0); + ASSERT_EQ(indexs1[0], indexs2[0]); // Should point to same index +} + +TEST_F(TabletSchemaIndexTest, TestDuplicateIndexKey) { + // Add two indexes with same (type,col,suffix) + _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, {100}, "suffix")); + _tablet_schema->append_index(create_test_index(2, IndexType::INVERTED, {100}, "suffix")); + + // The last added should override + auto found_indexs = _tablet_schema->inverted_indexs(100, "suffix"); + ASSERT_NE(found_indexs.size(), 0); + EXPECT_EQ(found_indexs[0]->index_id(), 1); +} + +TEST_F(TabletSchemaIndexTest, TestClearIndexes) { + _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED, {100})); + _tablet_schema->clear_index(); + + EXPECT_EQ(_tablet_schema->inverted_indexs(100, "").size(), 0); + EXPECT_TRUE(_tablet_schema->inverted_indexes().empty()); +} + +TEST_F(TabletSchemaIndexTest, TestUpdateIndexMethod) { + TabletColumn col; + col.set_parent_unique_id(100); + col.set_path_info(vectorized::PathInData("v2")); + _tablet_schema->append_column(col); + + TabletIndex old_index = create_test_index(1, IndexType::INVERTED, {100}, "v2"); + _tablet_schema->append_index(std::move(old_index)); + + std::vector new_indexs; + TabletIndex new_index = create_test_index(1, IndexType::INVERTED, {100}, "v2"); + new_index._properties["new_prop"] = "value"; + new_indexs.emplace_back(std::move(new_index)); + + _tablet_schema->update_index(col, IndexType::INVERTED, std::move(new_indexs)); + + auto updated_indexs = _tablet_schema->inverted_indexs(100, "v2"); + ASSERT_NE(updated_indexs.size(), 0); + EXPECT_EQ(updated_indexs[0]->index_id(), 1); + EXPECT_EQ(updated_indexs[0]->properties().at("new_prop"), "value"); + + auto key = std::make_tuple(IndexType::INVERTED, 100, "v2"); + EXPECT_NE(_tablet_schema->_col_id_suffix_to_index.find(key), + _tablet_schema->_col_id_suffix_to_index.end()); +} + +TEST_F(TabletSchemaIndexTest, TestUpdateIndexAddNewWhenNotExist) { + // Not exist, return nullptr + TabletColumn col; + col.set_unique_id(200); + + std::vector new_indexs; + new_indexs.emplace_back(create_test_index(2, IndexType::INVERTED, {200}, "v3")); + _tablet_schema->update_index(col, IndexType::INVERTED, std::move(new_indexs)); + + auto indexs = _tablet_schema->inverted_indexs(200, "v3"); + ASSERT_EQ(indexs.size(), 0); +} + +TEST_F(TabletSchemaIndexTest, TestUpdateIndexWithMultipleColumns) { + TabletColumn col1, col2; + col1.set_unique_id(300); + col2.set_unique_id(400); + _tablet_schema->append_column(col1); + _tablet_schema->append_column(col2); + + TabletIndex old_multi_index = create_test_index(3, IndexType::INVERTED, {300, 400}, "multi"); + _tablet_schema->append_index(std::move(old_multi_index)); + + TabletIndex new_multi_index = create_test_index(3, IndexType::NGRAM_BF, {300, 400}); + _tablet_schema->append_index(std::move(new_multi_index)); + + ASSERT_NE(_tablet_schema->inverted_indexs(300, "multi").size(), 0); + EXPECT_NE(_tablet_schema->get_ngram_bf_index(400), nullptr); +} + +} // namespace doris \ No newline at end of file diff --git a/be/test/olap/tablet_schema_multi_index_test.cpp b/be/test/olap/tablet_schema_multi_index_test.cpp new file mode 100644 index 00000000000000..949af0b48b1291 --- /dev/null +++ b/be/test/olap/tablet_schema_multi_index_test.cpp @@ -0,0 +1,373 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include + +#include "olap/tablet_schema.h" +#include "vec/common/string_ref.h" + +namespace doris { + +class TabletSchemaMultiIndexTest : public ::testing::Test { +protected: + void SetUp() override { + // Common setup for all tests + schema = std::make_shared(); + + auto col1 = std::make_shared(); + col1->_unique_id = 0; + col1->_col_name = "c0"; + schema->_cols.push_back(col1); + + auto col2 = std::make_shared(); + col2->_unique_id = 1; + col2->_col_name = "c1"; + schema->_cols.push_back(col2); + + auto col3 = std::make_shared(); + col3->_unique_id = 2; + col3->_col_name = "c2"; + schema->_cols.push_back(col3); + + schema->_field_name_to_index[StringRef(col1->_col_name)] = 0; + schema->_field_name_to_index[StringRef(col2->_col_name)] = 1; + schema->_field_name_to_index[StringRef(col3->_col_name)] = 2; + + schema->_num_columns = 3; + } + + void TearDown() override { + // Clean up if needed + } + + TabletSchemaSPtr schema; +}; + +TEST_F(TabletSchemaMultiIndexTest, AppendIndex) { + TabletIndex index; + index._index_id = 100; + index._index_type = IndexType::INVERTED; + index._col_unique_ids = {1, 2}; + index._escaped_index_suffix_path = "suffix1"; + + // Append first index + schema->append_index(std::move(index)); + + // Verify the index was added + ASSERT_EQ(schema->_indexes.size(), 1); + ASSERT_EQ(schema->_indexes[0]->_index_id, 100); + + // Verify the mapping was created for both columns + TabletSchema::IndexKey key1(IndexType::INVERTED, 1, "suffix1"); + TabletSchema::IndexKey key2(IndexType::INVERTED, 2, "suffix1"); + + ASSERT_EQ(schema->_col_id_suffix_to_index[key1].size(), 1); + ASSERT_EQ(schema->_col_id_suffix_to_index[key2].size(), 1); + ASSERT_EQ(schema->_col_id_suffix_to_index[key1][0], 0); + ASSERT_EQ(schema->_col_id_suffix_to_index[key2][0], 0); + + // Append second index with same columns + TabletIndex index2; + index2._index_id = 101; + index2._index_type = IndexType::INVERTED; + index2._col_unique_ids = {1, 2}; + index2._escaped_index_suffix_path = "suffix1"; + + schema->append_index(std::move(index2)); + + // Verify both indexes are present + ASSERT_EQ(schema->_indexes.size(), 2); + + // Verify mapping now contains both indexes for the columns + ASSERT_EQ(schema->_col_id_suffix_to_index[key1].size(), 2); + ASSERT_EQ(schema->_col_id_suffix_to_index[key2].size(), 2); + ASSERT_EQ(schema->_col_id_suffix_to_index[key1][0], 0); + ASSERT_EQ(schema->_col_id_suffix_to_index[key1][1], 1); +} + +TEST_F(TabletSchemaMultiIndexTest, AppendIndexWithExtractedColumn) { + TabletIndex index; + index._index_id = 100; + index._index_type = IndexType::INVERTED; + index._col_unique_ids = {3}; // extracted column unique id + index._escaped_index_suffix_path = "suffix1"; + + schema->append_index(std::move(index)); + + // Should map to parent unique id (1) + TabletSchema::IndexKey key(IndexType::INVERTED, 3, "suffix1"); + ASSERT_EQ(schema->_col_id_suffix_to_index[key].size(), 1); + ASSERT_EQ(schema->_col_id_suffix_to_index[key][0], 0); +} + +TEST_F(TabletSchemaMultiIndexTest, UpdateIndex) { + // Setup initial index + TabletIndex index; + index._index_id = 100; + index._index_type = IndexType::INVERTED; + index._col_unique_ids = {1}; + schema->append_index(std::move(index)); + + // Create updated index + std::vector updates; + TabletIndex updated_index; + updated_index._index_id = 101; + updated_index._index_type = IndexType::INVERTED; + updated_index._col_unique_ids = {1}; + std::map propertie = {{"updated_key", "updated_value"}}; + updated_index._properties = propertie; + updates.push_back(updated_index); + + TabletColumn col1; + col1._unique_id = 1; + + // Update the index + schema->update_index(col1, IndexType::INVERTED, std::move(updates)); + + // Verify the index was updated + ASSERT_EQ(schema->_indexes[0]->_index_id, 101); + ASSERT_EQ(schema->_indexes[0]->_properties, propertie); +} + +TEST_F(TabletSchemaMultiIndexTest, UpdateIndexWithMultipleIndexes) { + // Setup two indexes for same column + TabletIndex index1; + index1._index_id = 100; + index1._index_type = IndexType::INVERTED; + index1._col_unique_ids = {1}; + schema->append_index(std::move(index1)); + + TabletIndex index2; + index2._index_id = 101; + index2._index_type = IndexType::INVERTED; + index2._col_unique_ids = {1}; + schema->append_index(std::move(index2)); + + // Create two updates + std::vector updates; + TabletIndex updated1; + updated1._index_id = 102; + updated1._index_type = IndexType::INVERTED; + updated1._col_unique_ids = {1}; + std::map properties1 = {{"updated_key1", "updated_value"}}; + updated1._properties = properties1; + updates.push_back(updated1); + + TabletIndex updated2; + updated2._index_id = 103; + updated2._index_type = IndexType::INVERTED; + updated2._col_unique_ids = {1}; + std::map properties2 = {{"updated_key2", "updated_value"}}; + updated2._properties = properties2; + updates.push_back(updated2); + + TabletColumn col1; + col1._unique_id = 1; + + // Update the indexes + schema->update_index(col1, IndexType::INVERTED, std::move(updates)); + + // Verify both indexes were updated + ASSERT_EQ(schema->_indexes[0]->_index_id, 102); + ASSERT_EQ(schema->_indexes[1]->_index_id, 103); + ASSERT_EQ(schema->_indexes[0]->_properties, properties1); + ASSERT_EQ(schema->_indexes[1]->_properties, properties2); +} + +TEST_F(TabletSchemaMultiIndexTest, UpdateIndexWithMismatchedCount) { + // Setup one index + TabletIndex index; + index._index_id = 100; + index._index_type = IndexType::INVERTED; + index._col_unique_ids = {1}; + schema->append_index(std::move(index)); + + // Try to update with two indexes (should fail) + std::vector updates; + TabletIndex updated1; + updated1._index_id = 101; + updated1._index_type = IndexType::INVERTED; + updated1._col_unique_ids = {1}; + std::map properties1 = {{"updated_key1", "updated_value"}}; + updated1._properties = properties1; + updates.push_back(updated1); + + TabletIndex updated2; + updated2._index_id = 102; + updated2._index_type = IndexType::INVERTED; + updated2._col_unique_ids = {1}; + updates.push_back(updated2); + + TabletColumn col1; + col1._unique_id = 1; + + schema->update_index(col1, IndexType::INVERTED, std::move(updates)); + + // Verify the index was NOT updated + ASSERT_NE(schema->_indexes[0]->_properties, properties1); +} + +TEST_F(TabletSchemaMultiIndexTest, RemoveIndex) { + // Setup two indexes + TabletIndex index1; + index1._index_id = 100; + index1._index_type = IndexType::INVERTED; + index1._col_unique_ids = {1}; + index1._escaped_index_suffix_path = ""; + schema->append_index(std::move(index1)); + + TabletIndex index2; + index2._index_id = 101; + index2._index_type = IndexType::INVERTED; + index2._col_unique_ids = {1}; + index2._escaped_index_suffix_path = "suffix1"; + schema->append_index(std::move(index2)); + + // Remove first index + schema->remove_index(100); + + // Verify only second index remains + ASSERT_EQ(schema->_indexes.size(), 1); + ASSERT_EQ(schema->_indexes[0]->_index_id, 101); + + // Verify mapping was updated + TabletSchema::IndexKey key(IndexType::INVERTED, 1, "suffix1"); + ASSERT_EQ(schema->_col_id_suffix_to_index[key].size(), 1); + ASSERT_EQ(schema->_col_id_suffix_to_index[key][0], 0); // Now points to position 0 +} + +TEST_F(TabletSchemaMultiIndexTest, RemoveNonExistentIndex) { + // Setup one index + TabletIndex index; + index._index_id = 100; + index._index_type = IndexType::INVERTED; + index._col_unique_ids = {1}; + index._escaped_index_suffix_path = "suffix1"; + schema->append_index(std::move(index)); + + // Try to remove non-existent index + schema->remove_index(999); + + // Verify original index still exists + ASSERT_EQ(schema->_indexes.size(), 1); + ASSERT_EQ(schema->_indexes[0]->_index_id, 100); +} + +TEST_F(TabletSchemaMultiIndexTest, UpdateIndexesFromThrift) { + std::vector tindexes; + + // Create first thrift index + doris::TOlapTableIndex tindex1; + tindex1.__set_index_id(100); + tindex1.__set_index_type(TIndexType::type::INVERTED); + tindex1.columns.push_back("c1"); + tindexes.push_back(tindex1); + + // Create second thrift index + doris::TOlapTableIndex tindex2; + tindex2.__set_index_id(101); + tindex2.__set_index_type(TIndexType::type::INVERTED); + tindex2.columns.push_back("c1"); + tindex2.columns.push_back("c2"); + tindexes.push_back(tindex2); + + // Update from thrift + schema->update_indexes_from_thrift(tindexes); + + // Verify indexes were created + ASSERT_EQ(schema->_indexes.size(), 2); + ASSERT_EQ(schema->_indexes[0]->_index_id, 100); + ASSERT_EQ(schema->_indexes[1]->_index_id, 101); + + // Verify mappings were created + TabletSchema::IndexKey key1(IndexType::INVERTED, 1, ""); + TabletSchema::IndexKey key2(IndexType::INVERTED, 2, ""); + + ASSERT_EQ(schema->_col_id_suffix_to_index[key1].size(), 2); // Both indexes reference col1 + ASSERT_EQ(schema->_col_id_suffix_to_index[key2].size(), 1); // Only second index references col2 +} + +TEST_F(TabletSchemaMultiIndexTest, InvertedIndexesLookup) { + // Setup two inverted indexes for col1 + TabletIndex index1; + index1._index_id = 100; + index1._index_type = IndexType::INVERTED; + index1._col_unique_ids = {1}; + index1._escaped_index_suffix_path = "suffix1"; + schema->append_index(std::move(index1)); + + TabletIndex index2; + index2._index_id = 101; + index2._index_type = IndexType::INVERTED; + index2._col_unique_ids = {1}; + index2._escaped_index_suffix_path = "suffix1"; + schema->append_index(std::move(index2)); + + // Setup one inverted index for col2 + TabletIndex index3; + index3._index_id = 102; + index3._index_type = IndexType::INVERTED; + index3._col_unique_ids = {2}; + index3._escaped_index_suffix_path = "suffix1"; + schema->append_index(std::move(index3)); + + // Lookup indexes for col1 + auto indexes = schema->inverted_indexs(1, "suffix1"); + ASSERT_EQ(indexes.size(), 2); + ASSERT_EQ(indexes[0]->_index_id, 100); + ASSERT_EQ(indexes[1]->_index_id, 101); + + // Lookup indexes for col2 + indexes = schema->inverted_indexs(2, "suffix1"); + ASSERT_EQ(indexes.size(), 1); + ASSERT_EQ(indexes[0]->_index_id, 102); + + // Lookup non-existent column + indexes = schema->inverted_indexs(999, "suffix1"); + ASSERT_TRUE(indexes.empty()); +} + +TEST_F(TabletSchemaMultiIndexTest, InvertedIndexesLookupWithColumnObject) { + // Setup index for extracted column + TabletIndex index; + index._index_id = 100; + index._index_type = IndexType::INVERTED; + index._col_unique_ids = {3}; // extracted column unique id + schema->append_index(std::move(index)); + + TabletColumn col1; + col1._unique_id = 3; + + // Lookup using the extracted column + auto indexes = schema->inverted_indexs(col1); + ASSERT_EQ(indexes.size(), 1); + ASSERT_EQ(indexes[0]->_index_id, 100); +} + +TEST_F(TabletSchemaMultiIndexTest, InvertedIndexesUnsupportedColumn) { + // Create an unsupported column type + TabletColumn unsupported_col; + unsupported_col._unique_id = 4; + + // Should return empty vector for unsupported column + auto indexes = schema->inverted_indexs(unsupported_col); + ASSERT_TRUE(indexes.empty()); +} + +} // namespace doris \ No newline at end of file diff --git a/be/test/vec/common/schema_util_test.cpp b/be/test/vec/common/schema_util_test.cpp index 3d58863d594c8e..48968efdecb71a 100644 --- a/be/test/vec/common/schema_util_test.cpp +++ b/be/test/vec/common/schema_util_test.cpp @@ -90,7 +90,7 @@ void construct_subcolumn(TabletSchemaSPtr schema, const FieldType& type, int32_t // subcolumns->emplace_back(std::move(subcol)); // } -TEST_F(SchemaUtilTest, inherit_column_attributes) { +TEST_F(SchemaUtilTest, test_inherit_column_attributes) { TabletSchemaPB schema_pb; schema_pb.set_keys_type(KeysType::DUP_KEYS); schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2); @@ -116,10 +116,10 @@ TEST_F(SchemaUtilTest, inherit_column_attributes) { for (const auto& col : subcolumns) { switch (col._parent_col_unique_id) { case 1: - EXPECT_TRUE(tablet_schema->inverted_index(col) != nullptr); + EXPECT_EQ(tablet_schema->inverted_indexs(col).size(), 1); break; case 3: - EXPECT_TRUE(tablet_schema->inverted_index(col) == nullptr); + EXPECT_EQ(tablet_schema->inverted_indexs(col).size(), 0); break; default: EXPECT_TRUE(false); @@ -458,15 +458,15 @@ TEST_F(SchemaUtilTest, generate_sub_column_info_advanced) { schema_util::generate_sub_column_info(schema, 10, "profile.id.name", &sub_column_info); EXPECT_TRUE(match); EXPECT_EQ(sub_column_info.column.parent_unique_id(), 10); - EXPECT_TRUE(sub_column_info.index); + EXPECT_FALSE(sub_column_info.indexes.empty()); match = schema_util::generate_sub_column_info(schema, 10, "profile.id2", &sub_column_info); EXPECT_TRUE(match); EXPECT_EQ(sub_column_info.column.parent_unique_id(), 10); - EXPECT_TRUE(sub_column_info.index); + EXPECT_FALSE(sub_column_info.indexes.empty()); match = schema_util::generate_sub_column_info(schema, 10, "profilexid", &sub_column_info); EXPECT_TRUE(match); EXPECT_EQ(sub_column_info.column.parent_unique_id(), 10); - EXPECT_FALSE(sub_column_info.index); + EXPECT_TRUE(sub_column_info.indexes.empty()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java index 66eb57c04474ce..d14a42c40668a3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java @@ -19,11 +19,14 @@ import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.common.AnalysisException; +import org.apache.doris.nereids.trees.plans.commands.info.IndexDefinition; +import org.apache.doris.nereids.types.DataType; import org.apache.doris.thrift.TInvertedIndexFileStorageFormat; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; @@ -258,4 +261,29 @@ public static void checkInvertedIndexProperties(Map properties, } } } + + public static boolean canHaveMultipleInvertedIndexes(DataType colType, List indexDefs) { + if (indexDefs.size() == 0 || indexDefs.size() == 1) { + return true; + } + if (!colType.isStringLikeType() && !colType.isVariantType()) { + return false; + } + if (indexDefs.size() > 2) { + return false; + } + boolean findParsedInvertedIndex = false; + boolean findNonParsedInvertedIndex = false; + for (IndexDefinition indexDef : indexDefs) { + if (indexDef.isAnalyzedInvertedIndex()) { + findParsedInvertedIndex = true; + } else { + findNonParsedInvertedIndex = true; + } + } + if (findParsedInvertedIndex && findNonParsedInvertedIndex) { + return true; + } + return false; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java index 32ddbafa3ce522..bb6f3087832a98 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java @@ -142,7 +142,7 @@ public class CreateTableInfo { private String clusterName = null; private List clusterKeysColumnNames = null; private PartitionTableInfo partitionTableInfo; // get when validate - private Map> columnToIndexes = new HashMap<>(); + private Map>> columnToIndexes = new HashMap<>(); /** * constructor for create table @@ -676,6 +676,7 @@ public void validate(ConnectContext ctx) { if (!indexes.isEmpty()) { Set distinct = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); boolean disableInvertedIndexV1ForVariant = false; + TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat; try { invertedIndexFileStorageFormat = PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat( @@ -699,7 +700,9 @@ public void validate(ConnectContext ctx) { indexDef.checkColumn(column, keysType, isEnableMergeOnWrite, invertedIndexFileStorageFormat, disableInvertedIndexV1ForVariant); found = true; - columnToIndexes.computeIfAbsent(column, k -> new ArrayList<>()).add(indexDef); + columnToIndexes.computeIfAbsent(column, k -> new HashMap<>()) + .computeIfAbsent(indexDef.getIndexType(), k -> new ArrayList<>()) + .add(indexDef); break; } } @@ -987,52 +990,76 @@ private void generatedColumnCommonCheck() { // 1. if the column is variant type, check it's field pattern is valid // 2. if the column is not variant type, check it's index def is valid private void columnToIndexesCheck() { - for (Map.Entry> entry : columnToIndexes.entrySet()) { + for (Map.Entry>> entry : columnToIndexes.entrySet()) { ColumnDefinition column = entry.getKey(); - List indexDefs = entry.getValue(); - if (column.getType().isVariantType()) { - int variantIndex = 0; - for (IndexDefinition indexDef : indexDefs) { - if (indexDef.getIndexType() != IndexType.INVERTED) { - throw new AnalysisException("column:" + column.getName() + " can only have inverted index."); - } - String fieldPattern = InvertedIndexUtil.getInvertedIndexFieldPattern(indexDef.getProperties()); - if (fieldPattern.isEmpty()) { - ++variantIndex; + Map> indexTypeToIndexDefs = entry.getValue(); + for (Map.Entry> indexDefEntry : indexTypeToIndexDefs.entrySet()) { + IndexType indexType = indexDefEntry.getKey(); + List indexDefs = indexDefEntry.getValue(); + if (indexType != IndexType.INVERTED) { + if (indexDefs.size() > 1) { + throw new AnalysisException("column: " + column.getName() + + " cannot have multiple indexes, index type: " + indexType); + } else { continue; } - boolean findFieldPattern = false; - boolean fieldSupportedIndex = false; - VariantType variantType = (VariantType) column.getType(); - List predefinedFields = variantType.getPredefinedFields(); - for (VariantField field : predefinedFields) { - if (field.getPattern().equals(fieldPattern)) { - findFieldPattern = true; - fieldSupportedIndex = IndexDefinition.isSupportIdxType(field.getDataType()); - break; + } + + // check inverted index + if (column.getType().isVariantType()) { + Map> fieldPatternToIndexDef = new HashMap<>(); + Map fieldPatternToDataType = new HashMap<>(); + for (IndexDefinition indexDef : indexDefs) { + String fieldPattern = InvertedIndexUtil.getInvertedIndexFieldPattern(indexDef.getProperties()); + if (fieldPattern.isEmpty()) { + fieldPatternToIndexDef.computeIfAbsent(fieldPattern, k -> new ArrayList<>()).add(indexDef); + fieldPatternToDataType.put(fieldPattern, column.getType()); + continue; + } + boolean findFieldPattern = false; + VariantType variantType = (VariantType) column.getType(); + List predefinedFields = variantType.getPredefinedFields(); + for (VariantField field : predefinedFields) { + if (field.getPattern().equals(fieldPattern)) { + findFieldPattern = true; + if (!IndexDefinition.isSupportIdxType(field.getDataType())) { + throw new AnalysisException("field pattern: " + + fieldPattern + " is not supported for inverted index" + + " of column: " + column.getName()); + } + fieldPatternToIndexDef.computeIfAbsent(fieldPattern, k -> new ArrayList<>()) + .add(indexDef); + fieldPatternToDataType.put(fieldPattern, field.getDataType()); + break; + } + } + if (!findFieldPattern) { + throw new AnalysisException("can not find field pattern: " + fieldPattern + + " in column: " + column.getName()); } } - if (!findFieldPattern) { - throw new AnalysisException("can not find field pattern: " + fieldPattern - + " in column: " + column.getName()); + for (Map.Entry> fieldIndexEntry : fieldPatternToIndexDef.entrySet()) { + String fieldPattern = fieldIndexEntry.getKey(); + List fieldPatternIndexDefs = fieldIndexEntry.getValue(); + DataType dataType = fieldPatternToDataType.get(fieldPattern); + if (!InvertedIndexUtil.canHaveMultipleInvertedIndexes(dataType, fieldPatternIndexDefs)) { + throw new AnalysisException("column: " + + column.getName() + + " cannot have multiple inverted indexes with field pattern: " + + fieldPattern); + } } - if (!fieldSupportedIndex) { - throw new AnalysisException("field pattern: " - + fieldPattern + " is not supported for inverted index" - + " of column: " + column.getName()); + } else { + for (IndexDefinition indexDef : indexDefs) { + if (!InvertedIndexUtil.getInvertedIndexFieldPattern(indexDef.getProperties()).isEmpty()) { + throw new AnalysisException("column: " + column.getName() + + " cannot have field pattern in index."); + } + } + if (!InvertedIndexUtil.canHaveMultipleInvertedIndexes(column.getType(), indexDefs)) { + throw new AnalysisException("column: " + column.getName() + + " cannot have multiple inverted indexes."); } - } - if (variantIndex > 1) { - throw new AnalysisException("column: " + column.getName() - + " can only have one inverted index without field pattern."); - } - } else { - if (indexDefs.size() > 1) { - throw new AnalysisException("column: " + column.getName() + " cannot have multiple indexes."); - } - IndexDefinition indexDef = indexDefs.get(0); - if (!InvertedIndexUtil.getInvertedIndexFieldPattern(indexDef.getProperties()).isEmpty()) { - throw new AnalysisException("column: " + column.getName() + " cannot have field pattern in index."); } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java index 5d4e592bd53f24..58b6dd10461ead 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java @@ -312,4 +312,16 @@ public String toSql(String tableName) { public Map getProperties() { return properties; } + + public boolean isAnalyzedInvertedIndex() { + return indexType == IndexDef.IndexType.INVERTED + && properties != null + && properties.containsKey(InvertedIndexUtil.INVERTED_INDEX_PARSER_KEY); + } + + public boolean isNonAnalyzedInvertedIndex() { + return indexType == IndexDef.IndexType.INVERTED + && (properties == null + || !properties.containsKey(InvertedIndexUtil.INVERTED_INDEX_PARSER_KEY)); + } } diff --git a/regression-test/data/inverted_index_p0/test_single_column_multi_index.out b/regression-test/data/inverted_index_p0/test_single_column_multi_index.out new file mode 100644 index 00000000000000..2ec2c01184c5e5 --- /dev/null +++ b/regression-test/data/inverted_index_p0/test_single_column_multi_index.out @@ -0,0 +1,7 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +8630 + +-- !sql -- +990 + diff --git a/regression-test/data/variant_p0/predefine/test_predefine_type_multi_index.out b/regression-test/data/variant_p0/predefine/test_predefine_type_multi_index.out new file mode 100644 index 00000000000000..c133b7bcb77050 --- /dev/null +++ b/regression-test/data/variant_p0/predefine/test_predefine_type_multi_index.out @@ -0,0 +1,19 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +3 + +-- !sql -- +3 + +-- !sql -- +13 + +-- !sql -- +13 + +-- !sql -- +13 + +-- !sql -- +13 + diff --git a/regression-test/data/variant_p0/with_index/var_index.out b/regression-test/data/variant_p0/with_index/var_index.out index 5dd0cadd1c3eae..c47ba8e9b946f3 100644 --- a/regression-test/data/variant_p0/with_index/var_index.out +++ b/regression-test/data/variant_p0/with_index/var_index.out @@ -4,6 +4,11 @@ 3 {"a":18811,"b":"hello wworld","c":11111} 4 {"a":1234,"b":"hello xxx world","c":8181111} +-- !sql -- +2 {"a":18811,"b":"hello world","c":1181111} +3 {"a":18811,"b":"hello wworld","c":11111} +4 {"a":1234,"b":"hello xxx world","c":8181111} + -- !sql -- 2 {"a":18811,"b":"hello world","c":1181111} 4 {"a":1234,"b":"hello xxx world","c":8181111} @@ -22,6 +27,3 @@ 11 {"nested":[{"a":1}]} 11 {"nested":[{"b":"1024"}]} --- !sql -- -1 - diff --git a/regression-test/data/variant_p1/predefine/load.out b/regression-test/data/variant_p1/predefine/load.out new file mode 100644 index 00000000000000..1e6975623acbab --- /dev/null +++ b/regression-test/data/variant_p1/predefine/load.out @@ -0,0 +1,91 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +apache/spark +apache/spark +apache/spark +apache/incubator-pekko +apache/spark +apache/spark +apache/spark +apache/spark +apache/infrastructure-puppet +apache/spark + +-- !sql -- +AttackOnDobby/iOS-Core-Animation-Advanced-Techniques +AttackOnDobby/iOS-Core-Animation-Advanced-Techniques +Avaiga/taipy-core +BrightspaceUI/core +ChutimaNakaew/lab11-core +ChutimaNakaew/lab11-core +CtrlAltT0m/Cyber-Core-Skills-Course +CyanogenMod/android_system_core +CyanogenMod/android_system_core +CyanogenMod/android_system_core + +-- !sql -- +xpressengine/xe-core +xpressengine/xe-core +xpressengine/xe-core +xpressengine/xe-core + +-- !sql -- +Jennyhz7/Container-Apache +apache/airflow +apache/airflow +apache/airflow +apache/airflow +apache/airflow +apache/airflow +apache/airflow +apache/arrow +apache/arrow + +-- !sql -- +AttackOnDobby/iOS-Core-Animation-Advanced-Techniques +AttackOnDobby/iOS-Core-Animation-Advanced-Techniques +Avaiga/taipy-core +BrightspaceUI/core +ChutimaNakaew/lab11-core +ChutimaNakaew/lab11-core +CtrlAltT0m/Cyber-Core-Skills-Course +CyanogenMod/android_system_core +CyanogenMod/android_system_core +CyanogenMod/android_system_core + +-- !sql -- +xpressengine/xe-core +xpressengine/xe-core +xpressengine/xe-core +xpressengine/xe-core + +-- !sql -- +Jennyhz7/Container-Apache +apache/airflow +apache/airflow +apache/airflow +apache/airflow +apache/airflow +apache/airflow +apache/airflow +apache/arrow +apache/arrow + +-- !sql -- +AttackOnDobby/iOS-Core-Animation-Advanced-Techniques +AttackOnDobby/iOS-Core-Animation-Advanced-Techniques +Avaiga/taipy-core +BrightspaceUI/core +ChutimaNakaew/lab11-core +ChutimaNakaew/lab11-core +CtrlAltT0m/Cyber-Core-Skills-Course +CyanogenMod/android_system_core +CyanogenMod/android_system_core +CyanogenMod/android_system_core + +-- !sql -- +xpressengine/xe-core +xpressengine/xe-core +xpressengine/xe-core +xpressengine/xe-core + diff --git a/regression-test/suites/inverted_index_p0/test_single_column_multi_index.groovy b/regression-test/suites/inverted_index_p0/test_single_column_multi_index.groovy new file mode 100644 index 00000000000000..1eb7eda4db5c77 --- /dev/null +++ b/regression-test/suites/inverted_index_p0/test_single_column_multi_index.groovy @@ -0,0 +1,111 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +suite("test_single_column_multi_index", "nonConcurrent"){ + def indexTbName = "test_single_column_multi_index" + + sql "DROP TABLE IF EXISTS ${indexTbName}" + + sql """ + CREATE TABLE ${indexTbName} ( + `@timestamp` int(11) NULL COMMENT "", + `clientip` varchar(20) NULL COMMENT "", + `request` text NULL COMMENT "", + `status` int(11) NULL COMMENT "", + `size` int(11) NULL COMMENT "", + INDEX request_keyword_idx (`request`) USING INVERTED COMMENT '', + INDEX request_text_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`@timestamp`) + COMMENT "OLAP" + DISTRIBUTED BY RANDOM BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false, + expected_succ_rows = -1, load_to_single_tablet = 'true' -> + + // load the json data + streamLoad { + table "${table_name}" + + // set http request header params + set 'label', label + "_" + UUID.randomUUID().toString() + set 'read_json_by_line', read_flag + set 'format', format_flag + file file_name // import json file + time 10000 // limit inflight 10s + if (expected_succ_rows >= 0) { + set 'max_filter_ratio', '1' + } + + // if declared a check callback, the default check condition will ignore. + // So you must check all condition + check { result, exception, startTime, endTime -> + if (ignore_failure && expected_succ_rows < 0) { return } + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + if (expected_succ_rows >= 0) { + assertEquals(json.NumberLoadedRows, expected_succ_rows) + } else { + assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + try { + load_httplogs_data.call(indexTbName, 'test_index_match_all', 'true', 'json', 'documents-1000.json') + load_httplogs_data.call(indexTbName, 'test_index_match_all', 'true', 'json', 'documents-1000.json') + load_httplogs_data.call(indexTbName, 'test_index_match_all', 'true', 'json', 'documents-1000.json') + load_httplogs_data.call(indexTbName, 'test_index_match_all', 'true', 'json', 'documents-1000.json') + load_httplogs_data.call(indexTbName, 'test_index_match_all', 'true', 'json', 'documents-1000.json') + load_httplogs_data.call(indexTbName, 'test_index_match_all', 'true', 'json', 'documents-1000.json') + load_httplogs_data.call(indexTbName, 'test_index_match_all', 'true', 'json', 'documents-1000.json') + load_httplogs_data.call(indexTbName, 'test_index_match_all', 'true', 'json', 'documents-1000.json') + load_httplogs_data.call(indexTbName, 'test_index_match_all', 'true', 'json', 'documents-1000.json') + load_httplogs_data.call(indexTbName, 'test_index_match_all', 'true', 'json', 'documents-1000.json') + + sql "sync" + sql """ set enable_common_expr_pushdown = true; """ + GetDebugPoint().enableDebugPointForAllBEs("VMatchPredicate.execute") + + GetDebugPoint().enableDebugPointForAllBEs("inverted_index_reader._select_best_reader", [type: 0]) + try { + qt_sql """ select count() from ${indexTbName} where (request match 'images'); """ + } finally { + GetDebugPoint().disableDebugPointForAllBEs("inverted_index_reader._select_best_reader") + } + + GetDebugPoint().enableDebugPointForAllBEs("inverted_index_reader._select_best_reader", [type: 1]) + try { + qt_sql """ select count() from ${indexTbName} where (request = 'GET /images/hm_bg.jpg HTTP/1.0'); """ + } finally { + GetDebugPoint().disableDebugPointForAllBEs("inverted_index_reader._select_best_reader") + } + } finally { + GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute") + } +} \ No newline at end of file diff --git a/regression-test/suites/variant_p0/predefine/test_predefine_ddl.groovy b/regression-test/suites/variant_p0/predefine/test_predefine_ddl.groovy index 40f351bdd2b9fd..26e839e4cbaeb5 100644 --- a/regression-test/suites/variant_p0/predefine/test_predefine_ddl.groovy +++ b/regression-test/suites/variant_p0/predefine/test_predefine_ddl.groovy @@ -275,4 +275,79 @@ suite("test_predefine_ddl", "p0"){ findException = true } assertTrue(findException) + + findException = false + try { + sql "DROP TABLE IF EXISTS test_ddl_table" + sql """CREATE TABLE test_ddl_table ( + `id` bigint NULL, + `var` variant< + MATCH_NAME 'ab' : int + > NULL, + INDEX idx_ab (var) USING INVERTED PROPERTIES("field_pattern"="ab", "parser"="unicode", "support_phrase" = "true") COMMENT '', + INDEX idx_ab_2 (var) USING INVERTED PROPERTIES("field_pattern"="ab") COMMENT '' + ) ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) + BUCKETS 1 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "disable_auto_compaction" = "true")""" + } catch (Exception e) { + log.info(e.getMessage()) + assertTrue(e.getMessage().contains("column: var cannot have multiple inverted indexes with field pattern: ab")) + findException = true + } + assertTrue(findException) + + findException = false + try { + sql "DROP TABLE IF EXISTS test_ddl_table" + sql """CREATE TABLE test_ddl_table ( + `id` bigint NULL, + `var` variant< + MATCH_NAME 'ab' : string + > NULL, + INDEX idx_ab (var) USING INVERTED PROPERTIES("field_pattern"="ab", "parser"="unicode", "support_phrase" = "true") COMMENT '', + INDEX idx_ab_2 (var) USING INVERTED PROPERTIES("field_pattern"="ab") COMMENT '' + ) ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) + BUCKETS 1 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "disable_auto_compaction" = "true")""" + } catch (Exception e) { + log.info(e.getMessage()) + findException = true + } + assertFalse(findException) + + findException = false + try { + sql "DROP TABLE IF EXISTS test_ddl_table" + sql """CREATE TABLE test_ddl_table ( + `id` bigint NULL, + `var` variant< + MATCH_NAME 'ab' : string + > NULL, + INDEX idx_ab (var) USING INVERTED PROPERTIES("field_pattern"="ab", "parser"="unicode", "support_phrase" = "true") COMMENT '', + INDEX idx_ab_2 (var) USING INVERTED PROPERTIES("field_pattern"="ab", "parser"="unicode", "support_phrase" = "true") COMMENT '' + ) ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) + BUCKETS 1 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "disable_auto_compaction" = "true")""" + } catch (Exception e) { + log.info(e.getMessage()) + assertTrue(e.getMessage().contains("column: var cannot have multiple inverted indexes with field pattern: ab")) + findException = true + } + assertTrue(findException) + + findException = false + try { + sql "DROP TABLE IF EXISTS test_ddl_table" + sql """CREATE TABLE test_ddl_table ( + `id` bigint NULL, + `var` variant< + MATCH_NAME 'ab' : string + > NULL, + INDEX idx_ab (var) USING INVERTED PROPERTIES("field_pattern"="ab") COMMENT '', + INDEX idx_ab_2 (var) USING INVERTED PROPERTIES("field_pattern"="ab") COMMENT '' + ) ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) + BUCKETS 1 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "disable_auto_compaction" = "true")""" + } catch (Exception e) { + log.info(e.getMessage()) + assertTrue(e.getMessage().contains("column: var cannot have multiple inverted indexes with field pattern: ab")) + findException = true + } + assertTrue(findException) } \ No newline at end of file diff --git a/regression-test/suites/variant_p0/predefine/test_predefine_type_multi_index.groovy b/regression-test/suites/variant_p0/predefine/test_predefine_type_multi_index.groovy new file mode 100644 index 00000000000000..4f6520837820e8 --- /dev/null +++ b/regression-test/suites/variant_p0/predefine/test_predefine_type_multi_index.groovy @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_variant_predefine_type_multi_index", "p0"){ + sql """ set describe_extend_variant_column = true """ + sql """ set enable_match_without_inverted_index = false """ + sql """ set enable_common_expr_pushdown = true """ + + def tableName = "test_variant_predefine_type_multi_index" + sql "DROP TABLE IF EXISTS ${tableName}" + sql """CREATE TABLE ${tableName} ( + `id` bigint NULL, + `var` variant < + MATCH_NAME 'path.string' : string + > NULL, + INDEX idx_a_d (var) USING INVERTED PROPERTIES("field_pattern"="path.string", "parser"="unicode", "support_phrase" = "true") COMMENT '', + INDEX idx_a_d_2 (var) USING INVERTED PROPERTIES("field_pattern"="path.string") COMMENT '' + ) ENGINE=OLAP DUPLICATE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "disable_auto_compaction" = "true", "variant_max_subcolumns_count" = "10")""" + + sql """insert into ${tableName} values(1, '{"path" : {"int" : 123, "decimal" : 123.123456789012, "string" : "hello"}}'), + (2, '{"path" : {"int" : 456, "decimal" : 456.456789123456, "string" : "world"}}'), + (3, '{"path" : {"int" : 789, "decimal" : 789.789123456789, "string" : "hello"}}'), + (4, '{"path" : {"int" : 100, "decimal" : 100.100123456789, "string" : "world"}}'), + (5, '{"path" : {"int" : 111, "decimal" : 111.111111111111, "string" : "hello"}}')""" + + sql """ set profile_level = 2""" + sql """ set inverted_index_skip_threshold = 0 """ + sql """ set enable_common_expr_pushdown = true """ + sql """ set enable_match_without_inverted_index = false """ + + qt_sql """ select count() from ${tableName} where var['path']['string'] match 'hello' """ + qt_sql """ select count() from ${tableName} where var['path']['string'] = 'hello' """ + + for (int i = 0; i < 10; i++) { + sql """ insert into ${tableName} values(1, '{"path" : {"int" : 123, "decimal" : 123.123456789012, "string" : "hello"}}') """ + } + + trigger_and_wait_compaction(tableName, "cumulative") + + + qt_sql """ select count() from ${tableName} where var['path']['string'] match 'hello' """ + qt_sql """ select count() from ${tableName} where var['path']['string'] = 'hello' """ + + qt_sql """ select count() from ${tableName} where var['path']['string'] match 'hello' """ + qt_sql """ select count() from ${tableName} where var['path']['string'] = 'hello' """ +} \ No newline at end of file diff --git a/regression-test/suites/variant_p0/with_index/var_index.groovy b/regression-test/suites/variant_p0/with_index/var_index.groovy index ce639f34de5be0..a75fbf99a838c1 100644 --- a/regression-test/suites/variant_p0/with_index/var_index.groovy +++ b/regression-test/suites/variant_p0/with_index/var_index.groovy @@ -33,6 +33,7 @@ suite("regression_test_variant_var_index", "p0, nonConcurrent"){ sql """insert into var_index values(2, '{"a" : 18811, "b" : "hello world", "c" : 1181111}')""" sql """insert into var_index values(3, '{"a" : 18811, "b" : "hello wworld", "c" : 11111}')""" sql """insert into var_index values(4, '{"a" : 1234, "b" : "hello xxx world", "c" : 8181111}')""" + qt_sql """select * from var_index where cast(v["a"] as smallint) > 123 and cast(v["b"] as string) match 'hello' and cast(v["c"] as int) > 1024 order by k""" trigger_and_wait_compaction(table_name, "full") sql """ set enable_common_expr_pushdown = true """ sql """set enable_match_without_inverted_index = false""" diff --git a/regression-test/suites/variant_p1/predefine/load.groovy b/regression-test/suites/variant_p1/predefine/load.groovy new file mode 100644 index 00000000000000..313fe4826ef9b7 --- /dev/null +++ b/regression-test/suites/variant_p1/predefine/load.groovy @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +suite("test_predefine_type_multi_index", "p1"){ + + def load_json_data = {table_name, file_name -> + // load the json data + streamLoad { + table "${table_name}" + + // set http request header params + set 'read_json_by_line', 'true' + set 'format', 'json' + set 'max_filter_ratio', '0.1' + set 'memtable_on_sink_node', 'true' + file file_name // import json file + time 10000 // limit inflight 10s + + // if declared a check callback, the default check condition will ignore. + // So you must check all condition + + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + logger.info("Stream load ${file_name} result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + // assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + + def table_name = "github_events" + sql """DROP TABLE IF EXISTS ${table_name}""" + table_name = "github_events" + int rand_subcolumns_count = Math.floor(Math.random() * (611 - 511 + 1)) + 400 + // int rand_subcolumns_count = 0; + sql """ drop table if exists github_events_2 """ + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + k bigint, + v variant< + MATCH_NAME 'repo.name' : string, + MATCH_NAME 'payload.pull_request.additions' : int, + MATCH_NAME 'actor.login' : string, + MATCH_NAME 'type' : string, + MATCH_NAME 'payload.action' : string, + MATCH_NAME 'created_at' : datetime, + MATCH_NAME 'payload.issue.number' : int, + MATCH_NAME 'payload.comment.body' : string, + MATCH_NAME 'type.name' : string + > NULL, + INDEX idx_var (`v`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true"), + INDEX idx_var_2 (`v`) USING INVERTED + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 4 + properties("replication_num" = "1", "disable_auto_compaction" = "true", "variant_enable_flatten_nested" = "false", "variant_max_subcolumns_count" = "${rand_subcolumns_count}"); + """ + + // 2015 + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2015-01-01-0.json'}""") + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2015-01-01-1.json'}""") + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2015-01-01-2.json'}""") + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2015-01-01-3.json'}""") + + // 2022 + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2022-11-07-16.json'}""") + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2022-11-07-10.json'}""") + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2022-11-07-22.json'}""") + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2022-11-07-23.json'}""") + + sql """set enable_match_without_inverted_index = false""" + sql """ set enable_common_expr_pushdown = true """ + + qt_sql """select cast(v["repo"]["name"] as string) from github_events where v["repo"]["name"] match 'apache' order by k limit 10;""" + qt_sql """select cast(v["repo"]["name"] as string) from github_events where cast(v["repo"]["name"] as string) match 'xpressengine/xe-core' order by 1 limit 10;""" + qt_sql """select cast(v["repo"]["name"] as string) from github_events where cast(v["repo"]["name"] as string) = 'xpressengine/xe-core' order by 1 limit 10""" + + sql """ drop table if exists github_events_2 """ + sql """ create table github_events_2 like github_events """ + sql """ insert into github_events_2 select * from github_events """ + + trigger_and_wait_compaction(table_name, "cumulative") + qt_sql """select cast(v["repo"]["name"] as string) from github_events where v["repo"]["name"] match 'apache' order by 1 limit 10;""" + qt_sql """select cast(v["repo"]["name"] as string) from github_events where cast(v["repo"]["name"] as string) match 'xpressengine/xe-core' order by 1 limit 10;""" + qt_sql """select cast(v["repo"]["name"] as string) from github_events where cast(v["repo"]["name"] as string) = 'xpressengine/xe-core' order by 1 limit 10""" + + qt_sql """select cast(v["repo"]["name"] as string) from github_events_2 where v["repo"]["name"] match 'apache' order by 1 limit 10;""" + qt_sql """select cast(v["repo"]["name"] as string) from github_events_2 where cast(v["repo"]["name"] as string) match 'xpressengine/xe-core' order by 1 limit 10;""" + qt_sql """select cast(v["repo"]["name"] as string) from github_events_2 where cast(v["repo"]["name"] as string) = 'xpressengine/xe-core' order by 1 limit 10""" +}