 // See the License for the specific language governing permissions and
 // limitations under the License.

-use std::collections::hash_map::RandomState;
-use std::collections::HashMap;
-use std::collections::HashSet;
 use std::sync::Arc;

 use common_base::base::tokio;
-use common_datablocks::BlockCompactThresholds;
-use common_datablocks::DataBlock;
-use common_datavalues::DataSchema;
-use common_exception::ErrorCode;
 use common_exception::Result;
-use common_storages_table_meta::caches::CacheManager;
-use common_storages_table_meta::meta::BlockMeta;
-use common_storages_table_meta::meta::SegmentInfo;
-use common_storages_table_meta::meta::Statistics;
-use common_storages_table_meta::meta::TableSnapshot;
-use common_storages_table_meta::meta::Versioned;
+use common_sql::executor::ExpressionBuilderWithoutRenaming;
+use common_sql::plans::DeletePlan;
+use common_sql::plans::Plan;
+use common_sql::plans::ScalarExpr;
+use common_sql::Planner;
+use common_storages_factory::Table;
+use common_storages_fuse::FuseTable;
+use databend_query::pipelines::executor::ExecutorSettings;
+use databend_query::pipelines::executor::PipelineCompleteExecutor;
+use databend_query::sessions::QueryContext;
 use databend_query::sessions::TableContext;
-use databend_query::storages::fuse::io::SegmentWriter;
-use databend_query::storages::fuse::io::TableMetaLocationGenerator;
-use databend_query::storages::fuse::operations::DeletionMutator;
-use databend_query::storages::fuse::statistics::ClusterStatsGenerator;
-use uuid::Uuid;

+use crate::storages::fuse::table_test_fixture::execute_command;
+use crate::storages::fuse::table_test_fixture::execute_query;
+use crate::storages::fuse::table_test_fixture::expects_ok;
 use crate::storages::fuse::table_test_fixture::TestFixture;

-/// [issue#6570](https://github.com/datafuselabs/databend/issues/6570)
-/// During deletion, there might be multiple segments become empty
-
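+/// Inserts ten rows (one statement each), deletes a single row through the
+/// SQL `DELETE` path, and verifies the segment and block counts of the
+/// resulting snapshot.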
 #[tokio::test]
 async fn test_deletion_mutator_multiple_empty_segments() -> Result<()> {
-    // generates a batch of segments, and delete blocks from them
-    // so that half of the segments will be empty
-
     let fixture = TestFixture::new().await;
     let ctx = fixture.ctx();
-    let location_generator = TableMetaLocationGenerator::with_prefix("_prefix".to_owned());
+    let tbl_name = fixture.default_table_name();
+    let db_name = fixture.default_db_name();

-    let segment_info_cache = CacheManager::instance().get_table_segment_cache();
-    let data_accessor = ctx.get_data_operator()?.operator();
-    let seg_writer = SegmentWriter::new(&data_accessor, &location_generator, &segment_info_cache);
+    fixture.create_normal_table().await?;

-    let gen_test_seg = || async {
-        // generates test segment, each of them contains only one block
-        // structures are filled with arbitrary values, no effects for this test case
-        let block_id = Uuid::new_v4().simple().to_string();
-        let location = (block_id, DataBlock::VERSION);
-        let test_block_meta = Arc::new(BlockMeta::new(
-            1,
-            1,
-            1,
-            HashMap::default(),
-            HashMap::default(),
-            None,
-            location.clone(),
-            None,
-            0,
-        ));
-        let segment = SegmentInfo::new(vec![test_block_meta], Statistics::default());
-        Ok::<_, ErrorCode>((seg_writer.write_segment(segment).await?, location))
-    };
+    // insert ten rows, one statement at a time
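+    // (each single-row insert commits a new segment containing one block)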
+    for i in 0..10 {
+        let qry = format!("insert into {}.{}(id) values({})", db_name, tbl_name, i);
+        execute_command(ctx.clone(), qry.as_str()).await?;
+    }

-    // generates 100 segments, for each segment, contains one block
-    let mut test_segment_locations = vec![];
-    let mut test_block_locations = vec![];
-    for _ in 0..100 {
-        let (segment_location, block_location) = gen_test_seg().await?;
-        test_segment_locations.push(segment_location);
-        test_block_locations.push(block_location);
+    let catalog = ctx.get_catalog(fixture.default_catalog_name().as_str())?;
+    let table = catalog
+        .get_table(ctx.get_tenant().as_str(), &db_name, &tbl_name)
+        .await?;
+    // delete the row where id = 1
+    let query = format!("delete from {}.{} where id=1", db_name, tbl_name);
+    let mut planner = Planner::new(ctx.clone());
+    let (plan, _, _) = planner.plan_sql(&query).await?;
+    if let Plan::Delete(delete) = plan {
+        do_deletion(ctx.clone(), table.clone(), *delete).await?;
     }

-    let base_snapshot = TableSnapshot::new(
-        Uuid::new_v4(),
-        &None,
-        None,
-        DataSchema::empty(),
-        Statistics::default(),
-        test_segment_locations.clone(),
-        None,
-        None,
+    // check count
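+    // (the delete emptied one of the ten single-block segments, so the latest
+    // snapshot reported by `fuse_snapshot` should hold 9 segments / 9 blocks)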
+    let expected = vec![
+        "+---------------+-------+",
+        "| segment_count | count |",
+        "+---------------+-------+",
+        "| 9             | 9     |",
+        "+---------------+-------+",
+    ];
+    let qry = format!(
+        "select segment_count, block_count as count from fuse_snapshot('{}', '{}') limit 1",
+        db_name, tbl_name
     );
+    expects_ok(
+        "check segment and block count",
+        execute_query(fixture.ctx(), qry.as_str()).await,
+        expected,
+    )
+    .await?;
+    Ok(())
+}

-    let table_ctx: Arc<dyn TableContext> = ctx as Arc<dyn TableContext>;
-    let mut mutator = DeletionMutator::try_create(
-        table_ctx,
-        data_accessor.clone(),
-        location_generator,
-        Arc::new(base_snapshot),
-        ClusterStatsGenerator::default(),
-        BlockCompactThresholds::default(),
-    )?;
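+/// Builds the deletion filter from the plan's selection (if any) and runs the
+/// fuse table's delete pipeline to completion.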
+pub async fn do_deletion(
+    ctx: Arc<QueryContext>,
+    table: Arc<dyn Table>,
+    plan: DeletePlan,
+) -> Result<()> {
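+    // translate the optional WHERE clause into a filter expression, together
+    // with the indices of the columns it references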
+    let (filter, col_indices) = if let Some(scalar) = &plan.selection {
+        let eb = ExpressionBuilderWithoutRenaming::create(plan.metadata.clone());
+        (
+            Some(eb.build(scalar)?),
+            scalar.used_columns().into_iter().collect(),
+        )
+    } else {
+        (None, vec![])
+    };

-    // clear half of the segments
-    for (i, _) in test_segment_locations.iter().enumerate().take(100) {
-        if i % 2 == 0 {
-            // empty the segment (segment only contains one block)
-            mutator
-                .replace_with(i, test_block_locations[i].clone(), None, DataBlock::empty())
-                .await?;
-        }
+    let fuse_table = FuseTable::try_from_table(table.as_ref())?;
+    let settings = ctx.get_settings();
+    let mut pipeline = common_pipeline_core::Pipeline::create();
+    fuse_table
+        .delete(ctx.clone(), filter, col_indices, &mut pipeline)
+        .await?;
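+    // the delete may leave the pipeline empty when there is nothing to mutate;
+    // an executor is only needed when stages were actually added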
+    if !pipeline.pipes.is_empty() {
+        pipeline.set_max_threads(settings.get_max_threads()? as usize);
+        let executor_settings = ExecutorSettings::try_create(&settings)?;
+        let executor = PipelineCompleteExecutor::try_create(pipeline, executor_settings)?;
+        ctx.set_executor(Arc::downgrade(&executor.get_inner()));
+        executor.execute()?;
+        drop(executor);
     }
-
-    let (segments, _, _) = mutator.generate_segments().await?;
-
-    // half segments left after deletion
-    assert_eq!(segments.len(), 50);
-
-    // new_segments should be a subset of test_segments in our case (no partial deletion of segment)
-    let new_segments = HashSet::<_, RandomState>::from_iter(segments.into_iter());
-    let test_segments = HashSet::from_iter(test_segment_locations.into_iter());
-    assert!(new_segments.is_subset(&test_segments));
-
     Ok(())
 }