Skip to content

Commit 9852392

Browse files
committed
update doc, add more test case
1 parent 7875643 commit 9852392

File tree

5 files changed

+36
-5
lines changed

5 files changed

+36
-5
lines changed

docs/doc/14-sql-commands/00-ddl/20-table/60-optimize-table.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ title: OPTIMIZE TABLE
55
The objective of optimizing a table in Databend is to compact or purge its historical data in your object storage. This helps save storage space and improve query efficiency.
66

77
:::caution
8-
Databend's Time Travel feature relies on historical data. If you purge historical data from a table with the command `OPTIMIZE TABLE <your_table> PURGE` or `OPTIMIZE TABLE <your_table> ALL`, the table will not be eligible for time travel. The command removes all snapshots (except the most recent one) and their associated segments and block files.
8+
Databend's Time Travel feature relies on historical data. If you purge historical data from a table with the command `OPTIMIZE TABLE <your_table> PURGE` or `OPTIMIZE TABLE <your_table> ALL`, the table will not be eligible for time travel. The command removes all snapshots (except the most recent one) and their associated segments, block files, and table statistic files.
99
:::
1010

11-
## What are Snapshot, Segment, and Block?
11+
## What are Snapshot, Segment, Block, and Table Statistic File?
1212

1313
Snapshot, segment, and block are the concepts Databend uses for data storage. Databend uses them to construct a hierarchical structure for storing table data.
1414

@@ -20,6 +20,8 @@ A snapshot is a JSON file that does not save the table's data but indicate the s
2020

2121
A segment is a JSON file that organizes the storage blocks (at least 1, at most 1,000) where the data is stored. If you run [FUSE_SEGMENT](../../../15-sql-functions/111-system-functions/fuse_segment.md) against a snapshot with the snapshot ID, you can find which segments are referenced by the snapshot.
2222

23+
A table statistic file is a JSON file that saves table statistic data, such as the distinct values of a table column.
24+
2325
Databend saves actual table data in parquet files and considers each parquet file as a block. If you run [FUSE_BLOCK](../../../15-sql-functions/111-system-functions/fuse_block.md) against a snapshot with the snapshot ID, you can find which blocks are referenced by the snapshot.
2426

2527
Databend creates a unique ID for each database and table for storing the snapshot, segment, and block files and saves them to your object storage in the path `<bucket_name>/[root]/<db_id>/<table_id>/`. Each snapshot, segment, and block file is named with a UUID (32-character lowercase hexadecimal string).
@@ -29,6 +31,7 @@ Databend creates a unique ID for each database and table for storing the snapsho
2931
| Snapshot | JSON | `<32bitUUID>_<version>.json` | `<bucket_name>/[root]/<db_id>/<table_id>/_ss/` |
3032
| Segment | JSON | `<32bitUUID>_<version>.json` | `<bucket_name>/[root]/<db_id>/<table_id>/_sg/` |
3133
| Block | parquet | `<32bitUUID>_<version>.parquet` | `<bucket_name>/[root]/<db_id>/<table_id>/_b/` |
34+
| Table statistic | JSON | `<32bitUUID>_<version>.json` | `<bucket_name>/[root]/<db_id>/<table_id>/_ts/` |
3235

3336
## Table Optimization Considerations
3437

@@ -69,7 +72,7 @@ OPTIMIZE TABLE [database.]table_name [ PURGE | COMPACT | ALL | STATISTIC ] [SEGM
6972

7073
- `OPTIMIZE TABLE <table_name> PURGE`
7174

72-
Purges the historical data of table. Only the latest snapshot (including the segments and blocks referenced by this snapshot) will be kept.
75+
Purges the historical data of the table. Only the latest snapshot (including the segments, blocks, and table statistic file referenced by this snapshot) will be kept.
7376

7477
- `OPTIMIZE TABLE <table_name> COMPACT [LIMIT <segment_count>]`
7578

src/query/service/tests/it/storages/fuse/operations/optimize.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,14 +63,14 @@ async fn test_fuse_snapshot_optimize_statistic_purge() -> Result<()> {
6363
let ctx = fixture.ctx();
6464
execute_command(ctx, &qry).await?;
6565

66-
check_data_dir(&fixture, case_name, 3, 1 + i, 2, 2, 2, Some(())).await?;
66+
check_data_dir(&fixture, case_name, 3, 1 + i, 2, 2, 2, Some(()), None).await?;
6767
}
6868

6969
// After compact, all the count will become 1
7070
let qry = format!("optimize table {}.{} all", db, tbl);
7171
execute_command(fixture.ctx().clone(), &qry).await?;
7272

73-
check_data_dir(&fixture, case_name, 1, 1, 1, 1, 1, Some(())).await?;
73+
check_data_dir(&fixture, case_name, 1, 1, 1, 1, 1, Some(()), Some(())).await?;
7474

7575
Ok(())
7676
}
@@ -111,6 +111,7 @@ async fn do_purge_test(
111111
block_count,
112112
index_count,
113113
Some(()),
114+
None,
114115
)
115116
.await?;
116117
history_should_have_item(&fixture, case_name, snapshot_count).await?;
@@ -130,6 +131,7 @@ async fn do_purge_test(
130131
block_count,
131132
index_count,
132133
Some(()),
134+
None,
133135
)
134136
.await?;
135137

src/query/service/tests/it/storages/fuse/operations/purge_drop.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ async fn test_fuse_snapshot_truncate_in_drop_all_stmt() -> Result<()> {
5959
0, // 0 blocks
6060
0, // 0 index
6161
None,
62+
None,
6263
)
6364
.await?;
6465
Ok(())

src/query/service/tests/it/storages/fuse/operations/purge_truncate.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ async fn test_fuse_truncate_purge_stmt() -> Result<()> {
4343
2,
4444
expected_index_count,
4545
Some(()),
46+
None,
4647
)
4748
.await?;
4849

@@ -68,6 +69,7 @@ async fn test_fuse_truncate_purge_stmt() -> Result<()> {
6869
0,
6970
0,
7071
Some(()),
72+
None,
7173
)
7274
.await?;
7375
Ok(())

src/query/service/tests/it/storages/fuse/table_test_fixture.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,7 @@ pub async fn check_data_dir(
465465
block_count: u32,
466466
index_count: u32,
467467
check_last_snapshot: Option<()>,
468+
check_table_statistic_file: Option<()>,
468469
) -> Result<()> {
469470
let data_path = match fixture.ctx().get_config().storage.params {
470471
StorageParams::Fs(v) => v.root,
@@ -477,6 +478,7 @@ pub async fn check_data_dir(
477478
let mut b_count = 0;
478479
let mut i_count = 0;
479480
let mut last_snapshot_loc = "".to_string();
481+
let mut table_statistic_files = vec![];
480482
let prefix_snapshot = FUSE_TBL_SNAPSHOT_PREFIX;
481483
let prefix_snapshot_statistics = FUSE_TBL_SNAPSHOT_STATISTICS_PREFIX;
482484
let prefix_segment = FUSE_TBL_SEGMENT_PREFIX;
@@ -500,6 +502,7 @@ pub async fn check_data_dir(
500502
i_count += 1;
501503
} else if path.starts_with(prefix_snapshot_statistics) {
502504
ts_count += 1;
505+
table_statistic_files.push(entry_path.to_string());
503506
} else if path.starts_with(prefix_last_snapshot_hint) && check_last_snapshot.is_some() {
504507
let content = fixture
505508
.ctx
@@ -552,6 +555,26 @@ pub async fn check_data_dir(
552555
);
553556
}
554557

558+
if check_table_statistic_file.is_some() {
559+
let table = fixture.latest_default_table().await?;
560+
let fuse_table = FuseTable::try_from_table(table.as_ref())?;
561+
let snapshot_opt = fuse_table.read_table_snapshot().await?;
562+
assert!(snapshot_opt.is_some());
563+
let snapshot = snapshot_opt.unwrap();
564+
let ts_location_opt = snapshot.table_statistics_location.clone();
565+
assert!(ts_location_opt.is_some());
566+
let ts_location = ts_location_opt.unwrap();
567+
println!(
568+
"ts_location_opt: {:?}, table_statistic_files: {:?}",
569+
ts_location, table_statistic_files
570+
);
571+
assert!(
572+
table_statistic_files
573+
.iter()
574+
.any(|e| e.contains(&ts_location))
575+
);
576+
}
577+
555578
Ok(())
556579
}
557580

0 commit comments

Comments
 (0)