 use std::sync::Arc;
 use std::time::Instant;
 
+use databend_common_base::runtime::execute_futures_in_parallel;
 use databend_common_catalog::table::Table;
 use databend_common_exception::Result;
 use databend_common_meta_app::schema::TableInfo;
@@ -28,108 +29,131 @@ use opendal::Operator;
 
 #[async_backtrace::framed]
 pub async fn do_vacuum_drop_table(
-    table_info: &TableInfo,
-    operator: &Operator,
+    tables: Vec<(TableInfo, Operator)>,
     dry_run_limit: Option<usize>,
 ) -> Result<Option<Vec<VacuumDropFileInfo>>> {
-    let dir = format!("{}/", FuseTable::parse_storage_prefix(table_info)?);
+    let mut list_files = vec![];
+    for (table_info, operator) in tables {
+        let dir = format!("{}/", FuseTable::parse_storage_prefix(&table_info)?);
 
-    info!(
-        "vacuum drop table {:?} dir {:?}, is_external_table:{:?}",
-        table_info.name,
-        dir,
-        table_info.meta.storage_params.is_some()
-    );
+        info!(
+            "vacuum drop table {:?} dir {:?}, is_external_table:{:?}",
+            table_info.name,
+            dir,
+            table_info.meta.storage_params.is_some()
+        );
 
-    let start = Instant::now();
+        let start = Instant::now();
 
-    let ret = match dry_run_limit {
-        None => {
-            operator.remove_all(&dir).await?;
-            Ok(None)
-        }
-        Some(dry_run_limit) => {
-            let mut ds = operator
-                .lister_with(&dir)
-                .recursive(true)
-                .metakey(Metakey::Mode)
-                .metakey(Metakey::ContentLength)
-                .await?;
-            let mut list_files = Vec::new();
-            while let Some(de) = ds.try_next().await? {
-                let meta = de.metadata();
-                if EntryMode::FILE == meta.mode() {
-                    list_files.push((
-                        table_info.name.clone(),
-                        de.name().to_string(),
-                        meta.content_length(),
-                    ));
-                    if list_files.len() >= dry_run_limit {
-                        break;
+        match dry_run_limit {
+            None => {
+                operator.remove_all(&dir).await?;
+            }
+            Some(dry_run_limit) => {
+                let mut ds = operator
+                    .lister_with(&dir)
+                    .recursive(true)
+                    .metakey(Metakey::Mode)
+                    .metakey(Metakey::ContentLength)
+                    .await?;
+
+                while let Some(de) = ds.try_next().await? {
+                    let meta = de.metadata();
+                    if EntryMode::FILE == meta.mode() {
+                        list_files.push((
+                            table_info.name.clone(),
+                            de.name().to_string(),
+                            meta.content_length(),
+                        ));
+                        if list_files.len() >= dry_run_limit {
+                            break;
+                        }
                     }
                 }
             }
+        };
 
-            Ok(Some(list_files))
-        }
-    };
-
-    info!(
-        "vacuum drop table {:?} dir {:?}, cost:{} sec",
-        table_info.name,
-        dir,
-        start.elapsed().as_secs()
-    );
-    ret
+        info!(
+            "vacuum drop table {:?} dir {:?}, cost:{} sec",
+            table_info.name,
+            dir,
+            start.elapsed().as_secs()
+        );
+    }
+    Ok(if dry_run_limit.is_some() {
+        Some(list_files)
+    } else {
+        None
+    })
 }
 
 #[async_backtrace::framed]
 pub async fn do_vacuum_drop_tables(
+    threads_nums: usize,
     tables: Vec<Arc<dyn Table>>,
     dry_run_limit: Option<usize>,
 ) -> Result<Option<Vec<VacuumDropFileInfo>>> {
     let start = Instant::now();
     let tables_len = tables.len();
     info!("do_vacuum_drop_tables {} tables", tables_len);
-    let mut list_files = Vec::new();
-    let mut left_limit = dry_run_limit;
+
+    let batch_size = (tables_len / threads_nums).min(50).max(1);
+
+    let mut table_vecs = Vec::with_capacity(tables.len());
     for table in tables {
-        // only operate fuse table
-        let ret = if let Ok(fuse_table) = FuseTable::try_from_table(table.as_ref()) {
-            let table_info = table.get_table_info();
-            let operator = fuse_table.get_operator_ref();
-            do_vacuum_drop_table(table_info, operator, left_limit).await?
-        } else {
-            info!(
-                "ignore table {}, which is not of FUSE engine. Table engine {}",
-                table.get_table_info().name,
-                table.engine()
-            );
-            continue;
-        };
-        if let Some(ret) = ret {
-            list_files.extend(ret);
-            if list_files.len() >= dry_run_limit.unwrap() {
+        let (table_info, operator) =
+            if let Ok(fuse_table) = FuseTable::try_from_table(table.as_ref()) {
+                (fuse_table.get_table_info(), fuse_table.get_operator())
+            } else {
                 info!(
-                    "do_vacuum_drop_tables {} tables, cost:{} sec",
-                    tables_len,
-                    start.elapsed().as_secs()
+                    "ignore table {}, which is not of FUSE engine. Table engine {}",
+                    table.get_table_info().name,
+                    table.engine()
                 );
-                return Ok(Some(list_files));
-            } else {
-                left_limit = Some(dry_run_limit.unwrap() - list_files.len());
+                continue;
+            };
+
+        table_vecs.push((table_info.clone(), operator));
+    }
+
+    let result = if batch_size >= table_vecs.len() {
+        do_vacuum_drop_table(table_vecs, dry_run_limit).await?
+    } else {
+        let mut chunks = table_vecs.chunks(batch_size);
+        let dry_run_limit = dry_run_limit
+            .map(|dry_run_limit| (dry_run_limit / threads_nums).min(dry_run_limit).max(1));
+        let tasks = std::iter::from_fn(move || {
+            chunks
+                .next()
+                .map(|tables| do_vacuum_drop_table(tables.to_vec(), dry_run_limit))
+        });
+
+        let result = execute_futures_in_parallel(
+            tasks,
+            threads_nums,
+            threads_nums * 2,
+            "batch-vacuum-drop-tables-worker".to_owned(),
+        )
+        .await?;
+        if dry_run_limit.is_some() {
+            let mut ret_files = vec![];
+            for file in result {
+                // return an error if any error happened during `do_vacuum_drop_table`
+                if let Some(files) = file? {
+                    ret_files.extend(files);
+                }
             }
+            Some(ret_files)
+        } else {
+            None
         }
-    }
+    };
+
     info!(
         "do_vacuum_drop_tables {} tables, cost:{} sec",
         tables_len,
         start.elapsed().as_secs()
     );
 
-    Ok(if dry_run_limit.is_some() {
-        Some(list_files)
-    } else {
-        None
-    })
+    Ok(result)
 }
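
For reference, the core of this change is a chunk-and-join pattern: tables are grouped into batches of at most 50, one `do_vacuum_drop_table` future is created per batch, and `execute_futures_in_parallel` runs them with `threads_nums` workers before the per-batch dry-run file lists are merged. The sketch below illustrates the same pattern with plain tokio primitives; it is only an approximation, with `JoinSet` plus a `Semaphore` standing in for databend's runtime helper, and `Item`, `vacuum_batch`, and `vacuum_all` as hypothetical placeholder names rather than anything from this codebase.

```rust
use std::sync::Arc;

use tokio::sync::Semaphore;
use tokio::task::JoinSet;

#[derive(Clone, Debug)]
struct Item(String);

// Hypothetical per-batch worker standing in for `do_vacuum_drop_table`:
// it just returns the names it would have vacuumed.
async fn vacuum_batch(batch: Vec<Item>) -> Result<Vec<String>, String> {
    Ok(batch.into_iter().map(|item| item.0).collect())
}

async fn vacuum_all(items: Vec<Item>, threads: usize) -> Result<Vec<String>, String> {
    // Same sizing rule as the patch: at most 50 items per batch, at least 1.
    let batch_size = (items.len() / threads).min(50).max(1);

    // Cap in-flight batches at `threads`, roughly the role of the worker and
    // permit arguments passed to `execute_futures_in_parallel` above.
    let permits = Arc::new(Semaphore::new(threads));
    let mut set = JoinSet::new();
    for chunk in items.chunks(batch_size) {
        let batch = chunk.to_vec();
        let permits = permits.clone();
        set.spawn(async move {
            let _permit = permits.acquire().await.expect("semaphore closed");
            vacuum_batch(batch).await
        });
    }

    // Gather per-batch results, surfacing the first error (like the `file?`
    // check above). Completion order is not preserved here.
    let mut all = Vec::new();
    while let Some(joined) = set.join_next().await {
        all.extend(joined.map_err(|e| e.to_string())??);
    }
    Ok(all)
}

#[tokio::main]
async fn main() {
    let items: Vec<Item> = (0..7).map(|i| Item(format!("t{i}"))).collect();
    println!("{:?}", vacuum_all(items, 4).await.unwrap());
}
```

Unlike the patch, which also divides `dry_run_limit` across workers, the sketch applies no per-batch limit, and `join_next` yields results in completion order rather than chunk order.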