@@ -44,23 +44,32 @@ pub(crate) trait SnapshotProduceOperation: Send + Sync {
44
44
) -> impl Future < Output = Result < Vec < ManifestEntry > > > + Send ;
45
45
fn existing_manifest (
46
46
& self ,
47
- table : & Table ,
47
+ snapshot_produce : & SnapshotProducer < ' _ > ,
48
48
) -> impl Future < Output = Result < Vec < ManifestFile > > > + Send ;
49
49
}
50
50
51
51
pub ( crate ) struct DefaultManifestProcess ;
52
52
53
53
impl ManifestProcess for DefaultManifestProcess {
54
- fn process_manifests ( & self , manifests : Vec < ManifestFile > ) -> Vec < ManifestFile > {
54
+ fn process_manifests (
55
+ & self ,
56
+ _snapshot_produce : & SnapshotProducer < ' _ > ,
57
+ manifests : Vec < ManifestFile > ,
58
+ ) -> Vec < ManifestFile > {
55
59
manifests
56
60
}
57
61
}
58
62
59
63
pub ( crate ) trait ManifestProcess : Send + Sync {
60
- fn process_manifests ( & self , manifests : Vec < ManifestFile > ) -> Vec < ManifestFile > ;
64
+ fn process_manifests (
65
+ & self ,
66
+ snapshot_produce : & SnapshotProducer < ' _ > ,
67
+ manifests : Vec < ManifestFile > ,
68
+ ) -> Vec < ManifestFile > ;
61
69
}
62
70
63
- pub ( crate ) struct SnapshotProducer {
71
+ pub ( crate ) struct SnapshotProducer < ' a > {
72
+ pub ( crate ) table : & ' a Table ,
64
73
snapshot_id : i64 ,
65
74
commit_uuid : Uuid ,
66
75
key_metadata : Option < Vec < u8 > > ,
@@ -72,15 +81,16 @@ pub(crate) struct SnapshotProducer {
72
81
manifest_counter : RangeFrom < u64 > ,
73
82
}
74
83
75
- impl SnapshotProducer {
84
+ impl < ' a > SnapshotProducer < ' a > {
76
85
pub ( crate ) fn new (
77
- table : & Table ,
86
+ table : & ' a Table ,
78
87
commit_uuid : Uuid ,
79
88
key_metadata : Option < Vec < u8 > > ,
80
89
snapshot_properties : HashMap < String , String > ,
81
90
added_data_files : Vec < DataFile > ,
82
91
) -> Self {
83
92
Self {
93
+ table,
84
94
snapshot_id : Self :: generate_unique_snapshot_id ( table) ,
85
95
commit_uuid,
86
96
key_metadata,
@@ -177,28 +187,28 @@ impl SnapshotProducer {
177
187
snapshot_id
178
188
}
179
189
180
- fn new_manifest_writer (
181
- & mut self ,
182
- content : ManifestContentType ,
183
- table : & Table ,
184
- ) -> Result < ManifestWriter > {
190
+ fn new_manifest_writer ( & mut self , content : ManifestContentType ) -> Result < ManifestWriter > {
185
191
let new_manifest_path = format ! (
186
192
"{}/{}/{}-m{}.{}" ,
187
- table. metadata( ) . location( ) ,
193
+ self . table. metadata( ) . location( ) ,
188
194
META_ROOT_PATH ,
189
195
self . commit_uuid,
190
196
self . manifest_counter. next( ) . unwrap( ) ,
191
197
DataFileFormat :: Avro
192
198
) ;
193
- let output_file = table. file_io ( ) . new_output ( new_manifest_path) ?;
199
+ let output_file = self . table . file_io ( ) . new_output ( new_manifest_path) ?;
194
200
let builder = ManifestWriterBuilder :: new (
195
201
output_file,
196
202
Some ( self . snapshot_id ) ,
197
203
self . key_metadata . clone ( ) ,
198
- table. metadata ( ) . current_schema ( ) . clone ( ) ,
199
- table. metadata ( ) . default_partition_spec ( ) . as_ref ( ) . clone ( ) ,
204
+ self . table . metadata ( ) . current_schema ( ) . clone ( ) ,
205
+ self . table
206
+ . metadata ( )
207
+ . default_partition_spec ( )
208
+ . as_ref ( )
209
+ . clone ( ) ,
200
210
) ;
201
- if table. metadata ( ) . format_version ( ) == FormatVersion :: V1 {
211
+ if self . table . metadata ( ) . format_version ( ) == FormatVersion :: V1 {
202
212
Ok ( builder. build_v1 ( ) )
203
213
} else {
204
214
match content {
@@ -240,7 +250,7 @@ impl SnapshotProducer {
240
250
}
241
251
242
252
// Write manifest file for added data files and return the ManifestFile for ManifestList.
243
- async fn write_added_manifest ( & mut self , table : & Table ) -> Result < ManifestFile > {
253
+ async fn write_added_manifest ( & mut self ) -> Result < ManifestFile > {
244
254
let added_data_files = std:: mem:: take ( & mut self . added_data_files ) ;
245
255
if added_data_files. is_empty ( ) {
246
256
return Err ( Error :: new (
@@ -250,7 +260,7 @@ impl SnapshotProducer {
250
260
}
251
261
252
262
let snapshot_id = self . snapshot_id ;
253
- let format_version = table. metadata ( ) . format_version ( ) ;
263
+ let format_version = self . table . metadata ( ) . format_version ( ) ;
254
264
let manifest_entries = added_data_files. into_iter ( ) . map ( |data_file| {
255
265
let builder = ManifestEntry :: builder ( )
256
266
. status ( crate :: spec:: ManifestStatus :: Added )
@@ -263,7 +273,7 @@ impl SnapshotProducer {
263
273
builder. build ( )
264
274
}
265
275
} ) ;
266
- let mut writer = self . new_manifest_writer ( ManifestContentType :: Data , table ) ?;
276
+ let mut writer = self . new_manifest_writer ( ManifestContentType :: Data ) ?;
267
277
for entry in manifest_entries {
268
278
writer. add_entry ( entry) ?;
269
279
}
@@ -272,29 +282,27 @@ impl SnapshotProducer {
272
282
273
283
async fn manifest_file < OP : SnapshotProduceOperation , MP : ManifestProcess > (
274
284
& mut self ,
275
- table : & Table ,
276
285
snapshot_produce_operation : & OP ,
277
286
manifest_process : & MP ,
278
287
) -> Result < Vec < ManifestFile > > {
279
- let added_manifest = self . write_added_manifest ( table ) . await ?;
280
- let existing_manifests = snapshot_produce_operation. existing_manifest ( table ) . await ?;
288
+ let added_manifest = self . write_added_manifest ( ) . await ?;
289
+ let existing_manifests = snapshot_produce_operation. existing_manifest ( self ) . await ?;
281
290
// # TODO
282
291
// Support process delete entries.
283
292
284
293
let mut manifest_files = vec ! [ added_manifest] ;
285
294
manifest_files. extend ( existing_manifests) ;
286
- let manifest_files = manifest_process. process_manifests ( manifest_files) ;
295
+ let manifest_files = manifest_process. process_manifests ( self , manifest_files) ;
287
296
Ok ( manifest_files)
288
297
}
289
298
290
299
// Returns a `Summary` of the current snapshot
291
300
fn summary < OP : SnapshotProduceOperation > (
292
301
& self ,
293
- table : & Table ,
294
302
snapshot_produce_operation : & OP ,
295
303
) -> Result < Summary > {
296
304
let mut summary_collector = SnapshotSummaryCollector :: default ( ) ;
297
- let table_metadata = table. metadata_ref ( ) ;
305
+ let table_metadata = self . table . metadata_ref ( ) ;
298
306
299
307
let partition_summary_limit = if let Some ( limit) = table_metadata
300
308
. properties ( )
@@ -339,10 +347,10 @@ impl SnapshotProducer {
339
347
)
340
348
}
341
349
342
- fn generate_manifest_list_file_path ( & self , table : & Table , attempt : i64 ) -> String {
350
+ fn generate_manifest_list_file_path ( & self , attempt : i64 ) -> String {
343
351
format ! (
344
352
"{}/{}/snap-{}-{}-{}.{}" ,
345
- table. metadata( ) . location( ) ,
353
+ self . table. metadata( ) . location( ) ,
346
354
META_ROOT_PATH ,
347
355
self . snapshot_id,
348
356
attempt,
@@ -354,34 +362,34 @@ impl SnapshotProducer {
354
362
/// Finished building the action and return the [`ActionCommit`] to the transaction.
355
363
pub ( crate ) async fn commit < OP : SnapshotProduceOperation , MP : ManifestProcess > (
356
364
mut self ,
357
- table : & Table ,
358
365
snapshot_produce_operation : OP ,
359
366
process : MP ,
360
367
) -> Result < ActionCommit > {
361
368
let new_manifests = self
362
- . manifest_file ( table , & snapshot_produce_operation, & process)
369
+ . manifest_file ( & snapshot_produce_operation, & process)
363
370
. await ?;
364
- let next_seq_num = table. metadata ( ) . next_sequence_number ( ) ;
371
+ let next_seq_num = self . table . metadata ( ) . next_sequence_number ( ) ;
365
372
366
- let summary = self
367
- . summary ( table, & snapshot_produce_operation)
368
- . map_err ( |err| {
369
- Error :: new ( ErrorKind :: Unexpected , "Failed to create snapshot summary." )
370
- . with_source ( err)
371
- } ) ?;
373
+ let summary = self . summary ( & snapshot_produce_operation) . map_err ( |err| {
374
+ Error :: new ( ErrorKind :: Unexpected , "Failed to create snapshot summary." ) . with_source ( err)
375
+ } ) ?;
372
376
373
- let manifest_list_path = self . generate_manifest_list_file_path ( table , 0 ) ;
377
+ let manifest_list_path = self . generate_manifest_list_file_path ( 0 ) ;
374
378
375
- let mut manifest_list_writer = match table. metadata ( ) . format_version ( ) {
379
+ let mut manifest_list_writer = match self . table . metadata ( ) . format_version ( ) {
376
380
FormatVersion :: V1 => ManifestListWriter :: v1 (
377
- table. file_io ( ) . new_output ( manifest_list_path. clone ( ) ) ?,
381
+ self . table
382
+ . file_io ( )
383
+ . new_output ( manifest_list_path. clone ( ) ) ?,
378
384
self . snapshot_id ,
379
- table. metadata ( ) . current_snapshot_id ( ) ,
385
+ self . table . metadata ( ) . current_snapshot_id ( ) ,
380
386
) ,
381
387
FormatVersion :: V2 => ManifestListWriter :: v2 (
382
- table. file_io ( ) . new_output ( manifest_list_path. clone ( ) ) ?,
388
+ self . table
389
+ . file_io ( )
390
+ . new_output ( manifest_list_path. clone ( ) ) ?,
383
391
self . snapshot_id ,
384
- table. metadata ( ) . current_snapshot_id ( ) ,
392
+ self . table . metadata ( ) . current_snapshot_id ( ) ,
385
393
next_seq_num,
386
394
) ,
387
395
} ;
@@ -392,10 +400,10 @@ impl SnapshotProducer {
392
400
let new_snapshot = Snapshot :: builder ( )
393
401
. with_manifest_list ( manifest_list_path)
394
402
. with_snapshot_id ( self . snapshot_id )
395
- . with_parent_snapshot_id ( table. metadata ( ) . current_snapshot_id ( ) )
403
+ . with_parent_snapshot_id ( self . table . metadata ( ) . current_snapshot_id ( ) )
396
404
. with_sequence_number ( next_seq_num)
397
405
. with_summary ( summary)
398
- . with_schema_id ( table. metadata ( ) . current_schema_id ( ) )
406
+ . with_schema_id ( self . table . metadata ( ) . current_schema_id ( ) )
399
407
. with_timestamp_ms ( commit_ts)
400
408
. build ( ) ;
401
409
@@ -414,11 +422,11 @@ impl SnapshotProducer {
414
422
415
423
let requirements = vec ! [
416
424
TableRequirement :: UuidMatch {
417
- uuid: table. metadata( ) . uuid( ) ,
425
+ uuid: self . table. metadata( ) . uuid( ) ,
418
426
} ,
419
427
TableRequirement :: RefSnapshotIdMatch {
420
428
r#ref: MAIN_BRANCH . to_string( ) ,
421
- snapshot_id: table. metadata( ) . current_snapshot_id( ) ,
429
+ snapshot_id: self . table. metadata( ) . current_snapshot_id( ) ,
422
430
} ,
423
431
] ;
424
432
0 commit comments