Skip to content

Commit 8b51fa8

Browse files
zhang2014 and BohuTANG authored
chore(query): add spill profile for join (#15044)
* chore(query): add spill profile for join
* chore(query): add spill profile for join

---------

Co-authored-by: Bohu <overred.shuttler@gmail.com>
1 parent 89cdf6b commit 8b51fa8

File tree

2 files changed

+27
-43
lines changed

2 files changed

+27
-43
lines changed

src/query/service/src/pipelines/processors/transforms/transform_sort_spill.rs

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@ use std::marker::PhantomData;
1818
use std::sync::Arc;
1919
use std::time::Instant;
2020

21-
use databend_common_base::runtime::profile::Profile;
22-
use databend_common_base::runtime::profile::ProfileStatisticsName;
2321
use databend_common_exception::Result;
2422
use databend_common_expression::types::DataType;
2523
use databend_common_expression::types::NumberDataType;
@@ -285,13 +283,6 @@ where R: Rows + Sync + Send + 'static
285283
metrics_inc_sort_spill_write_count();
286284
metrics_inc_sort_spill_write_bytes(bytes);
287285
metrics_inc_sort_spill_write_milliseconds(ins.elapsed().as_millis() as u64);
288-
289-
Profile::record_usize_profile(ProfileStatisticsName::SpillWriteCount, 1);
290-
Profile::record_usize_profile(ProfileStatisticsName::SpillWriteBytes, bytes as usize);
291-
Profile::record_usize_profile(
292-
ProfileStatisticsName::SpillWriteTime,
293-
ins.elapsed().as_millis() as usize,
294-
);
295286
}
296287

297288
self.unmerged_blocks.push_back(vec![location].into());
@@ -351,16 +342,6 @@ where R: Rows + Sync + Send + 'static
351342
metrics_inc_sort_spill_read_count();
352343
metrics_inc_sort_spill_read_bytes(bytes);
353344
metrics_inc_sort_spill_read_milliseconds(ins.elapsed().as_millis() as u64);
354-
355-
Profile::record_usize_profile(ProfileStatisticsName::SpillReadCount, 1);
356-
Profile::record_usize_profile(
357-
ProfileStatisticsName::SpillReadBytes,
358-
bytes as usize,
359-
);
360-
Profile::record_usize_profile(
361-
ProfileStatisticsName::SpillReadTime,
362-
ins.elapsed().as_millis() as usize,
363-
);
364345
}
365346

366347
self.output_data = Some(block);
@@ -395,16 +376,6 @@ where R: Rows + Sync + Send + 'static
395376
metrics_inc_sort_spill_write_count();
396377
metrics_inc_sort_spill_write_bytes(bytes);
397378
metrics_inc_sort_spill_write_milliseconds(ins.elapsed().as_millis() as u64);
398-
399-
Profile::record_usize_profile(ProfileStatisticsName::SpillWriteCount, 1);
400-
Profile::record_usize_profile(
401-
ProfileStatisticsName::SpillWriteBytes,
402-
bytes as usize,
403-
);
404-
Profile::record_usize_profile(
405-
ProfileStatisticsName::SpillWriteTime,
406-
ins.elapsed().as_millis() as usize,
407-
);
408379
}
409380

410381
spilled.push_back(location);
@@ -437,16 +408,6 @@ impl SortedStream for BlockStream {
437408
metrics_inc_sort_spill_read_count();
438409
metrics_inc_sort_spill_read_bytes(bytes);
439410
metrics_inc_sort_spill_read_milliseconds(ins.elapsed().as_millis() as u64);
440-
441-
Profile::record_usize_profile(ProfileStatisticsName::SpillReadCount, 1);
442-
Profile::record_usize_profile(
443-
ProfileStatisticsName::SpillReadBytes,
444-
bytes as usize,
445-
);
446-
Profile::record_usize_profile(
447-
ProfileStatisticsName::SpillReadTime,
448-
ins.elapsed().as_millis() as usize,
449-
);
450411
}
451412

452413
Some(block)

src/query/service/src/spillers/spiller.rs

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,12 @@ use std::collections::HashSet;
1717
use std::fmt::Display;
1818
use std::fmt::Formatter;
1919
use std::sync::Arc;
20+
use std::time::Instant;
2021

2122
use databend_common_base::base::GlobalUniqName;
2223
use databend_common_base::base::ProgressValues;
24+
use databend_common_base::runtime::profile::Profile;
25+
use databend_common_base::runtime::profile::ProfileStatisticsName;
2326
use databend_common_catalog::table_context::TableContext;
2427
use databend_common_exception::Result;
2528
use databend_common_expression::arrow::deserialize_column;
@@ -35,8 +38,9 @@ use crate::spillers::spiller_buffer::SpillerBuffer;
3538
pub enum SpillerType {
3639
HashJoinBuild,
3740
HashJoinProbe,
38-
OrderBy, /* Todo: Add more spillers type
39-
* Aggregation */
41+
OrderBy,
42+
// Todo: Add more spillers type
43+
// Aggregation
4044
}
4145

4246
impl Display for SpillerType {
@@ -111,21 +115,31 @@ impl Spiller {
111115
pub async fn read_spilled_file(&self, file: &str) -> Result<(DataBlock, u64)> {
112116
debug_assert!(self.columns_layout.contains_key(file));
113117
let data = self.operator.read(file).await?;
114-
let bytes = data.len() as u64;
118+
let bytes = data.len();
115119

116120
let mut begin = 0;
121+
let instant = Instant::now();
117122
let mut columns = Vec::with_capacity(self.columns_layout.len());
118123
let columns_layout = self.columns_layout.get(file).unwrap();
119124
for column_layout in columns_layout.iter() {
120125
columns.push(deserialize_column(&data[begin..begin + column_layout]).unwrap());
121126
begin += column_layout;
122127
}
123128
let block = DataBlock::new_from_columns(columns);
124-
Ok((block, bytes))
129+
130+
Profile::record_usize_profile(ProfileStatisticsName::SpillReadCount, 1);
131+
Profile::record_usize_profile(ProfileStatisticsName::SpillReadBytes, bytes);
132+
Profile::record_usize_profile(
133+
ProfileStatisticsName::SpillReadTime,
134+
instant.elapsed().as_millis() as usize,
135+
);
136+
137+
Ok((block, bytes as u64))
125138
}
126139

127140
/// Write a [`DataBlock`] to storage.
128141
pub async fn spill_block(&mut self, data: DataBlock) -> Result<(String, u64)> {
142+
let instant = Instant::now();
129143
let unique_name = GlobalUniqName::unique();
130144
let location = format!("{}/{}", self.config.location_prefix, unique_name);
131145
let mut write_bytes = 0;
@@ -155,6 +169,13 @@ impl Spiller {
155169
}
156170
writer.close().await?;
157171

172+
Profile::record_usize_profile(ProfileStatisticsName::SpillWriteCount, 1);
173+
Profile::record_usize_profile(ProfileStatisticsName::SpillWriteBytes, write_bytes as usize);
174+
Profile::record_usize_profile(
175+
ProfileStatisticsName::SpillWriteTime,
176+
instant.elapsed().as_millis() as usize,
177+
);
178+
158179
Ok((location, write_bytes))
159180
}
160181

@@ -182,6 +203,7 @@ impl Spiller {
182203
/// Read spilled data with partition id
183204
pub async fn read_spilled_partition(&self, p_id: &u8) -> Result<Vec<DataBlock>> {
184205
debug_assert!(self.partition_location.contains_key(p_id));
206+
185207
let files = self.partition_location.get(p_id).unwrap().to_vec();
186208
let mut spilled_data = Vec::with_capacity(files.len());
187209
for file in files.iter() {
@@ -190,6 +212,7 @@ impl Spiller {
190212
spilled_data.push(block);
191213
}
192214
}
215+
193216
Ok(spilled_data)
194217
}
195218

0 commit comments

Comments
 (0)