Skip to content

Commit 7c1a8a8

Browse files
authored
feat: stage file pattern match the whole string after prefix. (#12935)
1 parent 321b99b commit 7c1a8a8

File tree

11 files changed

+48
-15
lines changed

11 files changed

+48
-15
lines changed

src/common/storage/src/stage.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ pub struct StageFilesInfo {
9292
impl StageFilesInfo {
9393
fn get_pattern(&self) -> Result<Option<Regex>> {
9494
match &self.pattern {
95-
Some(pattern) => match Regex::new(pattern) {
95+
Some(pattern) => match Regex::new(&format!("^{pattern}$")) {
9696
Ok(r) => Ok(Some(r)),
9797
Err(e) => Err(ErrorCode::SyntaxException(format!(
9898
"Pattern format invalid, got:{}, error:{:?}",
@@ -206,6 +206,7 @@ impl StageFilesInfo {
206206
first_only: bool,
207207
max_files: usize,
208208
) -> Result<Vec<StageFileInfo>> {
209+
let prefix_len = if path == "/" { 0 } else { path.len() };
209210
let root_meta = operator.stat(path).await;
210211
match root_meta {
211212
Ok(meta) => match meta.mode() {
@@ -233,7 +234,7 @@ impl StageFilesInfo {
233234
let mut limit: usize = 0;
234235
while let Some(obj) = list.try_next().await? {
235236
let meta = operator.metadata(&obj, StageFileInfo::meta_query()).await?;
236-
if check_file(obj.path(), meta.mode(), &pattern) {
237+
if check_file(&obj.path()[prefix_len..], meta.mode(), &pattern) {
237238
files.push(StageFileInfo::new(obj.path().to_string(), &meta));
238239
if first_only {
239240
return Ok(files);
@@ -263,6 +264,7 @@ fn blocking_list_files_with_pattern(
263264
first_only: bool,
264265
max_files: usize,
265266
) -> Result<Vec<StageFileInfo>> {
267+
let prefix_len = if path == "/" { 0 } else { path.len() };
266268
let operator = operator.blocking();
267269

268270
let root_meta = operator.stat(path);
@@ -293,7 +295,7 @@ fn blocking_list_files_with_pattern(
293295
for obj in list {
294296
let obj = obj?;
295297
let meta = operator.metadata(&obj, StageFileInfo::meta_query())?;
296-
if check_file(obj.path(), meta.mode(), &pattern) {
298+
if check_file(&obj.path()[prefix_len..], meta.mode(), &pattern) {
297299
files.push(StageFileInfo::new(obj.path().to_string(), &meta));
298300
if first_only {
299301
return Ok(files);

src/query/ast/src/ast/format/ast_format.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2179,10 +2179,10 @@ impl<'ast> Visitor<'ast> for AstFormatVisitor {
21792179
self.children.push(node);
21802180
}
21812181

2182-
fn visit_list_stage(&mut self, location: &'ast str, pattern: &'ast str) {
2182+
fn visit_list_stage(&mut self, location: &'ast str, pattern: &'ast Option<String>) {
21832183
let location_format_ctx = AstFormatContext::new(format!("Location {}", location));
21842184
let location_child = FormatTreeNode::new(location_format_ctx);
2185-
let pattern_format_ctx = AstFormatContext::new(format!("Pattern {}", pattern));
2185+
let pattern_format_ctx = AstFormatContext::new(format!("Pattern {:?}", pattern));
21862186
let pattern_child = FormatTreeNode::new(pattern_format_ctx);
21872187

21882188
let name = "ListStage".to_string();

src/query/ast/src/ast/statements/statement.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ pub enum Statement {
186186
},
187187
ListStage {
188188
location: String,
189-
pattern: String,
189+
pattern: Option<String>,
190190
},
191191

192192
// UserDefinedFileFormat
@@ -452,7 +452,7 @@ impl Display for Statement {
452452
Statement::AlterUDF(stmt) => write!(f, "{stmt}")?,
453453
Statement::ListStage { location, pattern } => {
454454
write!(f, "LIST @{location}")?;
455-
if !pattern.is_empty() {
455+
if let Some(pattern) = pattern {
456456
write!(f, " PATTERN = '{pattern}'")?;
457457
}
458458
}

src/query/ast/src/parser/statement.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1054,7 +1054,7 @@ pub fn statement(i: Input) -> IResult<StatementMsg> {
10541054
},
10551055
|(_, location, opt_pattern)| Statement::ListStage {
10561056
location,
1057-
pattern: opt_pattern.map(|v| v.2).unwrap_or_default(),
1057+
pattern: opt_pattern.map(|v| v.2),
10581058
},
10591059
);
10601060

src/query/ast/src/visitors/visitor.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,7 @@ pub trait Visitor<'ast>: Sized {
531531

532532
fn visit_remove_stage(&mut self, _location: &'ast str, _pattern: &'ast str) {}
533533

534-
fn visit_list_stage(&mut self, _location: &'ast str, _pattern: &'ast str) {}
534+
fn visit_list_stage(&mut self, _location: &'ast str, _pattern: &'ast Option<String>) {}
535535

536536
fn visit_create_file_format(
537537
&mut self,

src/query/ast/src/visitors/visitor_mut.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -546,7 +546,7 @@ pub trait VisitorMut: Sized {
546546

547547
fn visit_remove_stage(&mut self, _location: &mut String, _pattern: &mut String) {}
548548

549-
fn visit_list_stage(&mut self, _location: &mut String, _pattern: &mut String) {}
549+
fn visit_list_stage(&mut self, _location: &mut String, _pattern: &mut Option<String>) {}
550550

551551
fn visit_create_file_format(
552552
&mut self,

src/query/ast/tests/it/testdata/statement.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7120,7 +7120,7 @@ LIST @stage_a
71207120
---------- AST ------------
71217121
ListStage {
71227122
location: "stage_a",
7123-
pattern: "",
7123+
pattern: None,
71247124
}
71257125

71267126

@@ -7131,7 +7131,7 @@ LIST @~
71317131
---------- AST ------------
71327132
ListStage {
71337133
location: "~",
7134-
pattern: "",
7134+
pattern: None,
71357135
}
71367136

71377137

src/query/sql/src/planner/binder/binder.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,14 @@ impl<'a> Binder {
346346

347347
// Stages
348348
Statement::ShowStages => self.bind_rewrite_to_query(bind_context, "SELECT name, stage_type, number_of_files, creator, comment FROM system.stages ORDER BY name", RewriteKind::ShowStages).await?,
349-
Statement::ListStage { location, pattern } => self.bind_rewrite_to_query(bind_context, format!("SELECT * FROM LIST_STAGE(location => '@{location}', pattern => '{pattern}')").as_str(), RewriteKind::ListStage).await?,
349+
Statement::ListStage { location, pattern } => {
350+
let pattern = if let Some(pattern) = pattern {
351+
format!(", pattern => '{pattern}'")
352+
} else {
353+
"".to_string()
354+
};
355+
self.bind_rewrite_to_query(bind_context, format!("SELECT * FROM LIST_STAGE(location => '@{location}'{pattern})").as_str(), RewriteKind::ListStage).await?
356+
},
350357
Statement::DescribeStage { stage_name } => self.bind_rewrite_to_query(bind_context, format!("SELECT * FROM system.stages WHERE name = '{stage_name}'").as_str(), RewriteKind::DescribeStage).await?,
351358
Statement::CreateStage(stmt) => self.bind_create_stage(stmt).await?,
352359
Statement::DropStage {

tests/sqllogictests/suites/stage/list_stage

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ parquet/multi_page/multi_page_3.parquet 4020 NULL
88
parquet/multi_page/multi_page_4.parquet 6636 NULL
99

1010
query
11-
select name, size, creator from list_stage(location => '@data/parquet/', pattern => 'complex[.]*')
11+
select name, size, creator from list_stage(location => '@data/parquet/', pattern => 'complex[.].*')
1212
----
1313
parquet/complex.parquet 92762 NULL
1414

1515
query
16-
select name, size, creator from list_stage(location => '@data/', pattern => 'parquet/complex[.]*')
16+
select name, size, creator from list_stage(location => '@data/', pattern => 'parquet/complex[.].*')
1717
----
1818
parquet/complex.parquet 92762 NULL
1919

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# the following 2 cases show that `pattern` only matching sub path (or suffix) after the 'parquet/' prefix
2+
# wrong case
3+
query
4+
select name from list_stage(location => '@data/parquet/', pattern => 'parquet/.*_page_1.*') order by name
5+
----
6+
7+
# right case
8+
query
9+
select name from list_stage(location => '@data/parquet/', pattern => 'multi_page/.*_page_1.*') order by name
10+
----
11+
parquet/multi_page/multi_page_1.parquet
12+
13+
14+
# the following 2 cases show that `pattern` need to matching match the whole suffix, it is in fact '%<pattern>$'
15+
# wrong case
16+
query
17+
select name from list_stage(location => '@data/parquet/', pattern => 'multi_page_1') order by name
18+
----
19+
20+
query
21+
select name from list_stage(location => '@data/parquet/', pattern => '.*multi_page_1.*') order by name
22+
----
23+
parquet/multi_page/multi_page_1.parquet

0 commit comments

Comments
 (0)