Skip to content

Commit 5d4296c

Browse files
test: Add MATCH_RECOGNIZE sqllogic tests
1 parent 543c7e6 commit 5d4296c

33 files changed

+13373
-0
lines changed

datafusion/sqllogictest/test_files/match_recognize/explain/basic_syntax/optional_clauses.slt

Lines changed: 4155 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Import common test data
19+
include ../../init_data.slt.part
20+
21+
# Test minimal MATCH_RECOGNIZE with required clauses only
# (no PARTITION BY, ORDER BY, or MEASURES; ALL ROWS PER MATCH is spelled out).
# Expected plan shape: the DEFINE predicate is projected as __mr_symbol_A below
# the pattern node, and the output keeps only rows where __mr_is_included_row holds.
22+
query TT
23+
EXPLAIN SELECT * FROM stock_price
24+
MATCH_RECOGNIZE (
25+
ALL ROWS PER MATCH
26+
PATTERN (A)
27+
DEFINE A AS price > 100
28+
)
29+
----
30+
logical_plan
31+
01)Projection: stock_price.company, stock_price.price_date, stock_price.price
32+
02)--Filter: __mr_is_included_row
33+
03)----Projection: stock_price.company, stock_price.price_date, stock_price.price, __mr_is_included_row
34+
04)------MatchRecognizePattern: pattern=[A]
35+
05)--------Projection: stock_price.company, stock_price.price_date, stock_price.price, stock_price.price > Int32(100) AS __mr_symbol_A
36+
06)----------TableScan: stock_price projection=[company, price_date, price]
37+
physical_plan
38+
01)CoalesceBatchesExec: target_batch_size=8192, schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N]
39+
02)--FilterExec: __mr_is_included_row@3, projection=[company@0, price_date@1, price@2], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N]
40+
03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1, schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_is_included_row:Boolean]
41+
04)------ProjectionExec: expr=[company@0 as company, price_date@1 as price_date, price@2 as price, __mr_is_included_row@8 as __mr_is_included_row], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_is_included_row:Boolean]
42+
05)--------MatchRecognizePatternExec: pattern=[A], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_symbol_A:Boolean;N, __mr_classifier:Utf8, __mr_match_number:UInt64, __mr_match_sequence_number:UInt64, __mr_is_last_match_row:Boolean, __mr_is_included_row:Boolean]
43+
06)----------ProjectionExec: expr=[company@0 as company, price_date@1 as price_date, price@2 as price, price@2 > 100 as __mr_symbol_A], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_symbol_A:Boolean;N]
44+
07)------------DataSourceExec: partitions=1, partition_sizes=[1], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N]
45+
46+
# Test missing PATTERN clause (should fail)
# The parser rejects the statement at the DEFINE token, where PATTERN was expected.
# The expected message pins that token's position (line 3, column 5 of the
# statement), so DEFINE must keep its four-space indent; re-indenting or
# re-wrapping the query changes the reported position.
47+
query error DataFusion error: SQL error: ParserError\("Expected: PATTERN, found: DEFINE at Line: 3, Column: 5"\)
48+
SELECT * FROM stock_price
49+
MATCH_RECOGNIZE (
50+
    DEFINE A AS price > 100
51+
)
52+
53+
# Test missing DEFINE clause when pattern uses symbols (should fail)
# The parser reaches the closing parenthesis (line 4, column 1 of the statement)
# where it expected DEFINE, so the failure is a parse error, not a planning error.
54+
query error DataFusion error: SQL error: ParserError\("Expected: DEFINE, found: \) at Line: 4, Column: 1"\)
55+
SELECT * FROM stock_price
56+
MATCH_RECOGNIZE (
57+
PATTERN (A B)
58+
)
59+
60+
# Test undefined symbol in pattern (B is implicitly TRUE)
# B has no DEFINE entry, so the plan projects Boolean(true) AS __mr_symbol_B.
# In the expected schema __mr_symbol_B carries no ";N" marker, unlike the
# predicate-derived __mr_symbol_A, and the extra symbol column moves
# __mr_is_included_row from index 8 to index 9.
61+
query TT
62+
EXPLAIN SELECT * FROM stock_price
63+
MATCH_RECOGNIZE (
64+
ALL ROWS PER MATCH
65+
PATTERN (A B)
66+
DEFINE A AS price > 100
67+
)
68+
----
69+
logical_plan
70+
01)Projection: stock_price.company, stock_price.price_date, stock_price.price
71+
02)--Filter: __mr_is_included_row
72+
03)----Projection: stock_price.company, stock_price.price_date, stock_price.price, __mr_is_included_row
73+
04)------MatchRecognizePattern: pattern=[A B]
74+
05)--------Projection: stock_price.company, stock_price.price_date, stock_price.price, stock_price.price > Int32(100) AS __mr_symbol_A, Boolean(true) AS __mr_symbol_B
75+
06)----------TableScan: stock_price projection=[company, price_date, price]
76+
physical_plan
77+
01)CoalesceBatchesExec: target_batch_size=8192, schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N]
78+
02)--FilterExec: __mr_is_included_row@3, projection=[company@0, price_date@1, price@2], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N]
79+
03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1, schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_is_included_row:Boolean]
80+
04)------ProjectionExec: expr=[company@0 as company, price_date@1 as price_date, price@2 as price, __mr_is_included_row@9 as __mr_is_included_row], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_is_included_row:Boolean]
81+
05)--------MatchRecognizePatternExec: pattern=[A B], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_symbol_A:Boolean;N, __mr_symbol_B:Boolean, __mr_classifier:Utf8, __mr_match_number:UInt64, __mr_match_sequence_number:UInt64, __mr_is_last_match_row:Boolean, __mr_is_included_row:Boolean]
82+
06)----------ProjectionExec: expr=[company@0 as company, price_date@1 as price_date, price@2 as price, price@2 > 100 as __mr_symbol_A, true as __mr_symbol_B], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_symbol_A:Boolean;N, __mr_symbol_B:Boolean]
83+
07)------------DataSourceExec: partitions=1, partition_sizes=[1], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N]
84+
85+
# Test unused symbol in DEFINE (B defined but not used in pattern)
# The statement is accepted and the definition of B is dropped from the plan:
# only __mr_symbol_A is projected, and no __mr_symbol_B column appears in any schema.
86+
query TT
87+
EXPLAIN SELECT * FROM stock_price
88+
MATCH_RECOGNIZE (
89+
ALL ROWS PER MATCH
90+
PATTERN (A)
91+
DEFINE
92+
A AS price > 100,
93+
B AS price < 50
94+
)
95+
----
96+
logical_plan
97+
01)Projection: stock_price.company, stock_price.price_date, stock_price.price
98+
02)--Filter: __mr_is_included_row
99+
03)----Projection: stock_price.company, stock_price.price_date, stock_price.price, __mr_is_included_row
100+
04)------MatchRecognizePattern: pattern=[A]
101+
05)--------Projection: stock_price.company, stock_price.price_date, stock_price.price, stock_price.price > Int32(100) AS __mr_symbol_A
102+
06)----------TableScan: stock_price projection=[company, price_date, price]
103+
physical_plan
104+
01)CoalesceBatchesExec: target_batch_size=8192, schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N]
105+
02)--FilterExec: __mr_is_included_row@3, projection=[company@0, price_date@1, price@2], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N]
106+
03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1, schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_is_included_row:Boolean]
107+
04)------ProjectionExec: expr=[company@0 as company, price_date@1 as price_date, price@2 as price, __mr_is_included_row@8 as __mr_is_included_row], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_is_included_row:Boolean]
108+
05)--------MatchRecognizePatternExec: pattern=[A], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_symbol_A:Boolean;N, __mr_classifier:Utf8, __mr_match_number:UInt64, __mr_match_sequence_number:UInt64, __mr_is_last_match_row:Boolean, __mr_is_included_row:Boolean]
109+
06)----------ProjectionExec: expr=[company@0 as company, price_date@1 as price_date, price@2 as price, price@2 > 100 as __mr_symbol_A], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_symbol_A:Boolean;N]
110+
07)------------DataSourceExec: partitions=1, partition_sizes=[1], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N]
111+
112+
# Test multiple symbols in pattern
# Each DEFINE predicate becomes its own __mr_symbol_<name> column, in A, B, C
# order, placed after the table columns and ahead of the __mr_* match metadata
# columns; __mr_is_included_row therefore sits at index 10.
113+
query TT
114+
EXPLAIN SELECT * FROM stock_price
115+
MATCH_RECOGNIZE (
116+
ALL ROWS PER MATCH
117+
PATTERN (A B C)
118+
DEFINE
119+
A AS price > 100,
120+
B AS price < 50,
121+
C AS price > 75
122+
)
123+
----
124+
logical_plan
125+
01)Projection: stock_price.company, stock_price.price_date, stock_price.price
126+
02)--Filter: __mr_is_included_row
127+
03)----Projection: stock_price.company, stock_price.price_date, stock_price.price, __mr_is_included_row
128+
04)------MatchRecognizePattern: pattern=[A B C]
129+
05)--------Projection: stock_price.company, stock_price.price_date, stock_price.price, stock_price.price > Int32(100) AS __mr_symbol_A, stock_price.price < Int32(50) AS __mr_symbol_B, stock_price.price > Int32(75) AS __mr_symbol_C
130+
06)----------TableScan: stock_price projection=[company, price_date, price]
131+
physical_plan
132+
01)CoalesceBatchesExec: target_batch_size=8192, schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N]
133+
02)--FilterExec: __mr_is_included_row@3, projection=[company@0, price_date@1, price@2], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N]
134+
03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1, schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_is_included_row:Boolean]
135+
04)------ProjectionExec: expr=[company@0 as company, price_date@1 as price_date, price@2 as price, __mr_is_included_row@10 as __mr_is_included_row], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_is_included_row:Boolean]
136+
05)--------MatchRecognizePatternExec: pattern=[A B C], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_symbol_A:Boolean;N, __mr_symbol_B:Boolean;N, __mr_symbol_C:Boolean;N, __mr_classifier:Utf8, __mr_match_number:UInt64, __mr_match_sequence_number:UInt64, __mr_is_last_match_row:Boolean, __mr_is_included_row:Boolean]
137+
06)----------ProjectionExec: expr=[company@0 as company, price_date@1 as price_date, price@2 as price, price@2 > 100 as __mr_symbol_A, price@2 < 50 as __mr_symbol_B, price@2 > 75 as __mr_symbol_C], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_symbol_A:Boolean;N, __mr_symbol_B:Boolean;N, __mr_symbol_C:Boolean;N]
138+
07)------------DataSourceExec: partitions=1, partition_sizes=[1], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N]
139+
140+
# Test complex DEFINE expressions
# A compound predicate (a numeric comparison AND a string equality) stays a
# single __mr_symbol_A expression; in the logical plan the 'ACME' literal is
# typed Utf8View, the same type as the company column it is compared with.
141+
query TT
142+
EXPLAIN SELECT * FROM stock_price
143+
MATCH_RECOGNIZE (
144+
ALL ROWS PER MATCH
145+
PATTERN (A)
146+
DEFINE A AS price > 100 AND company = 'ACME'
147+
)
148+
----
149+
logical_plan
150+
01)Projection: stock_price.company, stock_price.price_date, stock_price.price
151+
02)--Filter: __mr_is_included_row
152+
03)----Projection: stock_price.company, stock_price.price_date, stock_price.price, __mr_is_included_row
153+
04)------MatchRecognizePattern: pattern=[A]
154+
05)--------Projection: stock_price.company, stock_price.price_date, stock_price.price, stock_price.price > Int32(100) AND stock_price.company = Utf8View("ACME") AS __mr_symbol_A
155+
06)----------TableScan: stock_price projection=[company, price_date, price]
156+
physical_plan
157+
01)CoalesceBatchesExec: target_batch_size=8192, schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N]
158+
02)--FilterExec: __mr_is_included_row@3, projection=[company@0, price_date@1, price@2], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N]
159+
03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1, schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_is_included_row:Boolean]
160+
04)------ProjectionExec: expr=[company@0 as company, price_date@1 as price_date, price@2 as price, __mr_is_included_row@8 as __mr_is_included_row], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_is_included_row:Boolean]
161+
05)--------MatchRecognizePatternExec: pattern=[A], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_symbol_A:Boolean;N, __mr_classifier:Utf8, __mr_match_number:UInt64, __mr_match_sequence_number:UInt64, __mr_is_last_match_row:Boolean, __mr_is_included_row:Boolean]
162+
06)----------ProjectionExec: expr=[company@0 as company, price_date@1 as price_date, price@2 as price, price@2 > 100 AND company@0 = ACME as __mr_symbol_A], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N, __mr_symbol_A:Boolean;N]
163+
07)------------DataSourceExec: partitions=1, partition_sizes=[1], schema=[company:Utf8View;N, price_date:Date32;N, price:Int32;N]

0 commit comments

Comments
 (0)