Skip to content

Commit 5906a93

Browse files
committed
Add experimental draft support for GPML-style graph query
1 parent f99ce48 commit 5906a93

File tree

6 files changed

+549
-5
lines changed

6 files changed

+549
-5
lines changed

CHANGELOG.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,19 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

88
## [Unreleased]
9+
- An experimental (pending [#15](https://github.com/partiql/partiql-docs/issues/15)) embedding of a subset of
10+
the [GPML (Graph Pattern Matching Language)](https://arxiv.org/abs/2112.06217) graph query into the `FROM` clause,
11+
supporting the features listed below. The use within the grammar is based on the assumption of a new graph data type being added to the
12+
specification of data types within PartiQL, and should be considered experimental until the semantics of the graph
13+
data type are specified.
14+
- basic and abbreviated node and edge patterns (section 4.1 of the GPML paper)
15+
- concatenated path patterns (section 4.2 of the GPML paper)
16+
- path variables (section 4.2 of the GPML paper)
17+
- graph patterns (i.e., comma separated path patterns) (section 4.3 of the GPML paper)
18+
- parenthesized patterns (section 4.4 of the GPML paper)
19+
- path quantifiers (section 4.4 of the GPML paper)
20+
- restrictors and selectors (section 5.1 of the GPML paper)
21+
- pre-filters and post-filters (section 5.2 of the GPML paper)
922

1023
## [0.1.0] - 2022-08-05
1124
### Added
@@ -18,5 +31,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1831
- PartiQL Playground proof of concept (POC)
1932
- PartiQL CLI with REPL and query visualization features
2033

34+
2135
[Unreleased]: https://github.com/partiql/partiql-lang-rust/compare/v0.1.0...HEAD
2236
[0.1.0]: https://github.com/partiql/partiql-lang-rust/compare/v0.1.0

partiql-ast/src/ast.rs

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use rust_decimal::Decimal as RustDecimal;
1212

1313
use std::fmt;
1414
use std::fmt::Display;
15+
use std::num::NonZeroU32;
1516
use std::ops::Range;
1617

1718
#[cfg(feature = "serde")]
@@ -258,6 +259,13 @@ pub type CallAst = AstBytePos<Call>;
258259
pub type CaseAst = AstBytePos<Case>;
259260
pub type FromClauseAst = AstBytePos<FromClause>;
260261
pub type FromLetAst = AstBytePos<FromLet>;
262+
pub type GraphMatchAst = AstBytePos<GraphMatch>;
263+
pub type GraphMatchExprAst = AstBytePos<GraphMatchExpr>;
264+
pub type GraphMatchEdgeAst = AstBytePos<GraphMatchEdge>;
265+
pub type GraphMatchNodeAst = AstBytePos<GraphMatchNode>;
266+
pub type GraphMatchPatternAst = AstBytePos<GraphMatchPattern>;
267+
pub type GraphMatchPatternPartAst = AstBytePos<GraphMatchPatternPart>;
268+
pub type GraphMatchQuantifierAst = AstBytePos<GraphMatchQuantifier>;
261269
pub type GroupByExprAst = AstBytePos<GroupByExpr>;
262270
pub type GroupKeyAst = AstBytePos<GroupKey>;
263271
pub type InAst = AstBytePos<In>;
@@ -675,6 +683,9 @@ pub enum FromClause {
675683
FromLet(FromLetAst),
676684
/// <from_source> JOIN \[INNER | LEFT | RIGHT | FULL\] <from_source> ON <expr>
677685
Join(JoinAst),
686+
687+
/// <expr> MATCH <graph_pattern>
688+
GraphMatch(GraphMatchAst),
678689
}
679690

680691
#[derive(Clone, Debug, PartialEq)]
@@ -724,6 +735,152 @@ pub enum JoinKind {
724735
Cross,
725736
}
726737

738+
#[derive(Clone, Debug, PartialEq)]
739+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
740+
pub struct GraphMatch {
741+
pub expr: Box<Expr>,
742+
pub graph_expr: Box<GraphMatchExprAst>,
743+
}
744+
745+
/// The direction of an edge
746+
/// | Orientation | Edge pattern | Abbreviation |
747+
/// |---------------------------+--------------+--------------|
748+
/// | Pointing left | <−[ spec ]− | <− |
749+
/// | Undirected | ~[ spec ]~ | ~ |
750+
/// | Pointing right | −[ spec ]−> | −> |
751+
/// | Left or undirected | <~[ spec ]~ | <~ |
752+
/// | Undirected or right | ~[ spec ]~> | ~> |
753+
/// | Left or right | <−[ spec ]−> | <−> |
754+
/// | Left, undirected or right | −[ spec ]− | − |
755+
///
756+
/// Fig. 5. Table of edge patterns:
757+
/// https://arxiv.org/abs/2112.06217
758+
#[derive(Clone, Debug, PartialEq)]
759+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
760+
pub enum GraphMatchDirection {
761+
Left,
762+
Undirected,
763+
Right,
764+
LeftOrUndirected,
765+
UndirectedOrRight,
766+
LeftOrRight,
767+
LeftOrUndirectedOrRight,
768+
}
769+
770+
/// A part of a graph pattern
771+
#[derive(Clone, Debug, PartialEq)]
772+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
773+
pub enum GraphMatchPatternPart {
774+
/// A single node in a graph pattern.
775+
Node(GraphMatchNodeAst),
776+
777+
/// A single edge in a graph pattern.
778+
Edge(GraphMatchEdgeAst),
779+
780+
/// A sub-pattern.
781+
Pattern(GraphMatchPatternAst),
782+
}
783+
784+
/// A quantifier for graph edges or patterns. (e.g., the `{2,5}` in `MATCH (x)->{2,5}(y)`)
785+
#[derive(Clone, Debug, PartialEq)]
786+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
787+
pub struct GraphMatchQuantifier {
788+
pub lower: u32,
789+
pub upper: Option<NonZeroU32>,
790+
}
791+
792+
/// A path restrictor
793+
/// | Keyword | Description
794+
/// |----------------+--------------
795+
/// | TRAIL | No repeated edges.
796+
/// | ACYCLIC | No repeated nodes.
797+
/// | SIMPLE | No repeated nodes, except that the first and last nodes may be the same.
798+
///
799+
/// Fig. 7. Table of restrictors:
800+
/// https://arxiv.org/abs/2112.06217
801+
#[derive(Clone, Debug, PartialEq)]
802+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
803+
pub enum GraphMatchRestrictor {
804+
Trail,
805+
Acyclic,
806+
Simple,
807+
}
808+
809+
/// A single node in a graph pattern.
810+
#[derive(Clone, Debug, PartialEq)]
811+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
812+
pub struct GraphMatchNode {
813+
/// an optional node pre-filter, e.g.: `WHERE c.name='Alarm'` in `MATCH (c WHERE c.name='Alarm')`
814+
pub prefilter: Option<Box<Expr>>,
815+
/// the optional element variable of the node match, e.g.: `x` in `MATCH (x)`
816+
pub variable: Option<SymbolPrimitive>,
817+
/// the optional label(s) to match for the node, e.g.: `Entity` in `MATCH (x:Entity)`
818+
pub label: Option<Vec<SymbolPrimitive>>,
819+
}
820+
821+
/// A single edge in a graph pattern.
822+
#[derive(Clone, Debug, PartialEq)]
823+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
824+
pub struct GraphMatchEdge {
825+
/// edge direction
826+
pub direction: GraphMatchDirection,
827+
/// an optional quantifier for the edge match
828+
pub quantifier: Option<GraphMatchQuantifierAst>,
829+
/// an optional edge pre-filter, e.g.: `WHERE t.capacity>100` in `MATCH -[t:hasSupply WHERE t.capacity>100]->`
830+
pub prefilter: Option<Box<Expr>>,
831+
/// the optional element variable of the edge match, e.g.: `t` in `MATCH -[t]->`
832+
pub variable: Option<SymbolPrimitive>,
833+
/// the optional label(s) to match for the edge, e.g.: `Target` in `MATCH -[t:Target]->`
834+
pub label: Option<Vec<SymbolPrimitive>>,
835+
}
836+
837+
/// A single graph match pattern.
838+
#[derive(Clone, Debug, PartialEq)]
839+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
840+
pub struct GraphMatchPattern {
841+
pub restrictor: Option<GraphMatchRestrictor>,
842+
/// an optional quantifier for the entire pattern match
843+
pub quantifier: Option<GraphMatchQuantifierAst>,
844+
/// an optional pattern pre-filter, e.g.: `WHERE a.name=b.name` in `MATCH [(a)->(b) WHERE a.name=b.name]`
845+
pub prefilter: Option<Box<Expr>>,
846+
/// the optional path variable of the pattern, e.g.: `p` in `MATCH p = (a) -[t]-> (b)`
847+
pub variable: Option<SymbolPrimitive>,
848+
/// the ordered pattern parts
849+
pub parts: Vec<GraphMatchPatternPart>,
850+
}
851+
852+
/// A path selector
853+
/// | Keyword
854+
/// |------------------
855+
/// | ANY SHORTEST
856+
/// | ALL SHORTEST
857+
/// | ANY
858+
/// | ANY k
859+
/// | SHORTEST k
860+
/// | SHORTEST k GROUP
861+
///
862+
/// Fig. 8. Table of selectors:
863+
/// https://arxiv.org/abs/2112.06217
864+
#[derive(Clone, Debug, PartialEq)]
865+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
866+
pub enum GraphMatchSelector {
867+
AnyShortest,
868+
AllShortest,
869+
Any,
870+
AnyK(NonZeroU32),
871+
ShortestK(NonZeroU32),
872+
ShortestKGroup(NonZeroU32),
873+
}
874+
875+
/// A graph match clause as defined in GPML
876+
/// See https://arxiv.org/abs/2112.06217
877+
#[derive(Clone, Debug, PartialEq)]
878+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
879+
pub struct GraphMatchExpr {
880+
pub selector: Option<GraphMatchSelector>,
881+
pub patterns: Vec<GraphMatchPatternAst>,
882+
}
883+
727884
/// A generic pair of expressions. Used in the `pub struct`, `searched_case`
728885
/// and `simple_case` expr variants above.
729886
#[derive(Clone, Debug, PartialEq)]

partiql-parser/benches/bench_parse.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,20 @@ const Q_COMPLEX_FEXPR: &str = r#"
3434
AS deltas FROM SOURCE_VIEW_DELTA_FULL_TRANSACTIONS delta_full_transactions
3535
"#;
3636

37+
const Q_COMPLEX_MATCH: &str = r#"
38+
SELECT (
39+
SELECT numRec, data
40+
FROM
41+
(deltaGraph MATCH (t) -[:hasChange]-> (dt), (dt) -[:checkPointedBy]-> (t1)),
42+
(
43+
SELECT foo(u.id), bar(review), rindex
44+
FROM delta.data as u CROSS JOIN UNPIVOT u.reviews as review AT rindex
45+
) as data,
46+
delta.numRec as numRec
47+
)
48+
AS deltas FROM SOURCE_VIEW_DELTA_FULL_TRANSACTIONS delta_full_transactions
49+
"#;
50+
3751
fn parse_bench(c: &mut Criterion) {
3852
fn parse(text: &str) -> ParserResult {
3953
Parser::default().parse(text)
@@ -45,6 +59,9 @@ fn parse_bench(c: &mut Criterion) {
4559
c.bench_function("parse-complex-fexpr", |b| {
4660
b.iter(|| parse(black_box(Q_COMPLEX_FEXPR)))
4761
});
62+
c.bench_function("parse-complex-match", |b| {
63+
b.iter(|| parse(black_box(Q_COMPLEX_MATCH)))
64+
});
4865
}
4966

5067
criterion_group! {

partiql-parser/src/lexer.rs

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,8 @@ pub enum Token<'input> {
467467
Caret,
468468
#[token(".")]
469469
Period,
470+
#[token("~")]
471+
Tilde,
470472
#[token("||")]
471473
DblPipe,
472474

@@ -512,10 +514,14 @@ pub enum Token<'input> {
512514
// Keywords
513515
#[regex("(?i:All)")]
514516
All,
517+
#[regex("(?i:Acyclic)")]
518+
Acyclic,
515519
#[regex("(?i:Asc)")]
516520
Asc,
517521
#[regex("(?i:And)")]
518522
And,
523+
#[regex("(?i:Any)")]
524+
Any,
519525
#[regex("(?i:As)")]
520526
As,
521527
#[regex("(?i:At)")]
@@ -576,6 +582,8 @@ pub enum Token<'input> {
576582
Like,
577583
#[regex("(?i:Limit)")]
578584
Limit,
585+
#[regex("(?i:Match)")]
586+
Match,
579587
#[regex("(?i:Missing)")]
580588
Missing,
581589
#[regex("(?i:Natural)")]
@@ -612,8 +620,14 @@ pub enum Token<'input> {
612620
Time,
613621
#[regex("(?i:Timestamp)")]
614622
Timestamp,
623+
#[regex("(?i:Simple)")]
624+
Simple,
625+
#[regex("(?i:Shortest)")]
626+
Shortest,
615627
#[regex("(?i:Then)")]
616628
Then,
629+
#[regex("(?i:Trail)")]
630+
Trail,
617631
#[regex("(?i:True)")]
618632
True,
619633
#[regex("(?i:Union)")]
@@ -642,9 +656,11 @@ impl<'input> Token<'input> {
642656
pub fn is_keyword(&self) -> bool {
643657
matches!(
644658
self,
645-
Token::All
659+
Token::Acyclic
660+
| Token::All
646661
| Token::Asc
647662
| Token::And
663+
| Token::Any
648664
| Token::As
649665
| Token::At
650666
| Token::Between
@@ -671,6 +687,7 @@ impl<'input> Token<'input> {
671687
| Token::Left
672688
| Token::Like
673689
| Token::Limit
690+
| Token::Match
674691
| Token::Missing
675692
| Token::Natural
676693
| Token::Not
@@ -689,7 +706,10 @@ impl<'input> Token<'input> {
689706
| Token::Table
690707
| Token::Time
691708
| Token::Timestamp
709+
| Token::Simple
710+
| Token::Shortest
692711
| Token::Then
712+
| Token::Trail
693713
| Token::Union
694714
| Token::Unpivot
695715
| Token::Using
@@ -736,6 +756,7 @@ impl<'input> fmt::Display for Token<'input> {
736756
Token::Slash => write!(f, "/"),
737757
Token::Caret => write!(f, "^"),
738758
Token::Period => write!(f, "."),
759+
Token::Tilde => write!(f, "~"),
739760
Token::DblPipe => write!(f, "||"),
740761
Token::UnquotedIdent(id) => write!(f, "<{}:UNQUOTED_IDENT>", id),
741762
Token::QuotedIdent(id) => write!(f, "<{}:QUOTED_IDENT>", id),
@@ -748,9 +769,11 @@ impl<'input> fmt::Display for Token<'input> {
748769
Token::EmbeddedIonQuote => write!(f, "<ION>"),
749770
Token::Ion(txt) => write!(f, "<{}:ION>", txt),
750771

751-
Token::All
772+
Token::Acyclic
773+
| Token::All
752774
| Token::Asc
753775
| Token::And
776+
| Token::Any
754777
| Token::As
755778
| Token::At
756779
| Token::Between
@@ -781,6 +804,7 @@ impl<'input> fmt::Display for Token<'input> {
781804
| Token::Left
782805
| Token::Like
783806
| Token::Limit
807+
| Token::Match
784808
| Token::Missing
785809
| Token::Natural
786810
| Token::Not
@@ -799,7 +823,10 @@ impl<'input> fmt::Display for Token<'input> {
799823
| Token::Table
800824
| Token::Time
801825
| Token::Timestamp
826+
| Token::Simple
827+
| Token::Shortest
802828
| Token::Then
829+
| Token::Trail
803830
| Token::True
804831
| Token::Union
805832
| Token::Unpivot
@@ -836,7 +863,8 @@ mod tests {
836863
"WiTH Where Value uSiNg Unpivot UNION True Select right Preserve pivoT Outer Order Or \
837864
On Offset Nulls Null Not Natural Missing Limit Like Left Lateral Last Join \
838865
Intersect Is Inner In Having Group From For Full First False Except Escape Desc \
839-
Cross Table Time Timestamp Date By Between At As And Asc All Values Case When Then Else End";
866+
Cross Table Time Timestamp Date By Between At As And Asc All Values Case When Then Else End \
867+
Match Any Shortest Trail Acyclic Simple";
840868
let symbols = symbols.split(' ').chain(primitives.split(' '));
841869
let keywords = keywords.split(' ');
842870

@@ -858,7 +886,7 @@ mod tests {
858886
"<unquoted_atident:UNQUOTED_ATIDENT>", "GROUP", "<quoted_atident:QUOTED_ATIDENT>",
859887
"FROM", "FOR", "FULL", "FIRST", "FALSE", "EXCEPT", "ESCAPE", "DESC", "CROSS", "TABLE",
860888
"TIME", "TIMESTAMP", "DATE", "BY", "BETWEEN", "AT", "AS", "AND", "ASC", "ALL", "VALUES",
861-
"CASE", "WHEN", "THEN", "ELSE", "END"
889+
"CASE", "WHEN", "THEN", "ELSE", "END", "MATCH", "ANY", "SHORTEST", "TRAIL", "ACYCLIC", "SIMPLE"
862890
];
863891
let displayed = toks
864892
.into_iter()

0 commit comments

Comments
 (0)