Skip to content

Add support for GPML-style graph query parsing #548

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Mar 21, 2025
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions extension/partiql-extension-visualize/src/ast_to_dot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ impl ToDot<ast::Expr> for AstToDot {
Expr::CallAgg(c) => self.to_dot(&mut expr_subgraph, c),
Expr::Query(q) => self.to_dot(&mut expr_subgraph, q),
Expr::Error => todo!(),
Expr::GraphMatch(_) => todo!(),
}
}
}
Expand Down
161 changes: 161 additions & 0 deletions partiql-ast/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
use rust_decimal::Decimal as RustDecimal;

use std::fmt;
use std::num::NonZeroU32;

#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -401,6 +402,8 @@ pub enum Expr {
Path(AstNode<Path>),
Call(AstNode<Call>),
CallAgg(AstNode<CallAgg>),
/// `<expr> MATCH <graph_pattern>`
GraphMatch(AstNode<GraphMatch>),

/// Query, e.g. `UNION` | `EXCEPT` | `INTERSECT` | `SELECT` and their parts.
Query(AstNode<Query>),
Expand Down Expand Up @@ -832,6 +835,164 @@ pub enum JoinSpec {
Natural,
}

/// A graph match expression: `<expr> MATCH <graph_pattern>`
///
/// This is the payload of the `Expr::GraphMatch` variant.
#[derive(Visit, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct GraphMatch {
/// the `<expr>` part of the syntax above, i.e. the expression preceding `MATCH`
pub expr: Box<Expr>,
/// the `<graph_pattern>` part of the syntax above, i.e. what follows `MATCH`
pub graph_expr: Box<AstNode<GraphMatchExpr>>,
}

/// The direction of an edge
///
/// | Orientation               | Edge pattern   | Abbreviation |
/// |---------------------------|----------------|--------------|
/// | Pointing left             | `<−[ spec ]−`  | `<−`         |
/// | Undirected                | `~[ spec ]~`   | `~`          |
/// | Pointing right            | `−[ spec ]−>`  | `−>`         |
/// | Left or undirected        | `<~[ spec ]~`  | `<~`         |
/// | Undirected or right       | `~[ spec ]~>`  | `~>`         |
/// | Left or right             | `<−[ spec ]−>` | `<−>`        |
/// | Left, undirected or right | `−[ spec ]−`   | `−`          |
///
/// Fig. 5. Table of edge patterns:
/// <https://arxiv.org/abs/2112.06217>
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum GraphMatchDirection {
/// Pointing left: `<−[ spec ]−`, abbreviated `<−`
Left,
/// Undirected: `~[ spec ]~`, abbreviated `~`
Undirected,
/// Pointing right: `−[ spec ]−>`, abbreviated `−>`
Right,
/// Left or undirected: `<~[ spec ]~`, abbreviated `<~`
LeftOrUndirected,
/// Undirected or right: `~[ spec ]~>`, abbreviated `~>`
UndirectedOrRight,
/// Left or right: `<−[ spec ]−>`, abbreviated `<−>`
LeftOrRight,
/// Left, undirected or right: `−[ spec ]−`, abbreviated `−`
LeftOrUndirectedOrRight,
}

/// A part of a graph pattern
///
/// This is the element type of `GraphMatchPattern::parts`; a pattern is an
/// ordered sequence of these parts.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum GraphMatchPatternPart {
/// A single node in a graph pattern.
Node(AstNode<GraphMatchNode>),

/// A single edge in a graph pattern.
Edge(AstNode<GraphMatchEdge>),

/// A sub-pattern, i.e. a nested `GraphMatchPattern`.
Pattern(AstNode<GraphMatchPattern>),
}

/// A quantifier for graph edges or patterns. (e.g., the `{2,5}` in `MATCH (x)->{2,5}(y)`)
///
/// The type is attached to both `GraphMatchEdge::quantifier` and
/// `GraphMatchPattern::quantifier`.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct GraphMatchQuantifier {
/// the lower bound of the quantifier (presumably the `2` in `{2,5}` — confirm against the parser)
pub lower: u32,
/// the optional upper bound of the quantifier (presumably the `5` in `{2,5}`); when present it
/// cannot be zero because it is a `NonZeroU32`.
/// NOTE(review): `None` presumably means "no upper bound" — confirm against the parser.
pub upper: Option<NonZeroU32>,
}

/// A path restrictor
///
/// | Keyword   | Description                                                               |
/// |-----------|---------------------------------------------------------------------------|
/// | `TRAIL`   | No repeated edges.                                                        |
/// | `ACYCLIC` | No repeated nodes.                                                        |
/// | `SIMPLE`  | No repeated nodes, except that the first and last nodes may be the same.  |
///
/// Fig. 7. Table of restrictors:
/// <https://arxiv.org/abs/2112.06217>
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum GraphMatchRestrictor {
/// `TRAIL`: no repeated edges.
Trail,
/// `ACYCLIC`: no repeated nodes.
Acyclic,
/// `SIMPLE`: no repeated nodes, except that the first and last nodes may be the same.
Simple,
}

/// A single node in a graph pattern.
///
/// All three components are optional.
///
/// NOTE(review): `variable` and `label` are marked `#[visit(skip)]`, which presumably
/// excludes them from the traversal generated by `#[derive(Visit)]`, leaving `prefilter`
/// as the only visited field — confirm against the `Visit` derive macro.
#[derive(Visit, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct GraphMatchNode {
/// an optional node pre-filter, e.g.: `WHERE c.name='Alarm'` in `MATCH (c WHERE c.name='Alarm')`
pub prefilter: Option<Box<Expr>>,
/// the optional element variable of the node match, e.g.: `x` in `MATCH (x)`
#[visit(skip)]
pub variable: Option<SymbolPrimitive>,
/// the optional label(s) to match for the node, e.g.: `Entity` in `MATCH (x:Entity)`;
/// stored as a list because more than one label may be given
#[visit(skip)]
pub label: Option<Vec<SymbolPrimitive>>,
}

/// A single edge in a graph pattern.
///
/// Only `direction` is mandatory; the quantifier, pre-filter, variable and label(s)
/// are all optional.
///
/// NOTE(review): every field except `prefilter` is marked `#[visit(skip)]`, which
/// presumably excludes it from the traversal generated by `#[derive(Visit)]` — confirm
/// against the `Visit` derive macro.
#[derive(Visit, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct GraphMatchEdge {
/// edge direction (see `GraphMatchDirection` for the corresponding edge syntax)
#[visit(skip)]
pub direction: GraphMatchDirection,
/// an optional quantifier for the edge match, e.g.: `{2,5}` in `MATCH (x)->{2,5}(y)`
#[visit(skip)]
pub quantifier: Option<AstNode<GraphMatchQuantifier>>,
/// an optional edge pre-filter, e.g.: `WHERE t.capacity>100` in `MATCH −[t:hasSupply WHERE t.capacity>100]−>`
pub prefilter: Option<Box<Expr>>,
/// the optional element variable of the edge match, e.g.: `t` in `MATCH −[t]−>`
#[visit(skip)]
pub variable: Option<SymbolPrimitive>,
/// the optional label(s) to match for the edge. e.g.: `Target` in `MATCH −[t:Target]−>`;
/// stored as a list because more than one label may be given
#[visit(skip)]
pub label: Option<Vec<SymbolPrimitive>>,
}

/// A single graph match pattern.
///
/// A pattern is an ordered list of `parts` (nodes, edges and nested sub-patterns),
/// optionally decorated with a restrictor, a quantifier, a pre-filter and a variable.
#[derive(Visit, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct GraphMatchPattern {
/// an optional path restrictor (`TRAIL`, `ACYCLIC` or `SIMPLE`) for the pattern
#[visit(skip)]
pub restrictor: Option<GraphMatchRestrictor>,
/// an optional quantifier for the entire pattern match
#[visit(skip)]
pub quantifier: Option<AstNode<GraphMatchQuantifier>>,
/// an optional pattern pre-filter, e.g.: `WHERE a.name=b.name` in `MATCH [(a)->(b) WHERE a.name=b.name]`
pub prefilter: Option<Box<Expr>>,
/// the optional element variable of the pattern, e.g.: `p` in `MATCH p = (a) −[t]−> (b)`
#[visit(skip)]
pub variable: Option<SymbolPrimitive>,
/// the ordered pattern parts
///
/// NOTE(review): this field is `#[visit(skip)]`, so — assuming the attribute excludes a
/// field from the derived traversal — the nodes, edges and sub-patterns held here
/// (including their `prefilter` expressions) are not reached by the generated visitor.
/// Confirm that this is intended.
#[visit(skip)]
pub parts: Vec<GraphMatchPatternPart>,
}

/// A path selector
///
/// | Keyword            |
/// |--------------------|
/// | `ANY SHORTEST`     |
/// | `ALL SHORTEST`     |
/// | `ANY`              |
/// | `ANY k`            |
/// | `SHORTEST k`       |
/// | `SHORTEST k GROUP` |
///
/// In the variants that carry a count, `k` is a `NonZeroU32`, so a count of zero
/// cannot be represented in the AST.
///
/// Fig. 8. Table of selectors:
/// <https://arxiv.org/abs/2112.06217>
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum GraphMatchSelector {
/// `ANY SHORTEST`
AnyShortest,
/// `ALL SHORTEST`
AllShortest,
/// `ANY`
Any,
/// `ANY k`, carrying the non-zero count `k`
AnyK(NonZeroU32),
/// `SHORTEST k`, carrying the non-zero count `k`
ShortestK(NonZeroU32),
/// `SHORTEST k GROUP`, carrying the non-zero count `k`
ShortestKGroup(NonZeroU32),
Comment on lines +981 to +983
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does the GPML paper require the k be non-zero? perhaps it's non-sensical to have a k of 0?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The shortest 0 paths are the empty set.

}

/// A graph match clause as defined in GPML
///
/// This is the `<graph_pattern>` side of a `GraphMatch`: an optional path selector
/// plus a list of patterns.
///
/// See <https://arxiv.org/abs/2112.06217>
#[derive(Visit, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct GraphMatchExpr {
/// an optional path selector, e.g.: `ANY SHORTEST` (see `GraphMatchSelector`)
#[visit(skip)]
pub selector: Option<GraphMatchSelector>,
/// the graph match patterns of this clause
pub patterns: Vec<AstNode<GraphMatchPattern>>,
}

/// GROUP BY <`grouping_strategy`> <`group_key`>[, <`group_key`>]... \[AS <symbol>\]
#[derive(Visit, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
Expand Down
Loading
Loading