Skip to content

Commit 7c7ea77

Browse files
jpschorr and am357 authored
Parse SQL-style typed-literal strings (#163)
* Parse SQL-style typed-literal strings

Co-authored-by: Arash Maymandi <27716912+am357@users.noreply.github.com>
1 parent 842a75d commit 7c7ea77

File tree

7 files changed

+227
-153
lines changed

7 files changed

+227
-153
lines changed

partiql-ast/src/ast.rs

Lines changed: 20 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -381,6 +381,8 @@ pub enum Lit {
381381
HexStringLit(String),
382382
DateTimeLit(DateTimeLit),
383383
CollectionLit(CollectionLit),
384+
/// E.g. `TIME WITH TIME ZONE` in `SELECT TIME WITH TIME ZONE '12:00' FROM ...`
385+
TypedLit(String, Type),
384386
}
385387

386388
#[derive(Clone, Debug, PartialEq)]
@@ -536,20 +538,6 @@ pub struct Sexp {
536538
pub values: Vec<Box<Expr>>,
537539
}
538540

539-
#[derive(Clone, Debug, PartialEq)]
540-
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
541-
pub struct Date {
542-
pub year: i32,
543-
pub month: i32,
544-
pub day: i32,
545-
}
546-
547-
#[derive(Clone, Debug, PartialEq)]
548-
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
549-
pub struct LitTime {
550-
pub value: TimeValue,
551-
}
552-
553541
#[derive(Clone, Debug, PartialEq)]
554542
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
555543
pub struct Call {
@@ -564,11 +552,17 @@ pub enum CallArg {
564552
Star(),
565553
/// positional argument to a function call (e.g., all arguments in `foo(1, 'a', 3)`)
566554
Positional(Box<Expr>),
555+
556+
/// E.g. `INT` in `foo(INT)`
557+
PositionalType(Type),
567558
/// named argument to a function call (e.g., the `"from" : 2` in `substring(a, "from":2)`)
568559
Named {
569560
name: SymbolPrimitive,
570-
value: Option<Box<Expr>>,
561+
value: Box<Expr>,
571562
},
563+
564+
/// E.g. `AS: VARCHAR` in `CAST('abc' AS VARCHAR)`
565+
NamedType { name: SymbolPrimitive, ty: Type },
572566
}
573567

574568
#[derive(Clone, Debug, PartialEq)]
@@ -579,40 +573,6 @@ pub struct CallAgg {
579573
pub args: Vec<Box<Expr>>,
580574
}
581575

582-
#[derive(Clone, Debug, PartialEq)]
583-
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
584-
pub struct Cast {
585-
pub value: Box<Expr>,
586-
pub as_type: Type,
587-
}
588-
589-
#[derive(Clone, Debug, PartialEq)]
590-
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
591-
pub struct CanCast {
592-
pub value: Box<Expr>,
593-
pub as_type: Type,
594-
}
595-
596-
#[derive(Clone, Debug, PartialEq)]
597-
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
598-
pub struct CanLossLessCast {
599-
pub value: Box<Expr>,
600-
pub as_type: Type,
601-
}
602-
603-
#[derive(Clone, Debug, PartialEq)]
604-
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
605-
pub struct NullIf {
606-
pub expr1: Box<Expr>,
607-
pub expr2: Box<Expr>,
608-
}
609-
610-
#[derive(Clone, Debug, PartialEq)]
611-
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
612-
pub struct Coalesce {
613-
pub args: Vec<Box<Expr>>,
614-
}
615-
616576
#[derive(Clone, Debug, PartialEq)]
617577
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
618578
pub struct Select {
@@ -624,18 +584,6 @@ pub struct Select {
624584
pub having: Option<Box<Expr>>,
625585
}
626586

627-
#[derive(Clone, Debug, PartialEq)]
628-
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
629-
pub struct TimeValue {
630-
pub hour: i32,
631-
pub minute: i32,
632-
pub second: i32,
633-
pub nano: i32,
634-
pub precision: i32,
635-
pub with_time_zone: bool,
636-
pub tz_minutes: Option<i32>,
637-
}
638-
639587
#[derive(Clone, Debug, PartialEq)]
640588
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
641589
pub struct Path {
@@ -967,20 +915,26 @@ pub enum Type {
967915

968916
#[derive(Clone, Debug, PartialEq)]
969917
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
970-
pub struct CharacterType {
971-
pub length: Option<LongPrimitive>,
918+
pub enum CustomTypeParam {
919+
/// E.g. `2` in `VARCHAR(2)`
920+
Lit(Lit),
921+
/// E.g. `INT` in `FooType(INT)`
922+
Type(Type),
972923
}
973924

974925
#[derive(Clone, Debug, PartialEq)]
975926
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
976-
pub struct CharacterVaryingType {
977-
pub length: Option<LongPrimitive>,
927+
pub enum CustomTypePart {
928+
/// E.g. any of `WITH`, `TIME`, and `ZONE` in `TIME(20) WITH TIME ZONE`
929+
Name(SymbolPrimitive),
930+
/// E.g. `TIME(20)` in `TIME(20) WITH TIME ZONE`
931+
Parameterized(SymbolPrimitive, Vec<CustomTypeParam>),
978932
}
979933

980934
#[derive(Clone, Debug, PartialEq)]
981935
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
982936
pub struct CustomType {
983-
pub name: SymbolPrimitive,
937+
pub parts: Vec<CustomTypePart>,
984938
}
985939

986940
#[derive(Clone, Debug, PartialEq)]
@@ -990,9 +944,3 @@ pub struct SymbolPrimitive {
990944
// Optional because string literal symbols don't have case sensitivity
991945
pub case: Option<CaseSensitivity>,
992946
}
993-
994-
#[derive(Clone, Debug, PartialEq)]
995-
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
996-
pub struct LongPrimitive {
997-
pub value: i32,
998-
}

partiql-cli/src/visualize/ast_to_dot.rs

Lines changed: 106 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ use partiql_ast::ast;
22
use std::fmt::Display;
33

44
use dot_writer::{Attributes, DotWriter, Node, NodeId, Scope, Shape};
5+
use partiql_ast::ast::{CallArg, CustomTypeParam, CustomTypePart, Lit, Type};
56

67
/*
78
subgraph cluster_legend {
@@ -184,37 +185,82 @@ impl ToDot<ast::Expr> for AstToDot {
184185
}
185186
}
186187

188+
#[inline]
189+
fn lit_to_str(ast: &Lit) -> String {
190+
match ast {
191+
Lit::Null => "NULL".to_string(),
192+
Lit::Missing => "MISSING".to_string(),
193+
Lit::Int8Lit(l) => l.to_string(),
194+
Lit::Int16Lit(l) => l.to_string(),
195+
Lit::Int32Lit(l) => l.to_string(),
196+
Lit::Int64Lit(l) => l.to_string(),
197+
Lit::DecimalLit(l) => l.to_string(),
198+
Lit::NumericLit(l) => l.to_string(),
199+
Lit::RealLit(l) => l.to_string(),
200+
Lit::FloatLit(l) => l.to_string(),
201+
Lit::DoubleLit(l) => l.to_string(),
202+
Lit::BoolLit(l) => (if *l { "TRUE" } else { "FALSE" }).to_string(),
203+
Lit::IonStringLit(l) => format!("`{}`", l),
204+
Lit::CharStringLit(l) => format!("'{}'", l),
205+
Lit::NationalCharStringLit(l) => format!("'{}'", l),
206+
Lit::BitStringLit(l) => format!("b'{}'", l),
207+
Lit::HexStringLit(l) => format!("x'{}'", l),
208+
Lit::DateTimeLit(l) => match l {
209+
ast::DateTimeLit::DateLit(d) => format!("DATE '{}'", d),
210+
ast::DateTimeLit::TimeLit(t) => format!("TIME '{}'", t),
211+
ast::DateTimeLit::TimestampLit(ts) => format!("TIMESTAMP '{}'", ts),
212+
},
213+
Lit::CollectionLit(l) => match l {
214+
ast::CollectionLit::ArrayLit(al) => format!("[{}]", al),
215+
ast::CollectionLit::BagLit(bl) => format!("<<{}>>", bl),
216+
},
217+
Lit::TypedLit(val_str, ty) => {
218+
format!("{} '{}'", type_to_str(ty), val_str)
219+
}
220+
}
221+
}
222+
223+
#[inline]
224+
fn custom_type_param_to_str(param: &ast::CustomTypeParam) -> String {
225+
match param {
226+
CustomTypeParam::Lit(lit) => lit_to_str(lit),
227+
CustomTypeParam::Type(ty) => type_to_str(ty),
228+
}
229+
}
230+
231+
#[inline]
232+
fn custom_type_part_to_str(part: &ast::CustomTypePart) -> String {
233+
match part {
234+
CustomTypePart::Name(name) => symbol_primitive_to_label(name),
235+
CustomTypePart::Parameterized(name, args) => {
236+
let name = symbol_primitive_to_label(name);
237+
let args = args
238+
.iter()
239+
.map(custom_type_param_to_str)
240+
.collect::<Vec<_>>()
241+
.join(",");
242+
format!("{}({})", name, args)
243+
}
244+
}
245+
}
246+
247+
#[inline]
248+
fn type_to_str(ty: &ast::Type) -> String {
249+
match ty {
250+
Type::CustomType(cty) => cty
251+
.parts
252+
.iter()
253+
.map(custom_type_part_to_str)
254+
.collect::<Vec<_>>()
255+
.join(" "),
256+
_ => format!("{:?}", ty),
257+
}
258+
}
259+
187260
impl ToDot<ast::Lit> for AstToDot {
188261
fn to_dot(&mut self, out: &mut Scope, ast: &ast::Lit) -> Targets {
189262
use ast::Lit;
190-
let lbl = match ast {
191-
Lit::Null => "NULL".to_string(),
192-
Lit::Missing => "MISSING".to_string(),
193-
Lit::Int8Lit(l) => l.to_string(),
194-
Lit::Int16Lit(l) => l.to_string(),
195-
Lit::Int32Lit(l) => l.to_string(),
196-
Lit::Int64Lit(l) => l.to_string(),
197-
Lit::DecimalLit(l) => l.to_string(),
198-
Lit::NumericLit(l) => l.to_string(),
199-
Lit::RealLit(l) => l.to_string(),
200-
Lit::FloatLit(l) => l.to_string(),
201-
Lit::DoubleLit(l) => l.to_string(),
202-
Lit::BoolLit(l) => (if *l { "TRUE" } else { "FALSE" }).to_string(),
203-
Lit::IonStringLit(l) => format!("`{}`", l),
204-
Lit::CharStringLit(l) => format!("'{}'", l),
205-
Lit::NationalCharStringLit(l) => format!("'{}'", l),
206-
Lit::BitStringLit(l) => format!("b'{}'", l),
207-
Lit::HexStringLit(l) => format!("x'{}'", l),
208-
Lit::DateTimeLit(l) => match l {
209-
ast::DateTimeLit::DateLit(d) => format!("DATE '{}'", d),
210-
ast::DateTimeLit::TimeLit(t) => format!("TIME '{}'", t),
211-
ast::DateTimeLit::TimestampLit(ts) => format!("TIMESTAMP '{}'", ts),
212-
},
213-
Lit::CollectionLit(l) => match l {
214-
ast::CollectionLit::ArrayLit(al) => format!("[{}]", al),
215-
ast::CollectionLit::BagLit(bl) => format!("<<{}>>", bl),
216-
},
217-
};
263+
let lbl = lit_to_str(ast);
218264

219265
let mut node = out.node_auto();
220266
node.set_label(&lbl).set_shape(Shape::Rectangle);
@@ -399,34 +445,29 @@ impl ToDot<ast::ProjectItem> for AstToDot {
399445
}
400446
}
401447

448+
fn symbol_primitive_to_label(sym: &ast::SymbolPrimitive) -> String {
449+
use ast::CaseSensitivity;
450+
let case = sym.case.clone().unwrap_or(CaseSensitivity::CaseInsensitive);
451+
match case {
452+
CaseSensitivity::CaseSensitive => format!("'{}'", sym.value),
453+
CaseSensitivity::CaseInsensitive => format!("{}", sym.value),
454+
}
455+
}
456+
402457
impl ToDot<ast::SymbolPrimitive> for AstToDot {
403458
fn to_dot(&mut self, out: &mut Scope, ast: &ast::SymbolPrimitive) -> Targets {
404-
use ast::CaseSensitivity;
405-
let case = ast.case.clone().unwrap_or(CaseSensitivity::CaseInsensitive);
406-
let lbl = match case {
407-
CaseSensitivity::CaseSensitive => format!("'{}'", ast.value),
408-
CaseSensitivity::CaseInsensitive => format!("{}", ast.value),
409-
};
459+
let lbl = symbol_primitive_to_label(ast);
410460
let id = out.node_auto_labelled(&lbl).id();
411461
vec![id]
412462
}
413463
}
414464

415465
impl ToDot<ast::VarRef> for AstToDot {
416466
fn to_dot(&mut self, out: &mut Scope, ast: &ast::VarRef) -> Targets {
417-
use ast::CaseSensitivity;
418-
let case = ast
419-
.name
420-
.case
421-
.clone()
422-
.unwrap_or(CaseSensitivity::CaseInsensitive);
423-
let prefix = match &ast.qualifier {
424-
ast::ScopeQualifier::Unqualified => "",
425-
ast::ScopeQualifier::Qualified => "@",
426-
};
427-
let lbl = match case {
428-
CaseSensitivity::CaseSensitive => format!("{}'{}'", prefix, ast.name.value),
429-
CaseSensitivity::CaseInsensitive => format!("{}{}", prefix, ast.name.value),
467+
let lbl = symbol_primitive_to_label(&ast.name);
468+
let lbl = match &ast.qualifier {
469+
ast::ScopeQualifier::Unqualified => lbl,
470+
ast::ScopeQualifier::Qualified => format!("@{}", lbl),
430471
};
431472
let id = out.node_auto_labelled(&lbl).id();
432473

@@ -532,6 +573,24 @@ impl ToDot<ast::CallArg> for AstToDot {
532573
self.to_dot(out, value).edges(out, &id, "value");
533574
vec![id]
534575
}
576+
CallArg::PositionalType(ty) => {
577+
let mut node = out.node_auto_labelled(&type_to_str(ty));
578+
node.set("shape", "parallelogram", false);
579+
vec![node.id()]
580+
}
581+
CallArg::NamedType { name, ty } => {
582+
let id = out.node_auto_labelled("Named").id();
583+
self.to_dot(out, name).edges(out, &id, "name");
584+
585+
let ty_target = {
586+
let mut ty_node = out.node_auto_labelled(&type_to_str(ty));
587+
ty_node.set("shape", "parallelogram", false);
588+
vec![ty_node.id()]
589+
};
590+
ty_target.edges(out, &id, "type");
591+
592+
vec![id]
593+
}
535594
}
536595
}
537596
}

partiql-cli/src/visualize/render.rs

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,12 @@ fn render_to_string(format: GraphVizFormat, ast: &Box<ast::Expr>) -> String {
7878
String::from_utf8(gv_render(format, ast_to_dot(ast))).expect("valid utf8")
7979
}
8080

81+
/// Convert an AST into a raw dot graph, returned as generated without passing it through the graphviz renderer.
82+
#[inline]
83+
pub fn to_dot_raw(ast: &Box<ast::Expr>) -> String {
84+
ast_to_dot(ast)
85+
}
86+
8187
/// Convert an AST into an attributed dot graph.
8288
#[inline]
8389
pub fn to_dot(ast: &Box<ast::Expr>) -> String {
@@ -87,7 +93,7 @@ pub fn to_dot(ast: &Box<ast::Expr>) -> String {
8793
/// Convert an AST into a pretty-printed dot graph.
8894
#[inline]
8995
pub fn to_pretty_dot(ast: &Box<ast::Expr>) -> String {
90-
render_to_string(GraphVizFormat::PrettyPrint, &ast)
96+
render_to_string(GraphVizFormat::Canon, &ast)
9197
}
9298

9399
/// Convert an AST into a graphviz svg.

partiql-parser/src/lexer.rs

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -632,6 +632,10 @@ pub enum Token<'input> {
632632
Where,
633633
#[regex("(?i:With)")]
634634
With,
635+
#[regex("(?i:Without)")]
636+
Without,
637+
#[regex("(?i:Zone)")]
638+
Zone,
635639
}
636640

637641
impl<'input> Token<'input> {
@@ -804,7 +808,9 @@ impl<'input> fmt::Display for Token<'input> {
804808
| Token::Values
805809
| Token::When
806810
| Token::Where
807-
| Token::With => {
811+
| Token::With
812+
| Token::Without
813+
| Token::Zone => {
808814
write!(f, "{}", format!("{:?}", self).to_uppercase())
809815
}
810816
}

0 commit comments

Comments
 (0)