Skip to content

Commit 2a745ce

Browse files
committed
A very stupid JSONata parser for Erlang!
But crazy ideas start small only to either fail or become big; either way it's worth a try. The intention of the JSONata parser is to transform JSONata into Erlang code that can then be executed either at runtime or precompiled before executing the flow code. Also, the set of valid JSONata stanzas that will be convertible will be the set that I need[1] - this being a very much smaller set than all valid JSONata[2]. I'm sure this approach is a dead end, since JSONata can become extremely complex and conversion to Erlang will be messy and complex, but it's an approach that can be quickly and dirtily implemented. Plus, in the process, the learnings of playing around with leex and yecc are well worth it: niche concepts (parser construction), in a niche programming language (Erlang) for a niche problem space (JSONata) and the whole thing applied to a niche development concept (visual FBP)! :) [1]: https://github.com/gorenje/erlang-red/blob/40b489e9491d6f1ef38218838e21c0d85f6f0e30/priv/jsonata/leex.examples.json [2]: jsonata-js/jsonata#731 (comment)
1 parent 40b489e commit 2a745ce

File tree

6 files changed

+2571
-5693
lines changed

6 files changed

+2571
-5693
lines changed

src/jsonata_leex.erl

Lines changed: 2197 additions & 5421 deletions
Large diffs are not rendered by default.

src/jsonata_leex.xrl

Lines changed: 125 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -27,133 +27,156 @@ INT = {D}+
2727
NAME = [a-zA-Z_][a-zA-Z0-9_]*
2828
WHITESPACE = [\s\t\n\r]
2929
INPUT = \$\$
30-
30+
%% Double-quoted string literal: opening quote, one or more characters that
%% are not a double quote, closing quote. The `+` quantifier has to follow
%% the character class; placed inside the brackets it is a literal plus sign
%% and the class would match exactly one character.
DSTRING = \"[^\"]+\"
31+
%% Single-quoted string literal: opening quote, one or more characters that
%% are not a single quote, closing quote. The `+` quantifier has to follow
%% the character class; placed inside the brackets it is a literal plus sign
%% and the class would match exactly one character.
SSTRING = \'[^\']+\'
32+
CHARS = [a-z0-9A-Z_]+
3133
3234
Rules.
3335
34-
\/\* : {token, {commment_start, TokenLine}}.
35-
\*\/ : {token, {commment_end, TokenLine}}.
36+
%% Taken from
37+
%% https://github.com/alpaca-lang/alpaca/blob/main/src/alpaca_scan.xrl
38+
39+
%% Integer
40+
{INT} : {token, {int, TokenLine, list_to_integer(TokenChars)}}.
3641
37-
!= : {token, {neq, TokenLine}}.
38-
:= : {token, {colon_eq, TokenLine}}.
39-
== : {token, {eq, TokenLine}}.
42+
%% Float
43+
{INT}\.{D}+ : {token, {float, TokenLine, list_to_float(TokenChars)}}.
44+
45+
46+
\/\* : {token, {commment_start, TokenLine}}.
47+
\*\/ : {token, {commment_end, TokenLine}}.
48+
49+
\^\( : {token, {order_by, TokenLine}}.
50+
51+
~\> : {token, {op_chain, TokenLine}}.
52+
!= : {token, {neq, TokenLine}}.
53+
:= : {token, {assign, TokenLine}}.
54+
== : {token, {eq, TokenLine}}.
55+
\>= : {token, {gt_eq, TokenLine}}.
56+
\<= : {token, {lt_eq, TokenLine}}.
57+
msg : {token, {msg_obj, TokenLine}}.
58+
and : {token, {op_and, TokenLine}}.
59+
or : {token, {op_or, TokenLine}}.
60+
in : {token, {op_in, TokenLine}}.
61+
\*\* : {token, {op_descendant, TokenLine}}.
4062
4163
\+ : {token, {op_plus, TokenLine}}.
4264
\- : {token, {op_minus, TokenLine}}.
4365
\* : {token, {op_multiple, TokenLine}}.
4466
\/ : {token, {op_divide, TokenLine}}.
45-
\( : {token, {open_bracket, TokenLine}}.
46-
\) : {token, {close_backet, TokenLine}}.
47-
{ : {token, {open_brace, TokenLine}}.
48-
} : {token, {close_brace, TokenLine}}.
49-
\[ : {token, {open_square, TokenLine}}.
50-
\] : {token, {close_sqaure, TokenLine}}.
51-
\: : {token, {colon, TokenLine}}.
52-
\; : {token, {semicolon, TokenLine}}.
67+
\# : {token, {op_hash, TokenLine}}.
68+
\( : {token, {'(', TokenLine}}.
69+
\) : {token, {')', TokenLine}}.
70+
\[ : {token, {'[', TokenLine}}.
71+
\] : {token, {']', TokenLine}}.
72+
\: : {token, {':', TokenLine}}.
73+
\; : {token, {';', TokenLine}}.
5374
\? : {token, {question, TokenLine}}.
54-
\= : {token, {equal, TokenLine}}.
75+
\= : {token, {eq, TokenLine}}.
5576
\% : {token, {percent, TokenLine}}.
56-
\< : {token, {lt, TokenLine}}.
57-
\> : {token, {gt, TokenLine}}.
77+
\< : {token, {lt, TokenLine}}.
78+
\> : {token, {gt, TokenLine}}.
5879
\" : {token, {doublequote, TokenLine}}.
5980
\' : {token, {singlequote, TokenLine}}.
60-
and : {token, {op_and, TokenLine}}.
61-
or : {token, {op_or, TokenLine}}.
6281
\& : {token, {ampersand, TokenLine}}.
6382
\$ : {token, {dollar, TokenLine}}.
64-
\. : {token, {dot, TokenLine}}.
65-
, : {token, {comma, TokenLine}}.
66-
! : {token, {bang, TokenLine}}.
83+
\~ : {token, {tilda, TokenLine}}.
84+
\^ : {token, {karat, TokenLine}}.
85+
\@ : {token, {at, TokenLine}}.
86+
\. : {token, {'.', TokenLine}}.
87+
\\ : {token, {backslash, TokenLine}}.
88+
\| : {token, {'|', TokenLine}}.
89+
, : {token, {',', TokenLine}}.
90+
! : {token, {'!', TokenLine}}.
91+
{ : {token, {'{', TokenLine}}.
92+
} : {token, {'}', TokenLine}}.
6793
6894
% Function names taken from
6995
% https://github.com/jsonata-js/jsonata/blob/master/src/functions.js
70-
abs : {token, {funct, TokenLine, TokenChars}}.
71-
append : {token, {funct, TokenLine, TokenChars}}.
72-
assert : {token, {funct, TokenLine, TokenChars}}.
73-
average : {token, {funct, TokenLine, TokenChars}}.
74-
base64decode : {token, {funct, TokenLine, TokenChars}}.
75-
base64encode : {token, {funct, TokenLine, TokenChars}}.
76-
boolean : {token, {funct, TokenLine, TokenChars}}.
77-
ceil : {token, {funct, TokenLine, TokenChars}}.
78-
contains : {token, {funct, TokenLine, TokenChars}}.
79-
count : {token, {funct, TokenLine, TokenChars}}.
80-
decodeUrl : {token, {funct, TokenLine, TokenChars}}.
81-
decodeUrlComponent : {token, {funct, TokenLine, TokenChars}}.
82-
distinct : {token, {funct, TokenLine, TokenChars}}.
83-
each : {token, {funct, TokenLine, TokenChars}}.
84-
encodeUrl : {token, {funct, TokenLine, TokenChars}}.
85-
encodeUrlComponent : {token, {funct, TokenLine, TokenChars}}.
86-
error : {token, {funct, TokenLine, TokenChars}}.
87-
exists : {token, {funct, TokenLine, TokenChars}}.
88-
filter : {token, {funct, TokenLine, TokenChars}}.
89-
floor : {token, {funct, TokenLine, TokenChars}}.
90-
foldLeft : {token, {funct, TokenLine, TokenChars}}.
91-
formatBase : {token, {funct, TokenLine, TokenChars}}.
92-
formatNumber : {token, {funct, TokenLine, TokenChars}}.
93-
join : {token, {funct, TokenLine, TokenChars}}.
94-
keys : {token, {funct, TokenLine, TokenChars}}.
95-
length : {token, {funct, TokenLine, TokenChars}}.
96-
lookup : {token, {funct, TokenLine, TokenChars}}.
97-
lowercase : {token, {funct, TokenLine, TokenChars}}.
98-
map : {token, {funct, TokenLine, TokenChars}}.
99-
match : {token, {funct, TokenLine, TokenChars}}.
100-
max : {token, {funct, TokenLine, TokenChars}}.
101-
merge : {token, {funct, TokenLine, TokenChars}}.
102-
min : {token, {funct, TokenLine, TokenChars}}.
103-
not : {token, {funct, TokenLine, TokenChars}}.
104-
number : {token, {funct, TokenLine, TokenChars}}.
105-
pad : {token, {funct, TokenLine, TokenChars}}.
106-
power : {token, {funct, TokenLine, TokenChars}}.
107-
random : {token, {funct, TokenLine, TokenChars}}.
108-
replace : {token, {funct, TokenLine, TokenChars}}.
109-
reverse : {token, {funct, TokenLine, TokenChars}}.
110-
round : {token, {funct, TokenLine, TokenChars}}.
111-
shuffle : {token, {funct, TokenLine, TokenChars}}.
112-
sift : {token, {funct, TokenLine, TokenChars}}.
113-
single : {token, {funct, TokenLine, TokenChars}}.
114-
sort : {token, {funct, TokenLine, TokenChars}}.
115-
split : {token, {funct, TokenLine, TokenChars}}.
116-
spread : {token, {funct, TokenLine, TokenChars}}.
117-
sqrt : {token, {funct, TokenLine, TokenChars}}.
118-
string : {token, {funct, TokenLine, TokenChars}}.
119-
substring : {token, {funct, TokenLine, TokenChars}}.
120-
substringAfter : {token, {funct, TokenLine, TokenChars}}.
121-
substringBefore : {token, {funct, TokenLine, TokenChars}}.
122-
sum : {token, {funct, TokenLine, TokenChars}}.
123-
trim : {token, {funct, TokenLine, TokenChars}}.
124-
type : {token, {funct, TokenLine, TokenChars}}.
125-
uppercase : {token, {funct, TokenLine, TokenChars}}.
126-
zip : {token, {funct, TokenLine, TokenChars}}.
96+
\$abs : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
97+
\$append : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
98+
\$assert : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
99+
\$average : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
100+
\$base64decode : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
101+
\$base64encode : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
102+
\$boolean : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
103+
\$ceil : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
104+
\$contains : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
105+
\$count : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
106+
\$decodeUrl : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
107+
\$decodeUrlComponent : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
108+
\$distinct : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
109+
\$each : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
110+
\$encodeUrl : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
111+
\$encodeUrlComponent : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
112+
\$error : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
113+
\$exists : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
114+
\$filter : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
115+
\$floor : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
116+
\$foldLeft : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
117+
\$formatBase : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
118+
\$formatNumber : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
119+
\$join : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
120+
\$keys : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
121+
\$length : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
122+
\$lookup : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
123+
\$lowercase : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
124+
\$map : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
125+
\$match : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
126+
\$max : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
127+
\$merge : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
128+
\$min : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
129+
\$not : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
130+
\$number : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
131+
\$pad : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
132+
\$power : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
133+
\$random : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
134+
\$replace : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
135+
\$reverse : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
136+
\$round : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
137+
\$shuffle : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
138+
\$sift : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
139+
\$single : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
140+
\$sort : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
141+
\$split : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
142+
\$spread : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
143+
\$sqrt : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
144+
\$string : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
145+
\$substring : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
146+
\$substringAfter : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
147+
\$substringBefore : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
148+
\$sum : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
149+
\$trim : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
150+
\$type : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
151+
\$uppercase : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
152+
\$zip : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
127153
128154
%%
129155
%% NodeRED specials
130-
clone : {token, {funct, TokenLine, TokenChars}}.
131-
env : {token, {funct, TokenLine, TokenChars}}.
132-
flowContext : {token, {funct, TokenLine, TokenChars}}.
133-
fromMillis : {token, {funct, TokenLine, TokenChars}}.
134-
globalContext : {token, {funct, TokenLine, TokenChars}}.
135-
millis : {token, {funct, TokenLine, TokenChars}}.
136-
moment : {token, {funct, TokenLine, TokenChars}}.
137-
now : {token, {funct, TokenLine, TokenChars}}.
138-
parseInteger : {token, {funct, TokenLine, TokenChars}}.
139-
reduce : {token, {funct, TokenLine, TokenChars}}.
140-
toMillis : {token, {funct, TokenLine, TokenChars}}.
141-
142-
function : {token, {funct_def, TokenLine}}.
143-
144-
156+
\$clone : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
157+
\$env : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
158+
\$flowContext : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
159+
\$fromMillis : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
160+
\$globalContext : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
161+
\$millis : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
162+
\$moment : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
163+
\$now : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
164+
\$parseInteger : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
165+
\$reduce : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
166+
\$toMillis : {token, {funct, TokenLine, remove_dollars(TokenChars)}}.
167+
168+
function : {token, {funct_def, TokenLine}}.
145169
146170
{NAME} : {token, {name, TokenLine, TokenChars}}.
147171
{WHITESPACE}+ : skip_token.
148172
{INPUT} : {token, {msg_obj, TokenLine}}.
173+
{DSTRING} : {token, {string, TokenLine, TokenChars}}.
174+
{SSTRING} : {token, {string, TokenLine, TokenChars}}.
175+
{CHARS} : {token, {chars, TokenLine, TokenChars}}.
149176
150-
%% Taken from
151-
%% https://github.com/alpaca-lang/alpaca/blob/main/src/alpaca_scan.xrl
152-
153-
%% Integer
154-
{D}+ : {token, {int, TokenLine, list_to_integer(TokenChars)}}.
155-
156-
%% Float
157-
{D}+\.{D}+ : {token, {float, TokenLine, list_to_float(TokenChars)}}.
177+
. : {error, "Unexpected token: " ++ TokenChars}.
158178
159179
Erlang code.
180+
181+
%% Convert the text of a matched `$name` function token into the atom `name`.
%%
%% The argument is the TokenChars of a rule whose pattern starts with `\$`,
%% so the first character is always a dollar sign; any other input fails
%% with function_clause. list_to_atom/1 is acceptable here because this
%% helper is only called from rules that match a fixed list of literal
%% function names, which bounds the set of atoms that can be created.
remove_dollars("$" ++ Name) ->
    list_to_atom(Name).

0 commit comments

Comments
 (0)