Skip to content

Commit ca74f91

Browse files
committed
Cleanup parser
1 parent 79ac979 commit ca74f91

File tree

2 files changed

+15
-16
lines changed

2 files changed

+15
-16
lines changed

source/Parser.mly

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
%token CLOSE_BRACE
3636
%token EOF
3737

38-
/* %left OPEN_BRACKET */
3938
/* precedence declarations below follow https://github.com/stedolan/jq/issues/1326 */
4039
%right PIPE /* lowest precedence */
4140
%nonassoc COMMA
@@ -55,28 +54,29 @@ program:
5554
| EOF;
5655
{ Identity }
5756

58-
str_or_id:
57+
string_or_identifier:
5958
| key = IDENTIFIER { Literal (String key) }
6059
| key = STRING { Literal (String key) }
6160

62-
key_val(E):
63-
| key = str_or_id
61+
key_value (E):
62+
| key = string_or_identifier
6463
{ key, None }
6564
| OPEN_PARENT; e1 = E CLOSE_PARENT; COLON; e2 = E
6665
{ e1, Some e2 }
67-
| key = str_or_id; COLON; e = E
66+
| key = string_or_identifier; COLON; e = E
6867
{ key, Some e }
6968

7069
elif_term:
7170
| ELIF cond = item_expr THEN e = term
7271
{ cond, e }
7372

7473
// sequence_expr handles the lowest precedence operators: comma and pipe
74+
// while item_expr handles the higher precedence operators
7575
sequence_expr:
7676
| left = sequence_expr; COMMA; right = sequence_expr;
7777
{ Comma (left, right) }
7878

79-
| left = sequence_expr; PIPE; right = item_expr; // Pipe binds tighter than comma, but less than others
79+
| left = sequence_expr; PIPE; right = item_expr;
8080
{ Pipe (left, right) }
8181

8282
| e = item_expr
@@ -96,10 +96,10 @@ sequence_expr:
9696
| AND {And}
9797
| OR {Or}
9898

99-
// item_expr handles operators with higher precedence than COMMA and PIPE
10099
item_expr:
101100
| left = item_expr; op = operator; right = item_expr;
102101
{ Operation (left, op, right) }
102+
103103
| e = term
104104
{ e }
105105

@@ -124,8 +124,7 @@ term:
124124
{ Literal(Null) }
125125
| RANGE; OPEN_PARENT; nl = separated_nonempty_list(SEMICOLON, number); CLOSE_PARENT;
126126
{
127-
let nl = List.map int_of_float nl in
128-
match nl with
127+
match (List.map Int.of_float nl) with
129128
| [] -> assert false (* nonempty_list *)
130129
| x :: [] -> Range (x, None, None)
131130
| x :: y :: [] -> Range (x, Some y, None)
@@ -211,12 +210,13 @@ term:
211210
| OPEN_BRACE; CLOSE_BRACE;
212211
{ Object [] }
213212

214-
| e = delimited(OPEN_BRACE, separated_nonempty_list(COMMA, key_val(term)), CLOSE_BRACE);
213+
| e = delimited(OPEN_BRACE, separated_nonempty_list(COMMA, key_value (term)), CLOSE_BRACE);
215214
{ Object e }
216215

217216
// Parentheses allow a full sequence_expr inside, reducing to a term (and so usable wherever an item_expr is expected)
218217
| OPEN_PARENT; e = sequence_expr; CLOSE_PARENT;
219218
{ e }
219+
220220
| e = term; OPEN_BRACKET; i = number; CLOSE_BRACKET
221221
{ Pipe (e, Index (int_of_float i)) }
222222

source/Tokenizer.ml

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
open Sedlexing.Utf8
22

3-
let dot = [%sedlex.regexp? '.']
43
let digit = [%sedlex.regexp? '0' .. '9']
54
let number = [%sedlex.regexp? Plus digit, Opt '.', Opt (Plus digit)]
65
let space = [%sedlex.regexp? Plus ('\n' | '\t' | ' ')]
@@ -51,7 +50,7 @@ type token =
5150
| EOF
5251
[@@deriving show]
5352

54-
let string buf =
53+
let tokenize_string buf =
5554
let buffer = Buffer.create 10 in
5655
let rec read_string buf =
5756
[%sedlex
@@ -108,13 +107,13 @@ let rec tokenize buf =
108107
| "else" -> Ok ELSE
109108
| "elif" -> Ok ELIF
110109
| "end" -> Ok END
111-
| dot -> Ok DOT
110+
| "." -> Ok DOT
112111
| ".." -> Ok RECURSE
113-
| '"' -> string buf
112+
| '"' -> tokenize_string buf
114113
| identifier -> tokenize_apply buf
115114
| number ->
116-
let num = lexeme buf |> float_of_string in
117-
Ok (NUMBER num)
115+
let num = lexeme buf in
116+
Ok (NUMBER (Float.of_string num))
118117
| space -> tokenize buf
119118
| any -> Error ("Unexpected character '" ^ lexeme buf ^ "'")
120119
| _ -> Error "Unexpected character"

0 commit comments

Comments
 (0)