Skip to content

Commit 8d9d962

Browse files
authored
Merge pull request #117 from engboris/lexer
Use sedlex instead of ocamllex
2 parents d3a0da7 + 9d705f0 commit 8d9d962

File tree

15 files changed

+261
-204
lines changed

15 files changed

+261
-204
lines changed

bin/isgen.ml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
open Base
22

33
let rec loop env =
4-
let lexbuf = Lexing.from_channel Stdlib.stdin in
5-
let d = Stellogen.Sgen_parser.declaration Stellogen.Sgen_lexer.read lexbuf in
4+
let lexbuf = Sedlexing.Utf8.from_channel Stdlib.stdin in
5+
let lexer = Sedlexing.with_tokenizer Stellogen.Sgen_lexer.read lexbuf in
6+
let parser =
7+
MenhirLib.Convert.Simplified.traditional2revised
8+
Stellogen.Sgen_parser.declaration
9+
in
10+
let d = parser lexer in
611
let wrapped_env =
712
Stellogen.Sgen_eval.eval_decl ~typecheckonly:false ~notyping:false env d
813
in

bin/lscrun.ml

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,20 @@ open Base
22
open Cmdliner
33
open Lsc.Lsc_ast
44
open Lsc.Lsc_err
5-
open Lsc.Lsc_parser
6-
open Lsc.Lsc_lexer
75
open Out_channel
86

97
let parse_and_eval input_file unfincomp linear showtrace =
10-
let lexbuf = Lexing.from_channel (Stdlib.open_in input_file) in
11-
lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = input_file };
12-
let mcs = constellation_file read lexbuf in
8+
let lexbuf = Sedlexing.Utf8.from_channel (Stdlib.open_in input_file) in
9+
let start_pos filename =
10+
{ Lexing.pos_fname = filename; pos_lnum = 1; pos_bol = 0; pos_cnum = 0 }
11+
in
12+
Sedlexing.set_position lexbuf (start_pos input_file);
13+
let lexer = Sedlexing.with_tokenizer Lsc.Lsc_lexer.read lexbuf in
14+
let parser =
15+
MenhirLib.Convert.Simplified.traditional2revised
16+
Lsc.Lsc_parser.constellation_file
17+
in
18+
let mcs = parser lexer in
1319
let result =
1420
match exec ~linear ~showtrace mcs with
1521
| Ok result -> result

bin/sgen.ml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
open Base
22
open Cmdliner
33
open Stellogen.Sgen_eval
4-
open Lexing
54

65
let parse_and_eval input_file typecheckonly notyping =
7-
let lexbuf = Lexing.from_channel (Stdlib.open_in input_file) in
8-
lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = input_file };
6+
let lexbuf = Sedlexing.Utf8.from_channel (Stdlib.open_in input_file) in
7+
let start_pos filename =
8+
{ Lexing.pos_fname = filename; pos_lnum = 1; pos_bol = 0; pos_cnum = 0 }
9+
in
10+
Sedlexing.set_position lexbuf (start_pos input_file);
911
let p = Stellogen.Sgen_parsing.parse_with_error lexbuf in
1012
let _ = eval_program ~typecheckonly ~notyping p in
1113
()

dune-project

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
(name stellogen)
2020
(synopsis "Stellogen is a minimalistic and logic-agnostic programming
2121
language based on term unification.")
22-
(depends base menhir (alcotest :with-test))
22+
(depends base menhir (alcotest :with-test) sedlex)
2323
(tags
2424
("transcendental syntax" "logic programming" "constraint programming" "resolution logic" "unification" "self-assembly")))
2525

src/lsc/dune

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
(library
22
(name lsc)
3-
(libraries base common)
3+
(libraries base common menhirLib)
4+
(preprocess
5+
(pps sedlex.ppx))
46
(modules lsc_err lsc_ast unification lsc_parser lsc_lexer))
57

68
(env
@@ -11,6 +13,4 @@
1113
(menhir
1214
(modules ../common/common_parser lsc_parser)
1315
(merge_into lsc_parser)
14-
(flags --dump --explain))
15-
16-
(ocamllex lsc_lexer)
16+
(flags --table --dump --explain))

src/lsc/lsc_lexer.ml

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
open Sedlexing
2+
open Lsc_parser
3+
4+
exception SyntaxError of string
5+
6+
let buf = Sedlexing.Utf8.from_channel
7+
8+
let is_func_start = [%sedlex.regexp? 'a' .. 'z' | '0' .. '9']
9+
10+
let is_func_rest =
11+
[%sedlex.regexp? 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' | '?']
12+
13+
let is_var_start = [%sedlex.regexp? 'A' .. 'Z']
14+
15+
let is_var_rest =
16+
[%sedlex.regexp? 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '-']
17+
18+
let space = [%sedlex.regexp? Plus (' ' | '\t')]
19+
20+
let newline = [%sedlex.regexp? '\r' | '\n' | "\r\n"]
21+
22+
let rec read_string buf lexbuf =
23+
match%sedlex lexbuf with
24+
| '"' -> SYM ("\"" ^ Buffer.contents buf ^ "\"")
25+
| '\\', '/' ->
26+
Buffer.add_char buf '/';
27+
read_string buf lexbuf
28+
| '\\', '\\' ->
29+
Buffer.add_char buf '\\';
30+
read_string buf lexbuf
31+
| '\\', 'b' ->
32+
Buffer.add_char buf '\b';
33+
read_string buf lexbuf
34+
| '\\', 'f' ->
35+
Buffer.add_char buf '\012';
36+
read_string buf lexbuf
37+
| '\\', 'n' ->
38+
Buffer.add_char buf '\n';
39+
read_string buf lexbuf
40+
| '\\', 'r' ->
41+
Buffer.add_char buf '\r';
42+
read_string buf lexbuf
43+
| '\\', 't' ->
44+
Buffer.add_char buf '\t';
45+
read_string buf lexbuf
46+
| Plus (Compl ('"' | '\\')) ->
47+
Buffer.add_string buf (Utf8.lexeme lexbuf);
48+
read_string buf lexbuf
49+
| eof -> raise (SyntaxError "String is not terminated")
50+
| _ -> raise (SyntaxError ("Illegal string character: " ^ Utf8.lexeme lexbuf))
51+
52+
and comment lexbuf =
53+
match%sedlex lexbuf with
54+
| newline | eof -> read lexbuf
55+
| _ ->
56+
ignore (Sedlexing.next lexbuf);
57+
comment lexbuf
58+
59+
and comments lexbuf =
60+
match%sedlex lexbuf with
61+
| "'''" -> read lexbuf
62+
| _ ->
63+
ignore (Sedlexing.next lexbuf);
64+
comments lexbuf
65+
66+
and read lexbuf =
67+
match%sedlex lexbuf with
68+
| is_var_start, Star is_var_rest -> VAR (Utf8.lexeme lexbuf)
69+
| is_func_start, Star is_func_rest -> SYM (Utf8.lexeme lexbuf)
70+
| '\'' -> comment lexbuf
71+
| "'''" -> comments lexbuf
72+
| '_' -> PLACEHOLDER
73+
| '.' -> DOT
74+
| '|' -> BAR
75+
| '[' -> LBRACK
76+
| ']' -> RBRACK
77+
| '(' -> LPAR
78+
| ')' -> RPAR
79+
| ',' -> COMMA
80+
| '@' -> AT
81+
| '&' -> AMP
82+
| '+' -> PLUS
83+
| '-' -> MINUS
84+
| ':' -> CONS
85+
| ';' -> SEMICOLON
86+
| '"' -> read_string (Buffer.create 128) lexbuf
87+
| space | newline -> read lexbuf
88+
| eof -> EOF
89+
| _ ->
90+
raise
91+
(SyntaxError
92+
("Unexpected character '" ^ Utf8.lexeme lexbuf ^ "' during lexing") )

src/lsc/lsc_lexer.mll

Lines changed: 0 additions & 68 deletions
This file was deleted.

src/lsc/lsc_parser.mly

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
open Lsc_ast
33
%}
44

5+
%token <string> VAR
6+
%token <string> SYM
57
%token BAR
68
%token NEQ
79
%token COMMA
8-
%token <string> VAR
9-
%token <string> SYM
1010
%token PLUS MINUS
1111
%token CONS
1212
%token SEMICOLON

src/stellogen/dune

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
(library
22
(name stellogen)
3-
(libraries base common))
3+
(libraries base common menhirLib)
4+
(preprocess
5+
(pps sedlex.ppx)))
46

57
(env
68
(dev
@@ -10,6 +12,4 @@
1012
(menhir
1113
(modules ../common/common_parser lsc_parser sgen_parser)
1214
(merge_into sgen_parser)
13-
(flags --explain --dump))
14-
15-
(ocamllex sgen_lexer)
15+
(flags --table --explain --dump))

src/stellogen/sgen_eval.ml

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -500,9 +500,13 @@ let rec eval_decl ~typecheckonly ~notyping env :
500500
| Use path ->
501501
let path = List.map path ~f:string_of_ray in
502502
let formatted_filename = String.concat ~sep:"/" path ^ ".sg" in
503-
let lexbuf = Lexing.from_channel (Stdlib.open_in formatted_filename) in
504-
lexbuf.lex_curr_p <-
505-
{ lexbuf.lex_curr_p with pos_fname = formatted_filename };
503+
let lexbuf =
504+
Sedlexing.Utf8.from_channel (Stdlib.open_in formatted_filename)
505+
in
506+
let start_pos filename =
507+
{ Lexing.pos_fname = filename; pos_lnum = 1; pos_bol = 0; pos_cnum = 0 }
508+
in
509+
Sedlexing.set_position lexbuf (start_pos formatted_filename);
506510
let p = Sgen_parsing.parse_with_error lexbuf in
507511
let* env = eval_program ~typecheckonly ~notyping p in
508512
Ok env

0 commit comments

Comments
 (0)