|
| 1 | +open Sedlexing |
| 2 | +open Lsc_parser |
| 3 | + |
| 4 | +exception SyntaxError of string |
| 5 | + |
| 6 | +let buf = Sedlexing.Utf8.from_channel |
| 7 | + |
| 8 | +let is_func_start = [%sedlex.regexp? 'a' .. 'z' | '0' .. '9'] |
| 9 | + |
| 10 | +let is_func_rest = |
| 11 | + [%sedlex.regexp? 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' | '?'] |
| 12 | + |
| 13 | +let is_var_start = [%sedlex.regexp? 'A' .. 'Z'] |
| 14 | + |
| 15 | +let is_var_rest = |
| 16 | + [%sedlex.regexp? 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '-'] |
| 17 | + |
| 18 | +let space = [%sedlex.regexp? Plus (' ' | '\t')] |
| 19 | + |
| 20 | +let newline = [%sedlex.regexp? '\r' | '\n' | "\r\n"] |
| 21 | + |
| 22 | +let rec read_string buf lexbuf = |
| 23 | + match%sedlex lexbuf with |
| 24 | + | '"' -> SYM ("\"" ^ Buffer.contents buf ^ "\"") |
| 25 | + | '\\', '/' -> |
| 26 | + Buffer.add_char buf '/'; |
| 27 | + read_string buf lexbuf |
| 28 | + | '\\', '\\' -> |
| 29 | + Buffer.add_char buf '\\'; |
| 30 | + read_string buf lexbuf |
| 31 | + | '\\', 'b' -> |
| 32 | + Buffer.add_char buf '\b'; |
| 33 | + read_string buf lexbuf |
| 34 | + | '\\', 'f' -> |
| 35 | + Buffer.add_char buf '\012'; |
| 36 | + read_string buf lexbuf |
| 37 | + | '\\', 'n' -> |
| 38 | + Buffer.add_char buf '\n'; |
| 39 | + read_string buf lexbuf |
| 40 | + | '\\', 'r' -> |
| 41 | + Buffer.add_char buf '\r'; |
| 42 | + read_string buf lexbuf |
| 43 | + | '\\', 't' -> |
| 44 | + Buffer.add_char buf '\t'; |
| 45 | + read_string buf lexbuf |
| 46 | + | Plus (Compl ('"' | '\\')) -> |
| 47 | + Buffer.add_string buf (Utf8.lexeme lexbuf); |
| 48 | + read_string buf lexbuf |
| 49 | + | eof -> raise (SyntaxError "String is not terminated") |
| 50 | + | _ -> raise (SyntaxError ("Illegal string character: " ^ Utf8.lexeme lexbuf)) |
| 51 | + |
| 52 | +and comment lexbuf = |
| 53 | + match%sedlex lexbuf with |
| 54 | + | newline | eof -> read lexbuf |
| 55 | + | _ -> |
| 56 | + ignore (Sedlexing.next lexbuf); |
| 57 | + comment lexbuf |
| 58 | + |
| 59 | +and comments lexbuf = |
| 60 | + match%sedlex lexbuf with |
| 61 | + | "'''" -> read lexbuf |
| 62 | + | _ -> |
| 63 | + ignore (Sedlexing.next lexbuf); |
| 64 | + comments lexbuf |
| 65 | + |
| 66 | +and read lexbuf = |
| 67 | + match%sedlex lexbuf with |
| 68 | + | is_var_start, Star is_var_rest -> VAR (Utf8.lexeme lexbuf) |
| 69 | + | is_func_start, Star is_func_rest -> SYM (Utf8.lexeme lexbuf) |
| 70 | + | '\'' -> comment lexbuf |
| 71 | + | "'''" -> comments lexbuf |
| 72 | + | '_' -> PLACEHOLDER |
| 73 | + | '.' -> DOT |
| 74 | + | '|' -> BAR |
| 75 | + | '[' -> LBRACK |
| 76 | + | ']' -> RBRACK |
| 77 | + | '(' -> LPAR |
| 78 | + | ')' -> RPAR |
| 79 | + | ',' -> COMMA |
| 80 | + | '@' -> AT |
| 81 | + | '&' -> AMP |
| 82 | + | '+' -> PLUS |
| 83 | + | '-' -> MINUS |
| 84 | + | ':' -> CONS |
| 85 | + | ';' -> SEMICOLON |
| 86 | + | '"' -> read_string (Buffer.create 128) lexbuf |
| 87 | + | space | newline -> read lexbuf |
| 88 | + | eof -> EOF |
| 89 | + | _ -> |
| 90 | + raise |
| 91 | + (SyntaxError |
| 92 | + ("Unexpected character '" ^ Utf8.lexeme lexbuf ^ "' during lexing") ) |
0 commit comments