Skip to content

Commit f3e7473

Browse files
committed
adjust module lexer to color #; comments that appear before the #lang line the way that the racket lexer does
1 parent 45f04f8 commit f3e7473

File tree

2 files changed

+91
-55
lines changed

2 files changed

+91
-55
lines changed

syntax-color-lib/syntax-color/module-lexer.rkt

Lines changed: 52 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -27,40 +27,39 @@ to delegate to the scheme-lexer (in the 'no-lang-line mode).
2727
2828
|#
2929

30-
31-
(define (do-module-lexer* in offset mode filter-lexer)
30+
(struct before-lang-line (racket-lexer-mode) #:prefab)
31+
(define (do-module-lexer* in offset mode can-return-attribs-hash? filter-lexer)
3232
(cond
33-
[(or (not mode) (eq? mode 'before-lang-line))
33+
[(or (not mode) (before-lang-line? mode))
3434
(define lexer-port (peeking-input-port in #:init-position (+ 1 (file-position in))))
3535
(let-values ([(line col pos) (port-next-location in)])
3636
(when line
3737
(port-count-lines! lexer-port)))
3838
(set-port-next-location-from in lexer-port)
39-
(define-values (lexeme type data new-token-start new-token-end) (racket-lexer lexer-port))
39+
(define-values (lexeme type data new-token-start new-token-end backup new-mode)
40+
(racket-lexer* lexer-port offset
41+
(if (before-lang-line? mode)
42+
(before-lang-line-racket-lexer-mode mode)
43+
#f)))
44+
(define the-type (if (symbol? type) type (hash-ref type 'type)))
45+
(define is-a-comment-type? (or (equal? the-type 'comment) (equal? the-type 'white-space) (equal? the-type 'sexp-comment)))
4046
(cond
41-
[(equal? type 'sexp-comment)
42-
(define position-before-read (file-position lexer-port))
43-
(define read-succeeded?
44-
(with-handlers ([exn:fail:read? (λ (x) #f)])
45-
(read lexer-port)
46-
#t))
47-
(cond
48-
[read-succeeded?
49-
;; sync ports
50-
(for/list ([i (in-range (file-position in) (file-position lexer-port))])
51-
(read-byte-or-special in))
52-
(define-values (_1 _2 new-token-end) (port-next-location in))
53-
(values lexeme type data new-token-start new-token-end 0 'before-lang-line)]
54-
[else
55-
(copy-port in (open-output-nowhere))
56-
(define-values (_1 _2 end) (port-next-location in))
57-
(values "#;" 'error #f new-token-start end 0 'before-lang-line)])]
58-
[(or (eq? type 'comment) (eq? type 'white-space))
47+
[(or is-a-comment-type? (and (hash? type) (hash-ref type 'comment? #f)))
5948
(define lexer-end (file-position lexer-port))
6049
;; sync ports
6150
(for/list ([i (in-range (file-position in) (file-position lexer-port))])
6251
(read-byte-or-special in))
63-
(values lexeme type data new-token-start new-token-end 0 'before-lang-line)]
52+
(values lexeme
53+
(cond
54+
[can-return-attribs-hash?
55+
type]
56+
[is-a-comment-type?
57+
the-type]
58+
[(and (hash? type) (equal? (hash-ref type 'type) 'error))
59+
'error]
60+
[else
61+
'comment])
62+
data new-token-start new-token-end backup (before-lang-line new-mode))]
6463
[else
6564
;; look for #lang:
6665
(define p (peeking-input-port in #:init-position (+ 1 (file-position in))))
@@ -107,7 +106,8 @@ to delegate to the scheme-lexer (in the 'no-lang-line mode).
107106
(cons the-lexer #f)
108107
the-lexer))]
109108

110-
[(and (eq? type 'other)
109+
[(and (or (equal? type 'other)
110+
(and (hash? type) (equal? (hash-ref type 'type) 'other)))
111111
(string? lexeme)
112112
;; the read-language docs say that this is all it takes to commit to a #lang
113113
(regexp-match #rx"^#[!l]" lexeme))
@@ -118,46 +118,52 @@ to delegate to the scheme-lexer (in the 'no-lang-line mode).
118118
[else
119119
(for ([i (in-range (file-position in) (file-position lexer-port))])
120120
(read-byte-or-special in))
121-
(values lexeme type data new-token-start new-token-end 0 'no-lang-line)])])]
121+
(values lexeme
122+
(if can-return-attribs-hash? type (attribs->symbol type))
123+
data new-token-start new-token-end 0 'no-lang-line)])])]
122124
[(eq? mode 'no-lang-line)
123125
(let-values ([(lexeme type data new-token-start new-token-end)
124126
(racket-lexer in)])
125127
(values lexeme type data new-token-start new-token-end 0 'no-lang-line))]
126128
[(pair? mode)
127129
;; #lang-selected language consumes and produces a mode:
128-
(let-values ([(lexeme type data new-token-start new-token-end backup-delta new-mode)
130+
(let-values ([(lexeme type data new-token-start new-token-end backup-delta new-mode)
129131
((car mode) in offset (cdr mode))])
130-
(values lexeme type data new-token-start new-token-end backup-delta
132+
(values lexeme
133+
(if can-return-attribs-hash? type (attribs->symbol type))
134+
data new-token-start new-token-end backup-delta
131135
(if (dont-stop? new-mode)
132136
(dont-stop (cons (car mode) (dont-stop-val new-mode)))
133137
(cons (car mode) new-mode))))]
134138
[else
135139
;; #lang-selected language (or default) doesn't deal with modes:
136140
(let-values ([(lexeme type data new-token-start new-token-end)
137141
(mode in)])
138-
(values lexeme type data new-token-start new-token-end 0 mode))]))
142+
(values lexeme
143+
(if can-return-attribs-hash? type (attribs->symbol type))
144+
data new-token-start new-token-end 0 mode))]))
145+
146+
(define (attribs->symbol type)
147+
(if (hash? type)
148+
(hash-ref type 'type 'unknown)
149+
type))
139150

140151
(define (module-lexer* in offset mode)
141-
(do-module-lexer* in offset mode (lambda (lexer) lexer)))
152+
(do-module-lexer* in offset mode #t (lambda (lexer) lexer)))
142153

143154
(define (module-lexer in offset mode)
144-
(define (attribs->symbol type)
145-
(if (hash? type)
146-
(hash-ref type 'type 'unknown)
147-
type))
148-
(define-values (lexeme type data start end backup new-mode)
149-
(do-module-lexer* in offset mode (lambda (lexer)
150-
(cond
151-
[(eq? lexer racket-lexer*) racket-lexer]
152-
[(not (procedure-arity-includes? lexer 3)) lexer]
153-
[else
154-
(procedure-rename
155-
(lambda (in offset mode)
156-
(define-values (lexeme type data start end backup new-mode)
157-
(lexer in offset mode))
158-
(values lexeme (attribs->symbol type) data start end backup new-mode))
159-
(object-name lexer))]))))
160-
(values lexeme (attribs->symbol type) data start end backup new-mode))
155+
(do-module-lexer* in offset mode #f
156+
(lambda (lexer)
157+
(cond
158+
[(eq? lexer racket-lexer*) racket-lexer]
159+
[(not (procedure-arity-includes? lexer 3)) lexer]
160+
[else
161+
(procedure-rename
162+
(lambda (in offset mode)
163+
(define-values (lexeme type data start end backup new-mode)
164+
(lexer in offset mode))
165+
(values lexeme (attribs->symbol type) data start end backup new-mode))
166+
(object-name lexer))]))))
161167

162168
(define (set-port-next-location-from src dest)
163169
(define-values (line col pos) (port-next-location src))

syntax-color-test/tests/syntax-color/module-lexer.rkt

Lines changed: 39 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
racket/gui/base
55
rackunit)
66

7+
(struct before-lang-line (racket-lexer-mode) #:prefab)
78
(define (lex input count? #:modes [modes (list module-lexer module-lexer*)])
89
(define results
910
(for/list ([lexer (in-list modes)])
@@ -32,6 +33,7 @@
3233
(cond
3334
[(procedure? mode)
3435
`(proc ,(object-name mode))]
36+
[(before-lang-line? mode) 'before-lang-line]
3537
[(and (pair? mode)
3638
(procedure? (car mode)))
3739
;; a hack: translate 'racket-lexer* shape to 'racket-lexer
@@ -94,24 +96,52 @@
9496
`((" " white-space 1 2 #f)
9597
("#lang BOGUS" error 2 13 before-lang-line)
9698
(,eof eof #f #f no-lang-line)))
97-
(check-equal? (lex "#;()#lang BOGUS" #t)
98-
`(("#;" sexp-comment 1 5 #f)
99+
(check-equal? (lex "#;()#lang BOGUS" #t #:modes (list module-lexer*))
100+
`(("#;" sexp-comment 1 3 #f)
101+
("(" #hash((comment? . #t) (type . parenthesis)) 3 4 before-lang-line)
102+
(")" #hash((comment? . #t) (type . parenthesis)) 4 5 before-lang-line)
103+
("#lang BOGUS" error 5 16 before-lang-line)
104+
(,eof eof #f #f no-lang-line)))
105+
(check-equal? (lex "#;()#lang BOGUS" #t #:modes (list module-lexer))
106+
`(("#;" sexp-comment 1 3 #f)
107+
("(" comment 3 4 before-lang-line)
108+
(")" comment 4 5 before-lang-line)
99109
("#lang BOGUS" error 5 16 before-lang-line)
100110
(,eof eof #f #f no-lang-line)))
101111
(check-equal? (lex "#lang BOGUS\n\"aa" #t)
102112
`(("#lang BOGUS" error 1 12 #f)
103113
("\n" white-space 12 13 no-lang-line)
104114
("\"aa" error 13 16 no-lang-line)
105115
(,eof eof #f #f no-lang-line)))
106-
(check-equal? (lex "#;(stuff" #t)
107-
`(;; token perhaps should be the whole string (up to the first special?)
108-
("#;" error 1 9 #f)
116+
(check-equal? (lex "#;(stuff" #t #:modes (list module-lexer*))
117+
`(;; this should arguably be an error, but the racket lexer doesn't make it
118+
;; an error so we inherit that behavior here
119+
("#;" sexp-comment 1 3 #f)
120+
("(" #hash((comment? . #t) (type . parenthesis)) 3 4 before-lang-line)
121+
("stuff" #hash((comment? . #t) (type . symbol)) 4 9 before-lang-line)
122+
(,eof eof #f #f before-lang-line)))
123+
(check-equal? (lex "#;(stuff" #t #:modes (list module-lexer))
124+
`(;; this should arguably be an error, but the racket lexer doesn't make it
125+
;; an error so we inherit that behavior here
126+
("#;" sexp-comment 1 3 #f)
127+
("(" comment 3 4 before-lang-line)
128+
("stuff" comment 4 9 before-lang-line)
129+
(,eof eof #f #f before-lang-line)))
130+
(check-equal? (lex "#;\"ü" #t #:modes (list module-lexer*))
131+
`(("#;" sexp-comment 1 3 #f)
132+
("\"ü" #hash((comment? . #t) (type . error)) 3 5 before-lang-line)
133+
(,eof eof #f #f before-lang-line)))
134+
(check-equal? (lex "#;\"ü" #t #:modes (list module-lexer))
135+
`(("#;" sexp-comment 1 3 #f)
136+
("\"ü" error 3 5 before-lang-line)
109137
(,eof eof #f #f before-lang-line)))
110-
(check-equal? (lex "#;\"ü" #t)
111-
`(("#;" error 1 5 #f)
138+
(check-equal? (lex "#;ü" #t #:modes (list module-lexer*))
139+
`(("#;" sexp-comment 1 3 #f)
140+
("ü" #hash((comment? . #t) (type . symbol)) 3 4 before-lang-line)
112141
(,eof eof #f #f before-lang-line)))
113-
(check-equal? (lex "#;ü" #t)
114-
`(("#;" sexp-comment 1 4 #f)
142+
(check-equal? (lex "#;ü" #t #:modes (list module-lexer))
143+
`(("#;" sexp-comment 1 3 #f)
144+
("ü" comment 3 4 before-lang-line)
115145
(,eof eof #f #f before-lang-line)))
116146

117147
;; Check #; comment handling in `module-lexer` versus `module-lexer*` modes

0 commit comments

Comments
 (0)