Skip to content

Commit 8d3be96

Browse files
authored
Merge pull request #5654 from unisonweb/lookahead
Add lookahead and negative lookahead to pattern builtins
2 parents 6d0178a + b88baa4 commit 8d3be96

File tree

22 files changed

+1032
-337
lines changed

22 files changed

+1032
-337
lines changed

parser-typechecker/src/Unison/Builtin.hs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ names = Names terms types
8383
-- | Report whether a reference is the second component of one of the entries
-- in 'builtinTypes'.
--
-- The lookup set is bound outside the membership test, so it is not rebuilt
-- for every argument.
isBuiltinType :: R.Reference -> Bool
isBuiltinType = (`Set.member` builtinTypeRefs)
  where
    builtinTypeRefs = Set.fromList (snd <$> builtinTypes)
8787

8888
typeLookup :: TL.TypeLookup Symbol Ann
8989
typeLookup =
@@ -521,6 +521,8 @@ builtinsSrc =
521521
B "Pattern.captureAs" $ forall1 "a" (\a -> a --> pat a --> pat a),
522522
B "Pattern.join" $ forall1 "a" (\a -> list (pat a) --> pat a),
523523
B "Pattern.or" $ forall1 "a" (\a -> pat a --> pat a --> pat a),
524+
B "Pattern.lookahead" $ forall1 "a" (\a -> pat a --> pat a),
525+
B "Pattern.negativeLookahead" $ forall1 "a" (\a -> pat a --> pat a),
524526
-- Pattern.run : Pattern a -> a -> Optional ([a], a)
525527
B "Pattern.run" $ forall1 "a" (\a -> pat a --> a --> optionalt (tuple [list a, a])),
526528
B "Pattern.isMatch" $ forall1 "a" (\a -> pat a --> a --> boolean),

parser-typechecker/src/Unison/Util/Text/Pattern.hs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ data Pattern
1717
| Eof -- succeed if given the empty text, fail otherwise
1818
| Literal Text -- succeed if input starts with the given text, advance by that text
1919
| Char CharPattern -- succeed if input starts with a char matching the given pattern, advance by 1 char
20+
| Lookahead Pattern -- Positive lookahead
21+
| NegativeLookahead Pattern -- Negative lookahead
2022
deriving (Show, Eq, Ord)
2123

2224
data CharPattern
@@ -106,7 +108,7 @@ capturesToList c = c []
106108
-- | A compiled pattern in continuation-passing form. Its arguments are, in
-- order: the failure continuation, the success continuation, the current
-- 'Stack', and the remaining input. Both continuations take a 'Stack' and the
-- input 'Text' and yield the final result @r@.
type Compiled r = (Stack -> Text -> r) -> (Stack -> Text -> r) -> Stack -> Text -> r
107109

108110
compile :: Pattern -> Compiled r
109-
compile !Eof !err !success = go
111+
compile Eof !err !success = go
110112
where
111113
go acc t
112114
| Text.size t == 0 = success acc t
@@ -194,6 +196,12 @@ compile (Replicate m n p) !err !success = case p of
194196
dropper ok acc t
195197
| (i, rest) <- Text.dropWhileMax ok n t, i >= m = success acc rest
196198
| otherwise = err acc t
199+
-- Lookahead assertions. Both delegate to 'lookahead', which runs the inner
-- pattern and then resumes from the stack and input it started with.
compile (Lookahead p) !err !success =
  lookahead "Lookahead" (compile p) err success
-- Negative lookahead reuses the same machinery with the two continuations
-- swapped: the inner pattern matching is the failure case.
compile (NegativeLookahead p) !err !success =
  lookahead "NegativeLookahead" (compile p) success err
197205

198206
charInPred, charNotInPred :: [Char] -> Char -> Bool
199207
charInPred [] = const False
@@ -236,3 +244,17 @@ try msg c err success stk rem =
236244
Mark _ rem stk -> err stk rem
237245
_ -> error $ "Pattern compiler error in: " <> msg
238246
{-# INLINE try #-}
247+
248+
-- | Run @c@ without letting it change the matcher state.
--
-- A 'Mark' holding the current input is pushed on top of the stack before @c@
-- runs. Whichever continuation @c@ takes, that mark is popped again and the
-- corresponding outer continuation is resumed with the stack and input saved
-- in it. The mark's first field is ignored when popping (presumably the
-- captures accumulated while running @c@, so those are dropped).
--
-- @msg@ only labels the error raised if the top of the stack is not a 'Mark'
-- when @c@ returns.
lookahead :: String -> Compiled r -> Compiled r
lookahead msg c err success stk rem =
  c (rewindTo err) (rewindTo success) (Mark id rem stk) rem
  where
    -- Pop the mark pushed above and continue with @k@ from the saved state;
    -- the input position reached by @c@ is deliberately ignored.
    rewindTo k marked _ = case marked of
      Mark _ saved below -> k below saved
      _ -> error $ "Pattern compiler error in: " <> msg
{-# INLINE lookahead #-}

scheme-libs/racket/unison/pattern.rkt

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
[join (-> pattern? ... pattern?)]
3636
[join* (-> chunked-list? pattern?)]
3737
[choice (-> pattern? pattern? ... pattern?)]
38+
[lookahead (-> pattern? pattern?)]
39+
[negative-lookahead (-> pattern? pattern?)]
3840
[capture (-> pattern? pattern?)]
3941
[capture-as (-> any/c pattern? pattern?)]
4042
[many (-> pattern? pattern?)]
@@ -57,6 +59,8 @@
5759
(struct p:literal (cstr) #:transparent)
5860
(struct p:join (pats) #:transparent)
5961
(struct p:or (left right) #:transparent)
62+
;; Pattern node for positive lookahead; `pat` is the wrapped sub-pattern.
(struct p:lookahead (pat) #:transparent)
63+
;; Pattern node for negative lookahead; `pat` is the wrapped sub-pattern.
(struct p:negative-lookahead (pat) #:transparent)
6064
(struct p:capture (pat) #:transparent)
6165
(struct p:capture-as (cap pat) #:transparent)
6266
(struct p:many (pat) #:transparent)
@@ -126,6 +130,10 @@
126130
[(cons pat pats)
127131
(p:or (pattern-pat pat) (loop pats))])))]))
128132

133+
;; Build a positive-lookahead pattern around `pat`: its inner representation
;; is taken out with `pattern-pat`, wrapped in a `p:lookahead` node, and
;; repackaged with `make-pattern`.
(define (lookahead pat)
  (let ([inner (pattern-pat pat)])
    (make-pattern (p:lookahead inner))))
134+
135+
;; Build a negative-lookahead pattern around `pat`: its inner representation
;; is taken out with `pattern-pat`, wrapped in a `p:negative-lookahead` node,
;; and repackaged with `make-pattern`.
(define (negative-lookahead pat)
  (let ([inner (pattern-pat pat)])
    (make-pattern (p:negative-lookahead inner))))
136+
129137
;; Build a capturing pattern around `pat`: its inner representation is taken
;; out with `pattern-pat`, wrapped in a `p:capture` node, and repackaged with
;; `make-pattern`.
(define (capture pat)
  (let ([inner (pattern-pat pat)])
    (make-pattern (p:capture inner))))
130138
;; Like `capture`, but builds a `p:capture-as` node that also carries `cap`
;; alongside the inner representation of `pat`.
(define (capture-as cap pat)
  (let ([inner (pattern-pat pat)])
    (make-pattern (p:capture-as cap inner))))
@@ -207,6 +215,26 @@
207215
(ok cstr* captures*)
208216
(right-m cstr captures)))]
209217

218+
[(p:lookahead pat)
219+
(define pat-m (recur pat in-capture? done))
220+
(λ (cstr captures)
221+
(define-values [cstr* captures*] (pat-m cstr captures))
222+
(if cstr*
223+
(ok cstr captures)
224+
(fail))
225+
)
226+
]
227+
228+
[(p:negative-lookahead pat)
229+
(define pat-m (recur pat in-capture? done))
230+
(λ (cstr captures)
231+
(define-values [cstr* captures*] (pat-m cstr captures))
232+
(if cstr*
233+
(fail)
234+
(ok cstr captures))
235+
)
236+
]
237+
210238
[(p:capture pat)
211239
(cond
212240
[in-capture?

scheme-libs/racket/unison/primops/pattern.rkt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@
5151
builtin-Pattern.many:termlink
5252
builtin-Pattern.or
5353
builtin-Pattern.or:termlink
54+
;; Lookahead builtins and their `:termlink` companions, each listed once.
builtin-Pattern.lookahead
builtin-Pattern.lookahead:termlink
builtin-Pattern.negativeLookahead
builtin-Pattern.negativeLookahead:termlink
5459
builtin-Pattern.replicate
5560
builtin-Pattern.replicate:termlink
5661
builtin-Pattern.run
@@ -154,6 +159,10 @@
154159

155160
;; Pattern.or builtin: forwards both patterns, in order, to `choice`.
(define-unison-builtin (builtin-Pattern.or l r) (choice l r))
156161

162+
;; Pattern.lookahead builtin: forwards its pattern argument to `lookahead`.
(define-unison-builtin (builtin-Pattern.lookahead p) (lookahead p))
163+
164+
;; Pattern.negativeLookahead builtin: forwards its pattern argument to
;; `negative-lookahead`.
(define-unison-builtin (builtin-Pattern.negativeLookahead p) (negative-lookahead p))
165+
157166
;; Pattern.replicate builtin. The builtin receives its arguments as (m n p),
;; while `replicate` is called with the pattern first: (p m n).
(define-unison-builtin (builtin-Pattern.replicate m n p) (replicate p m n))
159168

unison-runtime/src/Unison/Runtime/Builtin.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,6 +1250,8 @@ declareForeigns = do
12501250
declareForeign Untracked 2 Pattern_captureAs
12511251
declareForeign Untracked 1 Pattern_join
12521252
declareForeign Untracked 2 Pattern_or
1253+
declareForeign Untracked 1 Pattern_lookahead
1254+
declareForeign Untracked 1 Pattern_negativeLookahead
12531255
declareForeign Untracked 3 Pattern_replicate
12541256

12551257
declareForeign Untracked 2 Pattern_run

unison-runtime/src/Unison/Runtime/Foreign/Function.hs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -834,6 +834,10 @@ foreignCallHelper = \case
834834
evaluate . TPat.cpattern . TPat.Join $ map (\(TPat.CP p _) -> p) ps
835835
Pattern_or -> mkForeign $
836836
\(TPat.CP l _, TPat.CP r _) -> evaluate . TPat.cpattern $ TPat.Or l r
837+
Pattern_lookahead -> mkForeign $
838+
\(TPat.CP p _) -> evaluate . TPat.cpattern $ TPat.Lookahead p
839+
Pattern_negativeLookahead -> mkForeign $
840+
\(TPat.CP p _) -> evaluate . TPat.cpattern $ TPat.NegativeLookahead p
837841
Pattern_replicate -> mkForeign $
838842
\(m0 :: Word64, n0 :: Word64, TPat.CP p _) ->
839843
let m = fromIntegral m0; n = fromIntegral n0

unison-runtime/src/Unison/Runtime/Foreign/Function/Type.hs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,8 @@ data ForeignFunc
231231
| Pattern_captureAs
232232
| Pattern_join
233233
| Pattern_or
234+
| Pattern_lookahead
235+
| Pattern_negativeLookahead
234236
| Pattern_replicate
235237
| Pattern_run
236238
| Pattern_isMatch
@@ -497,6 +499,8 @@ foreignFuncBuiltinName = \case
497499
Pattern_captureAs -> "Pattern.captureAs"
498500
Pattern_join -> "Pattern.join"
499501
Pattern_or -> "Pattern.or"
502+
Pattern_lookahead -> "Pattern.lookahead"
503+
Pattern_negativeLookahead -> "Pattern.negativeLookahead"
500504
Pattern_replicate -> "Pattern.replicate"
501505
Pattern_run -> "Pattern.run"
502506
Pattern_isMatch -> "Pattern.isMatch"

0 commit comments

Comments
 (0)