Skip to content

Commit aaec78e

Browse files
committed
New module Parsing.Combinators.Array
For fast combinator variations which return Array.
1 parent 1ba4b1c commit aaec78e

File tree

6 files changed

+122
-22
lines changed

6 files changed

+122
-22
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ Breaking changes:
88

99
New features:
1010

11+
- Add `Array` combinators in a new `Combinators.Array` module (#199 by @jamesdbrock)
12+
1113
Bugfixes:
1214

1315
Other improvements:

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,15 +90,15 @@ There are other `String` parsers in the module `Parsing.String.Basic`, for examp
9090

9191
Parser combinators are in this package in the module `Parsing.Combinators`.
9292

93-
A parser combinator is a function which takes a parser as an argument and returns a new parser. The `many` combinator, for example, will repeat a parser as many times as it can. So the parser `many letter` will have type `Parser String (List Char)`.
93+
A parser combinator is a function which takes a parser as an argument and returns a new parser. The `many` combinator, for example, will repeat a parser as many times as it can. So the parser `many letter` will have type `Parser String (Array Char)`.
9494

9595
Running the parser
9696

9797
```purescript
9898
runParser "aBabaB" (many ayebee)
9999
```
100100

101-
will return `Right (true : false : true : Nil)`.
101+
will return `Right [true, false, true]`.
102102

103103
## Stack-safety
104104

bench/Main.purs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ import Effect.Exception (throw)
7070
import Effect.Unsafe (unsafePerformEffect)
7171
import Parsing (runParser)
7272
import Parsing.Combinators (chainl, chainr, many, manyTill, manyTill_, sepBy, sepEndBy1, skipMany)
73+
import Parsing.Combinators.Array as Combinators.Array
7374
import Parsing.String (anyChar, eof, string)
7475
import Parsing.String.Basic (digit)
7576
import Performance.Minibench (benchWith)
@@ -160,6 +161,8 @@ main = do
160161
$ \_ -> throwLeft $ runParser string23_10000 (many anyChar)
161162
htmlTableWrap "runParser Array.many anyChar 10000" $ benchWith 50
162163
$ \_ -> throwLeft $ runParser string23_10000 (Array.many anyChar)
164+
htmlTableWrap "runParser Combinators.Array.many anyChar 10000" $ benchWith 50
165+
$ \_ -> throwLeft $ runParser string23_10000 (Combinators.Array.many anyChar)
163166

164167
log "<th><h2>skipMany anyChar 10000</h2></th>"
165168
htmlTableWrap "runParser skipMany anyChar 10000" $ benchWith 50

src/Parsing/Combinators.purs

Lines changed: 17 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
-- |
1616
-- | ### Data.Array
1717
-- |
18-
-- | Expect better parsing speed from the `List`-based combinators in this
19-
-- | module than from `Array`-based combinators.
18+
-- | The `many` and `many1` combinators in the __Parsing.Combinators.Array__
19+
-- | module are faster.
2020
-- |
2121
-- | * [Data.Array.many](https://pursuit.purescript.org/packages/purescript-arrays/docs/Data.Array#v:many)
2222
-- | * [Data.Array.some](https://pursuit.purescript.org/packages/purescript-arrays/docs/Data.Array#v:some)
@@ -204,15 +204,15 @@ lookAhead (ParserT k1) = ParserT
204204
(mkFn2 \_ res -> runFn2 done state1 res)
205205
)
206206

207-
-- | Match the parser `p` as many times as possible.
207+
-- | Match the phrase `p` as many times as possible.
208208
-- |
209209
-- | If `p` never consumes input when it
210210
-- | fails then `many p` will always succeed,
211211
-- | but may return an empty list.
212212
many :: forall s m a. ParserT s m a -> ParserT s m (List a)
213213
many = List.manyRec
214214

215-
-- | Match one or more times.
215+
-- | Match the phrase `p` as many times as possible, at least once.
216216
many1 :: forall m s a. ParserT s m a -> ParserT s m (NonEmptyList a)
217217
many1 p = NEL.cons' <$> p <*> List.manyRec p
218218

@@ -434,14 +434,13 @@ manyTill_ :: forall s a m e. ParserT s m a -> ParserT s m e -> ParserT s m (Tupl
434434
manyTill_ p end = tailRecM go Nil
435435
where
436436
go :: List a -> ParserT s m (Step (List a) (Tuple (List a) e))
437-
go xs =
437+
go xs = alt
438438
do
439439
t <- end
440440
pure (Done (Tuple (reverse xs) t))
441-
<|>
442-
do
443-
x <- p
444-
pure (Loop (x : xs))
441+
do
442+
x <- p
443+
pure (Loop (x : xs))
445444

446445
-- | Parse the phrase as many times as possible, at least *N* times, but no
447446
-- | more than *M* times.
@@ -462,17 +461,15 @@ manyIndex from to p =
462461
go (Tuple i xs) =
463462
if i >= to then
464463
pure (Done (Tuple i (reverse xs)))
465-
else
466-
( do
467-
x <- p i
468-
pure (Loop (Tuple (i + 1) (x : xs)))
469-
)
470-
<|>
471-
( if i >= from then
472-
pure (Done (Tuple i (reverse xs)))
473-
else
474-
fail "Expected more phrases"
475-
)
464+
else alt
465+
do
466+
x <- p i
467+
pure (Loop (Tuple (i + 1) (x : xs)))
468+
do
469+
if i >= from then
470+
pure (Done (Tuple i (reverse xs)))
471+
else
472+
fail "Expected more phrases"
476473

477474
-- | If the parser succeeds without advancing the input stream position,
478475
-- | then force the parser to fail.

src/Parsing/Combinators/Array.purs

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
-- | These combinators will produce `Array`s, as opposed to the other combinators
2+
-- | of the same names in the __Parsing.Combinators__ module
3+
-- | which mostly produce `List`s. These `Array` combinators will run in a bit
4+
-- | less time (*~85% runtime*) than the similar `List` combinators, and they will run in a
5+
-- | lot less time (*~10% runtime*) than the similar combinators in __Data.Array__.
6+
-- |
7+
-- | If there is some other combinator which returns
8+
-- | a `List` but we want an `Array`, and there is no `Array` version of the
9+
-- | combinator in this module, then we can rely on the
10+
-- | [__`Data.Array.fromFoldable`__](https://pursuit.purescript.org/packages/purescript-arrays/docs/Data.Array#v:fromFoldable)
11+
-- | function for a pretty fast transformation from `List` to `Array`.
12+
module Parsing.Combinators.Array
13+
( many
14+
, many1
15+
, manyTill_
16+
, manyIndex
17+
) where
18+
19+
import Prelude
20+
21+
import Control.Alt (alt)
22+
import Control.Monad.Rec.Class (Step(..), tailRecM)
23+
import Data.Array as Array
24+
import Data.Array.NonEmpty (NonEmptyArray)
25+
import Data.Array.NonEmpty as Array.NonEmpty
26+
import Data.List (List(..), (:))
27+
import Data.Maybe (Maybe(..))
28+
import Data.Tuple (Tuple(..))
29+
import Parsing (ParserT, fail)
30+
import Parsing.Combinators (try)
31+
32+
-- | Match the phrase `p` as many times as possible.
33+
-- |
34+
-- | If `p` never consumes input when it
35+
-- | fails then `many p` will always succeed,
36+
-- | but may return an empty array.
37+
many :: forall s m a. ParserT s m a -> ParserT s m (Array a)
38+
many p = do
39+
rlist <- flip tailRecM Nil $ \xs -> alt
40+
do
41+
x <- try p
42+
pure (Loop (x : xs))
43+
do
44+
pure (Done xs)
45+
pure $ Array.reverse $ Array.fromFoldable rlist
46+
47+
-- | Match the phrase `p` as many times as possible, at least once.
48+
many1 :: forall s m a. ParserT s m a -> ParserT s m (NonEmptyArray a)
49+
many1 p = do
50+
xs <- many p
51+
case Array.NonEmpty.fromArray xs of
52+
Nothing -> fail "Expected at least 1"
53+
Just xs' -> pure xs'
54+
55+
-- | Parse many phrases until the terminator phrase matches.
56+
-- | Returns the list of phrases and the terminator phrase.
57+
manyTill_ :: forall s a m e. ParserT s m a -> ParserT s m e -> ParserT s m (Tuple (Array a) e)
58+
manyTill_ p end = do
59+
Tuple rlist e <- flip tailRecM Nil \xs -> alt
60+
do
61+
t <- end
62+
pure (Done (Tuple xs t))
63+
do
64+
x <- p
65+
pure (Loop (x : xs))
66+
pure $ Tuple (Array.reverse $ Array.fromFoldable rlist) e
67+
68+
-- | Parse the phrase as many times as possible, at least *N* times, but no
69+
-- | more than *M* times.
70+
-- | If the phrase can’t parse as least *N* times then the whole
71+
-- | parser fails. If the phrase parses successfully *M* times then stop.
72+
-- | The current phrase index, starting at *0*, is passed to the phrase.
73+
-- |
74+
-- | Returns the array of parse results and the number of results.
75+
-- |
76+
-- | `manyIndex n n (\_ -> p)` is equivalent to `replicateA n p`.
77+
manyIndex :: forall s m a. Int -> Int -> (Int -> ParserT s m a) -> ParserT s m (Tuple Int (Array a))
78+
manyIndex from to p =
79+
if from > to || from < 0 then
80+
pure (Tuple 0 [])
81+
else do
82+
Tuple n rlist <- tailRecM go (Tuple 0 Nil)
83+
pure $ Tuple n $ Array.reverse $ Array.fromFoldable rlist
84+
where
85+
go (Tuple i xs) =
86+
if i >= to then
87+
pure (Done (Tuple i xs))
88+
else alt
89+
do
90+
x <- p i
91+
pure (Loop (Tuple (i + 1) (x : xs)))
92+
do
93+
if i >= from then
94+
pure (Done (Tuple i xs))
95+
else
96+
fail "Expected more phrases"

test/Main.purs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import Effect.Unsafe (unsafePerformEffect)
3535
import Node.Process (lookupEnv)
3636
import Parsing (ParseError(..), Parser, ParserT, Position(..), consume, fail, initialPos, parseErrorMessage, parseErrorPosition, position, region, runParser)
3737
import Parsing.Combinators (advance, between, chainl, chainl1, chainr, chainr1, choice, empty, endBy, endBy1, lookAhead, many, many1, many1Till, many1Till_, manyIndex, manyTill, manyTill_, notFollowedBy, optionMaybe, sepBy, sepBy1, sepEndBy, sepEndBy1, skipMany, skipMany1, try, (<?>), (<??>), (<~?>))
38+
import Parsing.Combinators.Array as Combinators.Array
3839
import Parsing.Expr (Assoc(..), Operator(..), buildExprParser)
3940
import Parsing.Language (haskellDef, haskellStyle, javaStyle)
4041
import Parsing.String (anyChar, anyCodePoint, anyTill, char, eof, match, regex, rest, satisfy, string, takeN)
@@ -594,6 +595,7 @@ main = do
594595

595596
parseTest "(((a)))" 3 nested
596597
parseTest "aaa" (Cons "a" (Cons "a" (Cons "a" Nil))) $ many (string "a")
598+
parseTest "abc-" [ 'a', 'b', 'c' ] $ Combinators.Array.many letter
597599
parseTest "(ab)" (Just "b") $ parens do
598600
_ <- string "a"
599601
optionMaybe $ string "b"

0 commit comments

Comments
 (0)