Skip to content

Commit d4050ae

Browse files
SiriusStarrGabriella439
authored andcommitted
Correct validation of unicode escapes (#1549)
1 parent f18ec5f commit d4050ae

File tree

2 files changed

+18
-8
lines changed

2 files changed

+18
-8
lines changed

dhall/src/Dhall/Parser/Expression.hs

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -597,21 +597,28 @@ parsers embedded = Parsers {..}
597597

598598
let toNumber = Data.List.foldl' (\x y -> x * 16 + y) 0
599599

600-
let fourCharacterEscapeSequence =
601-
fmap toNumber (Control.Monad.replicateM 4 hexNumber)
600+
let fourCharacterEscapeSequence = do
601+
ns <- Control.Monad.replicateM 4 hexNumber
602+
603+
let number = toNumber ns
604+
605+
Control.Monad.guard (validCodepoint number)
606+
<|> fail "Invalid Unicode code point"
607+
608+
return number
602609

603610
let bracedEscapeSequence = do
604611
_ <- char '{'
605612
ns <- some hexNumber
606613

607614
let number = toNumber ns
608615

609-
Control.Monad.guard (number <= 0x10FFFF && validCodepoint (Char.chr number))
616+
Control.Monad.guard (number <= 0x10FFFD && validCodepoint number)
610617
<|> fail "Invalid Unicode code point"
611618

612619
_ <- char '}'
613620

614-
return (toNumber ns)
621+
return number
615622

616623
n <- bracedEscapeSequence <|> fourCharacterEscapeSequence
617624

dhall/src/Dhall/Parser/Token.hs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ module Dhall.Parser.Token (
110110
import Dhall.Parser.Combinators
111111

112112
import Control.Applicative (Alternative(..), optional)
113+
import Data.Bits ((.&.))
113114
import Data.Functor (void)
114115
import Data.Semigroup (Semigroup(..))
115116
import Data.Text (Text)
@@ -135,12 +136,14 @@ import Prelude hiding (const, pi)
135136

136137
import qualified Text.Parser.Token
137138

138-
-- | Returns `True` if the given `Char` is a valid Unicode codepoint
139-
validCodepoint :: Char -> Bool
139+
-- | Returns `True` if the given `Int` is a valid Unicode codepoint
140+
validCodepoint :: Int -> Bool
140141
validCodepoint c =
141-
not (category == Char.Surrogate || category == Char.NotAssigned)
142+
not (category == Char.Surrogate
143+
|| c .&. 0xFFFE == 0xFFFE
144+
|| c .&. 0xFFFF == 0xFFFF)
142145
where
143-
category = Char.generalCategory c
146+
category = Char.generalCategory (Char.chr c)
144147

145148
{-| Parse 0 or more whitespace characters (including comments)
146149

0 commit comments

Comments
 (0)