From e3f81dcb0eb3ba3c039d0e86e5945542a6ec72a9 Mon Sep 17 00:00:00 2001 From: Iavor Diatchki Date: Mon, 8 May 2023 16:54:24 -0700 Subject: [PATCH 01/15] Fix up to make tests work --- src/Language/Rust/Pretty/Internal.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Language/Rust/Pretty/Internal.hs b/src/Language/Rust/Pretty/Internal.hs index 1562b1a..559b753 100644 --- a/src/Language/Rust/Pretty/Internal.hs +++ b/src/Language/Rust/Pretty/Internal.hs @@ -947,7 +947,12 @@ printGenerics (Generics lifetimes tyParams _ x) | null lifetimes && null tyParams = mempty | otherwise = let lifetimes' = printLifetimeDef `map` lifetimes bounds' = [ printTyParam param | param<-tyParams ] - in annotate x (group ("<" <##> ungroup (block NoDelim True "," mempty (lifetimes' ++ bounds')) <##> ">")) + in annotate x (group ("<" <##> vsep (go (lifetimes' ++ bounds')) <##> ">")) + where + go [] = [] + go [z] = [ flatAlt (indent n z <> ",") (flatten z) ] + go (z:zs) = flatAlt (indent n z <> ",") (flatten z <> ",") : go zs + -- | Print a poly-trait ref (@print_poly_trait_ref@) printPolyTraitRef :: PolyTraitRef a -> Doc a From f1b4a92ece10a8397604bed546ed3cd6144aa4b8 Mon Sep 17 00:00:00 2001 From: Iavor Diatchki Date: Mon, 8 May 2023 16:54:41 -0700 Subject: [PATCH 02/15] Update to more recent Aeson --- test/rustc-tests/DiffUtils.hs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/test/rustc-tests/DiffUtils.hs b/test/rustc-tests/DiffUtils.hs index 5e5f843..b827d0d 100644 --- a/test/rustc-tests/DiffUtils.hs +++ b/test/rustc-tests/DiffUtils.hs @@ -5,7 +5,7 @@ module DiffUtils where import qualified Data.Aeson as Aeson -import qualified Data.HashMap.Lazy as HM +import qualified Data.Aeson.KeyMap as Aeson import qualified Data.Vector as V import qualified Data.List.NonEmpty as N import Control.Monad @@ -24,10 +24,16 @@ instance IsString AesonKey where fromString = Key -- | Accessor method for JSON with helpful error messages. (!) 
:: Aeson.Value -> AesonKey -> Aeson.Value -val@(Aeson.Object hashmap) ! Key key = - case HM.lookup (fromString key) hashmap of - Nothing -> error $ "No key `" ++ key ++ "' on JSON object `" ++ showAeson val ++ "'" +val@(Aeson.Object hashmap) ! key = + case Aeson.lookup (fromString keyString) hashmap of + Nothing -> error $ "No key `" ++ keyString ++ "' on JSON object `" ++ showAeson val ++ "'" Just v -> v + where + keyString = + case key of + Index i -> show i + Key x -> x + val ! Key key = error $ "Cannot lookup key `" ++ key ++ "' on non-object JSON `" ++ showAeson val ++ "'" val@(Aeson.Array vect) ! Index key = case vect V.!? key of @@ -41,7 +47,7 @@ showAeson = unpack . Aeson.encode -- | Accessor method for JSON which fails with 'Nothing' (!?) :: Aeson.Value -> AesonKey -> Maybe Aeson.Value -Aeson.Object hashmap !? Key key = HM.lookup (fromString key) hashmap +Aeson.Object hashmap !? Key key = Aeson.lookup (fromString key) hashmap Aeson.Array vect !? Index key = vect V.!? key _ !? _ = Nothing From a378c99c8d6471fda102ef884ece104a4e5dd6ef Mon Sep 17 00:00:00 2001 From: Iavor Diatchki Date: Mon, 8 May 2023 16:56:31 -0700 Subject: [PATCH 03/15] Fix `Ord` instance for Identifiers. Previously the instance was incorrect because it'd cause an infinite loop. This version rearranges the fields of the records to ensure that the hash field is first, which makes it possible to derive Eq and Ord. We also do a bunch of refactoring to use record notation instead of constructor pattern matching, to make it easier to do similar refactoring in the future. 
--- src/Language/Rust/Data/Ident.hs | 28 +++++++++++----------------- src/Language/Rust/Parser/Internal.y | 8 ++++---- src/Language/Rust/Parser/Lexer.x | 2 +- src/Language/Rust/Pretty/Internal.hs | 5 ++--- src/Language/Rust/Pretty/Resolve.hs | 14 +++++++------- test/rustc-tests/Diff.hs | 4 ++-- 6 files changed, 27 insertions(+), 34 deletions(-) diff --git a/src/Language/Rust/Data/Ident.hs b/src/Language/Rust/Data/Ident.hs index 421552f..7ba5f80 100644 --- a/src/Language/Rust/Data/Ident.hs +++ b/src/Language/Rust/Data/Ident.hs @@ -26,12 +26,14 @@ import Data.Char ( ord ) import Data.String ( IsString(..) ) import Data.Semigroup as Sem --- | An identifier +-- | An identifier. +-- Note that the order of the fields is important, so the +-- when we derive `Eq` and `Ord` we use the hash first. data Ident - = Ident { name :: Name -- ^ payload of the identifier + = Ident { hash :: {-# UNPACK #-} !Int -- ^ hash for quick comparision + , name :: Name -- ^ payload of the identifier , raw :: Bool -- ^ whether the identifier is raw - , hash :: {-# UNPACK #-} !Int -- ^ hash for quick comparision - } deriving (Data, Typeable, Generic, NFData) + } deriving (Data, Typeable, Generic, NFData, Eq, Ord) -- | Shows the identifier as a string (for use with @-XOverloadedStrings@) instance Show Ident where @@ -40,17 +42,6 @@ instance Show Ident where instance IsString Ident where fromString = mkIdent --- | Uses 'hash' to short-circuit -instance Eq Ident where - i1 == i2 = hash i1 == hash i2 && name i1 == name i2 && raw i1 == raw i2 - i1 /= i2 = hash i1 /= hash i2 || name i1 /= name i2 || raw i1 /= raw i2 - --- | Uses 'hash' to short-circuit -instance Ord Ident where - compare i1 i2 = case compare i1 i2 of - EQ -> compare (raw i1, name i1) (raw i2, name i2) - rt -> rt - -- | "Forgets" about whether either argument was raw instance Monoid Ident where mappend = (<>) @@ -58,12 +49,15 @@ instance Monoid Ident where -- | "Forgets" about whether either argument was raw instance Sem.Semigroup 
Ident where - Ident n1 _ _ <> Ident n2 _ _ = mkIdent (n1 <> n2) + i1 <> i2 = mkIdent (name i1 <> name i2) -- | Smart constructor for making an 'Ident'. mkIdent :: String -> Ident -mkIdent s = Ident s False (hashString s) +mkIdent s = Ident { hash = hashString s + , name = s + , raw = False + } -- | Hash a string into an 'Int' hashString :: String -> Int diff --git a/src/Language/Rust/Parser/Internal.y b/src/Language/Rust/Parser/Internal.y index 4753c6c..4d67bb5 100644 --- a/src/Language/Rust/Parser/Internal.y +++ b/src/Language/Rust/Parser/Internal.y @@ -585,7 +585,7 @@ self_or_ident :: { Spanned Ident } ----------- lifetime :: { Lifetime Span } - : LIFETIME { let Spanned (LifetimeTok (Ident l _ _)) s = $1 in Lifetime l s } + : LIFETIME { let Spanned (LifetimeTok l) s = $1 in Lifetime (name l) s } -- parse_trait_ref() trait_ref :: { TraitRef Span } @@ -1125,7 +1125,7 @@ blockpostfix_expr :: { Expr Span } -- labels on loops label :: { Label Span } - : LIFETIME { let Spanned (LifetimeTok (Ident l _ _)) s = $1 in Label l s } + : LIFETIME { let Spanned (LifetimeTok l) s = $1 in Label (name l) s } -- Literal expressions (composed of just literals) lit_expr :: { Expr Span } @@ -1904,8 +1904,8 @@ addAttrs as (Yield as' e s) = Yield (as ++ as') e s -- | Given a 'LitTok' token that is expected to result in a valid literal, construct the associated -- literal. Note that this should _never_ fail on a token produced by the lexer. 
lit :: Spanned Token -> Lit Span -lit (Spanned (IdentTok (Ident "true" False _)) s) = Bool True Unsuffixed s -lit (Spanned (IdentTok (Ident "false" False _)) s) = Bool False Unsuffixed s +lit (Spanned (IdentTok Ident { name = "true", raw = False }) s) = Bool True Unsuffixed s +lit (Spanned (IdentTok Ident { name = "false", raw = False }) s) = Bool False Unsuffixed s lit (Spanned (LiteralTok litTok suffix_m) s) = translateLit litTok suffix s where suffix = case suffix_m of diff --git a/src/Language/Rust/Parser/Lexer.x b/src/Language/Rust/Parser/Lexer.x index 0f44c80..e08332b 100644 --- a/src/Language/Rust/Parser/Lexer.x +++ b/src/Language/Rust/Parser/Lexer.x @@ -1104,7 +1104,7 @@ literal lit = do AlexToken (pos',inp') len action -> do tok <- action (peekChars len inp) case tok of - IdentTok (Ident suf False _) -> do + IdentTok Ident { name = suf, raw = False } -> do setPosition pos' setInput inp' pure (LiteralTok lit (Just suf)) diff --git a/src/Language/Rust/Pretty/Internal.hs b/src/Language/Rust/Pretty/Internal.hs index 559b753..1f0ae37 100644 --- a/src/Language/Rust/Pretty/Internal.hs +++ b/src/Language/Rust/Pretty/Internal.hs @@ -124,8 +124,7 @@ printName = pretty -- | Print an identifier printIdent :: Ident -> Doc a -printIdent (Ident s False _) = pretty s -printIdent (Ident s True _) = "r#" <> pretty s +printIdent n = if raw n then "r#" <> pretty (name n) else pretty (name n) -- | Print a type (@print_type@ with @print_ty_fn@ inlined) -- Types are expected to always be only one line @@ -599,7 +598,7 @@ printAttr (SugaredDoc Outer False c x) _ = annotate x (flatAlt ("///" <> pretty -- | Print an identifier as is, or as cooked string if containing a hyphen printCookedIdent :: Ident -> Doc a -printCookedIdent ident@(Ident str raw _) +printCookedIdent ident@(Ident { name = str, raw = raw }) | '-' `elem` str && not raw = printStr Cooked str | otherwise = printIdent ident diff --git a/src/Language/Rust/Pretty/Resolve.hs b/src/Language/Rust/Pretty/Resolve.hs index 
a4d791e..d9db315 100644 --- a/src/Language/Rust/Pretty/Resolve.hs +++ b/src/Language/Rust/Pretty/Resolve.hs @@ -231,7 +231,7 @@ instance (Typeable a, Monoid a) => Resolve (SourceFile a) where resolveM = resol -- * it is a keyword -- resolveIdent :: Ident -> ResolveM Ident -resolveIdent i@(Ident s r _) = +resolveIdent i@(Ident { name = s, raw = r }) = scope i $ case toks of Right [Spanned (IdentTok i') _] | i /= i' -> err i ("identifier `" ++ s ++ "' does not lex properly") @@ -359,10 +359,10 @@ resolvePath t p@(Path g segs x) = scope p $ resolveSeg :: (Typeable a, Monoid a) => PathSegment a -> ResolveM (PathSegment a) resolveSeg (PathSegment i a x') = do i' <- case i of - Ident "self" False _ -> pure i - Ident "Self" False _ -> pure i - Ident "super" False _ -> pure i - Ident "crate" False _ -> pure i + Ident { name = "self", raw = False } -> pure i + Ident { name = "Self", raw = False } -> pure i + Ident { name = "super", raw = False } -> pure i + Ident { name = "crate", raw = False } -> pure i _ -> resolveIdent i a' <- traverse resolvePathParameters a pure (PathSegment i' a' x') @@ -544,8 +544,8 @@ resolveArg GeneralArg a@(Arg p t x) = scope a $ do -- | Check whether an argument is one of the "self"-alike forms isSelfAlike :: Arg a -> Bool -isSelfAlike (Arg Nothing (PathTy Nothing (Path False [PathSegment (Ident "self" False _) Nothing _] _) _) _) = True -isSelfAlike (Arg Nothing (Rptr _ _ (PathTy Nothing (Path False [PathSegment (Ident "self" False _) Nothing _] _) _) _) _) = True +isSelfAlike (Arg Nothing (PathTy Nothing (Path False [PathSegment Ident { name = "self", raw = False } Nothing _] _) _) _) = True +isSelfAlike (Arg Nothing (Rptr _ _ (PathTy Nothing (Path False [PathSegment Ident { name = "self", raw = False } Nothing _] _) _) _) _) = True isSelfAlike _ = False instance (Typeable a, Monoid a) => Resolve (Arg a) where resolveM = resolveArg NamedArg diff --git a/test/rustc-tests/Diff.hs b/test/rustc-tests/Diff.hs index 4dea76d..291a2ad 100644 --- 
a/test/rustc-tests/Diff.hs +++ b/test/rustc-tests/Diff.hs @@ -636,8 +636,8 @@ instance Show a => Diffable (Field a) where me === (val ! "expr") instance Diffable Ident where - Ident i _ _ === String s | fromString i == s = pure () - ident' === val = diff "identifiers are different" ident' val + i === String s | fromString (name i) == s = pure () + ident' === val = diff "identifiers are different" ident' val -- | The empty identifier is invalid invalidIdent :: Ident From 9ff9176b6707c259c2546c2500867181372ce6da Mon Sep 17 00:00:00 2001 From: Iavor Diatchki Date: Tue, 9 May 2023 08:48:31 -0700 Subject: [PATCH 04/15] Don't use `raw` in comparisons. --- src/Language/Rust/Data/Ident.hs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Language/Rust/Data/Ident.hs b/src/Language/Rust/Data/Ident.hs index 7ba5f80..f68adac 100644 --- a/src/Language/Rust/Data/Ident.hs +++ b/src/Language/Rust/Data/Ident.hs @@ -33,7 +33,13 @@ data Ident = Ident { hash :: {-# UNPACK #-} !Int -- ^ hash for quick comparision , name :: Name -- ^ payload of the identifier , raw :: Bool -- ^ whether the identifier is raw - } deriving (Data, Typeable, Generic, NFData, Eq, Ord) + } deriving (Data, Typeable, Generic, NFData) + +instance Eq Ident where + x == y = (hash x, name x) == (hash y, name y) + +instance Ord Ident where + compare x y = compare (hash x, name x) (hash y, name y) -- | Shows the identifier as a string (for use with @-XOverloadedStrings@) instance Show Ident where From e30c6000d5a91d1fa66d15bd70d9377b19a9a92f Mon Sep 17 00:00:00 2001 From: Iavor Diatchki Date: Tue, 9 May 2023 08:55:24 -0700 Subject: [PATCH 05/15] Use standard definition for mappend --- src/Language/Rust/Parser/Reversed.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Language/Rust/Parser/Reversed.hs b/src/Language/Rust/Parser/Reversed.hs index 2c14511..8df0d1f 100644 --- a/src/Language/Rust/Parser/Reversed.hs +++ b/src/Language/Rust/Parser/Reversed.hs @@ -51,7 +51,7 @@ 
instance Sem.Semigroup (f a) => Sem.Semigroup (Reversed f a) where instance Monoid (f a) => Monoid (Reversed f a) where mempty = Reversed mempty - mappend (Reversed xs) (Reversed ys) = Reversed (mappend ys xs) + mappend = (<>) instance G.IsList (f a) => G.IsList (Reversed f a) where type Item (Reversed f a) = G.Item (f a) From 6ea7a24540ef348cb16143268570923e2cbc2d26 Mon Sep 17 00:00:00 2001 From: Iavor Diatchki Date: Tue, 9 May 2023 08:55:48 -0700 Subject: [PATCH 06/15] Unused import --- src/Language/Rust/Pretty/Resolve.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Language/Rust/Pretty/Resolve.hs b/src/Language/Rust/Pretty/Resolve.hs index d9db315..308367a 100644 --- a/src/Language/Rust/Pretty/Resolve.hs +++ b/src/Language/Rust/Pretty/Resolve.hs @@ -85,7 +85,6 @@ import Data.List ( find ) import Data.List.NonEmpty ( NonEmpty(..) ) import qualified Data.List.NonEmpty as N import Data.Maybe ( fromJust ) -import Data.Semigroup ( (<>) ) {-# ANN module "HLint: ignore Reduce duplication" #-} From 6cb5c281664dd53d7b15005a3db0d185ac840a25 Mon Sep 17 00:00:00 2001 From: Iavor Diatchki Date: Tue, 9 May 2023 08:56:08 -0700 Subject: [PATCH 07/15] Update to avoid using deprecated imports --- src/Language/Rust/Pretty.hs | 6 +++--- src/Language/Rust/Pretty/Literals.hs | 2 +- src/Language/Rust/Pretty/Util.hs | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Language/Rust/Pretty.hs b/src/Language/Rust/Pretty.hs index accb0fb..fb0dfc9 100644 --- a/src/Language/Rust/Pretty.hs +++ b/src/Language/Rust/Pretty.hs @@ -77,9 +77,9 @@ import Language.Rust.Pretty.Resolve import System.IO ( Handle ) import Data.Typeable ( Typeable ) -import Data.Text.Prettyprint.Doc.Render.Text ( renderIO ) -import Data.Text.Prettyprint.Doc ( Doc ) -import qualified Data.Text.Prettyprint.Doc as PP +import Prettyprinter.Render.Text ( renderIO ) +import Prettyprinter ( Doc ) +import qualified Prettyprinter as PP import Control.Exception ( throw ) diff --git 
a/src/Language/Rust/Pretty/Literals.hs b/src/Language/Rust/Pretty/Literals.hs index f37d7e4..e14a31e 100644 --- a/src/Language/Rust/Pretty/Literals.hs +++ b/src/Language/Rust/Pretty/Literals.hs @@ -19,7 +19,7 @@ module Language.Rust.Pretty.Literals ( import Language.Rust.Syntax.AST import Language.Rust.Pretty.Util -import Data.Text.Prettyprint.Doc ( hcat, annotate, (<>), Doc, pretty, group, hardline, flatAlt ) +import Prettyprinter ( hcat, annotate, Doc, pretty, group, hardline, flatAlt ) import Data.Char ( intToDigit, ord, chr ) import Data.Word ( Word8 ) diff --git a/src/Language/Rust/Pretty/Util.hs b/src/Language/Rust/Pretty/Util.hs index 509c62d..181e9c5 100644 --- a/src/Language/Rust/Pretty/Util.hs +++ b/src/Language/Rust/Pretty/Util.hs @@ -25,8 +25,8 @@ module Language.Rust.Pretty.Util where import Data.Monoid as M -import qualified Data.Text.Prettyprint.Doc as PP -import Data.Text.Prettyprint.Doc.Internal.Type ( Doc(..) ) +import qualified Prettyprinter as PP +import Prettyprinter.Internal.Type ( Doc(..) ) import Language.Rust.Syntax.Token ( Delim(..) ) From 82acd312c0cd2b467ea8fa2a0feb316185b52c69 Mon Sep 17 00:00:00 2001 From: Iavor Diatchki Date: Mon, 18 Sep 2023 10:08:35 +0300 Subject: [PATCH 08/15] Relax upper bounds --- language-rust.cabal | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/language-rust.cabal b/language-rust.cabal index f9c4331..76ab2c6 100644 --- a/language-rust.cabal +++ b/language-rust.cabal @@ -86,9 +86,9 @@ library build-depends: base >=4.9 && <5.0 , prettyprinter >=1.0 && <2.0 - , transformers >=0.4 && <0.6 + , transformers >=0.4 && <0.7 , array >=0.5 && <0.6 - , deepseq >=1.1 && <1.5 + , deepseq >=1.1 && <1.6 if flag(useByteStrings) cpp-options: -DUSE_BYTESTRING From f53eca6ea1b6222cf9607aa83a57958a739b96d4 Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Tue, 3 Sep 2024 09:23:14 -0400 Subject: [PATCH 09/15] Revert "Don't use `raw` in comparisons." This reverts commit 9ff9176b6707c259c2546c2500867181372ce6da. 
Per the discussion in #6, having the `Eq` and `Ord` instances ignore the `raw` field of `Ident` causes more trouble than it's worth, as it causes the parser to incorrectly deem raw identifiers like `r#return` to be keywords. While we could fix this issue by changing the parser, this would take quite a bit of code changes to accomplish. As such, we revert the change here, and we make a note in the Haddocks for the `Eq` and `Ord` instances to beware of the fact that `raw` is taken into account. After this change, the `rustc-tests` test suite passes once more. As such, this change fixes #6. --- src/Language/Rust/Data/Ident.hs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/Language/Rust/Data/Ident.hs b/src/Language/Rust/Data/Ident.hs index f68adac..512110a 100644 --- a/src/Language/Rust/Data/Ident.hs +++ b/src/Language/Rust/Data/Ident.hs @@ -33,13 +33,15 @@ data Ident = Ident { hash :: {-# UNPACK #-} !Int -- ^ hash for quick comparision , name :: Name -- ^ payload of the identifier , raw :: Bool -- ^ whether the identifier is raw - } deriving (Data, Typeable, Generic, NFData) - -instance Eq Ident where - x == y = (hash x, name x) == (hash y, name y) - -instance Ord Ident where - compare x y = compare (hash x, name x) (hash y, name y) + } + deriving ( Data, Typeable, Generic, NFData + -- | Note that this instance takes the 'raw' field into account, so + -- the identifiers @x@ and @r#x@ are judged /not/ to be equal. + , Eq + -- | Note that this instance takes the 'raw' field into account, so + -- the identifiers @x@ and @r#x@ are judged /not/ to be equal. + , Ord + ) -- | Shows the identifier as a string (for use with @-XOverloadedStrings@) instance Show Ident where From 9acd23debed5d99ecb0431fe77c09004d08db5a9 Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Tue, 3 Sep 2024 09:44:23 -0400 Subject: [PATCH 10/15] CI: Migrate from Travis to GitHub Actions Fixes #5. 
--- .github/workflows/ci.yaml | 64 +++++++++++++++++++++++++++++++++++++++ .travis.yml | 36 ---------------------- 2 files changed, 64 insertions(+), 36 deletions(-) create mode 100644 .github/workflows/ci.yaml delete mode 100644 .travis.yml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..565d313 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,64 @@ +name: CI + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +# The CACHE_VERSION can be updated to force the use of a new cache if +# the current cache contents become corrupted/invalid. This can +# sometimes happen when (for example) the OS version is changed but +# older .so files are cached, which can have various effects +# (e.g. cabal complains it can't find a valid version of the "happy" +# tool). +env: + CACHE_VERSION: 1 + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-22.04] + ghc: ["9.4.8", "9.6.6", "9.8.2"] + cabal: ["3.10.3.0"] + + steps: + - uses: actions/checkout@v4 + with: + submodules: true + + - uses: haskell-actions/setup@v2 + id: setup-haskell + with: + ghc-version: ${{ matrix.ghc }} + cabal-version: ${{ matrix.cabal }} + + - uses: actions/cache/restore@v4 + name: Restore cabal store cache + with: + path: | + ${{ steps.setup-haskell.outputs.cabal-store }} + dist-newstyle + key: ${{ env.CACHE_VERSION }}-cabal-${{ matrix.os }}-${{ matrix.ghc }}-${{ hashFiles(format('cabal.GHC-{0}.config', matrix.ghc)) }}-${{ github.sha }} + restore-keys: | + ${{ env.CACHE_VERSION }}-cabal-${{ matrix.os }}-${{ matrix.ghc }}-${{ hashFiles(format('cabal.GHC-{0}.config', matrix.ghc)) }}- + + - name: Update + run: cabal update + - name: Configure + run: cabal configure --enable-tests + - name: Build + run: cabal build + - name: Run tests + run: cabal test + + - uses: actions/cache/save@v4 + name: Save cabal store cache + if: always() + with: + path: | + ${{ steps.setup-haskell.outputs.cabal-store 
}} + dist-newstyle + key: ${{ env.CACHE_VERSION }}-cabal-${{ matrix.os }}-${{ matrix.ghc }}-${{ hashFiles(format('cabal.GHC-{0}.config', matrix.ghc)) }}-${{ github.sha }} diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 95caabf..0000000 --- a/.travis.yml +++ /dev/null @@ -1,36 +0,0 @@ -# Sudo used for custom apt setup -sudo: true - -# Add new environments to the build here: -env: - - GHCVER=8.0.2 CABALVER=3.0 - - GHCVER=8.2.2 CABALVER=3.0 - - GHCVER=8.4.4 CABALVER=3.0 - - GHCVER=8.6.5 CABALVER=3.0 - - GHCVER=8.8.1 CABALVER=3.0 - - GHCVER=head CABALVER=head - -# Allow for develop branch to break -matrix: - allow_failures: - - env: GHCVER=8.8.1 CABALVER=3.0 - - env: GHCVER=head CABALVER=head - -# Manually install ghc and cabal -before_install: - - travis_retry sudo add-apt-repository -y ppa:hvr/ghc - - travis_retry sudo apt-get update - - travis_retry sudo apt-get install cabal-install-$CABALVER ghc-$GHCVER - - export PATH=/opt/ghc/$GHCVER/bin:/opt/cabal/$CABALVER/bin:$PATH - - export PATH=$HOME/.cabal/bin:$PATH - - travis_retry cabal update - -# Install Happy and Alex first, before installing -install: - - echo $PATH - - cabal --version - - ghc --version - - cabal configure --verbose --enable-tests - -script: - - cabal test From fd184b15a5b69e42f50c1ed62e2bb771b43eefcf Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Wed, 28 Aug 2024 15:46:46 -0400 Subject: [PATCH 11/15] Whitespace only --- src/Language/Rust/Parser/Lexer.x | 68 ++++++++++++++++---------------- test/unit-tests/LexerTest.hs | 8 ++-- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/src/Language/Rust/Parser/Lexer.x b/src/Language/Rust/Parser/Lexer.x index e08332b..1b65ac4 100644 --- a/src/Language/Rust/Parser/Lexer.x +++ b/src/Language/Rust/Parser/Lexer.x @@ -18,7 +18,7 @@ bitwise and, and unary reference), @&&&x&&&y@ lexes into 'AmpersandAmpersand', ' @'IdentTok' "x"@, 'AmpersandAmpersand', 'Ampersand', @'IdentTok' "y"@. 
Although the parser sometimes needs to "break apart" tokens, it never has to think about putting them together. That means it can easily figure out that @&&&x&&&y@ parses as @&(&(&x)) && (&y)@ and not @&(&(&x)) & (&(&y))@ even if -bitwise conjunctions bind more tightly that logical conjunctions. +bitwise conjunctions bind more tightly that logical conjunctions. This sort of amguity where one token need to be broken up by the parser occurs for @@ -27,7 +27,7 @@ This sort of amguity where one token need to be broken up by the parser occurs f * @<<@ in qualified type paths like @FromIterator\<\::Item\>@ * @>>@ in qualified paths like @\\>::Bar@ * @>=@ in equality predicates like @F\=i32@ - * @>>=@ in equality predicates like @F\\>=i32@ + * @>>=@ in equality predicates like @F\\>=i32@ -} module Language.Rust.Parser.Lexer ( @@ -944,7 +944,7 @@ $hexit = [0-9a-fA-F] \' @lit_byte - = b\' ( \\ @byte_escape + = b\' ( \\ @byte_escape | [^\\'\n\t\r] [ \udc00-\udfff ]? ) \' @@ -1020,28 +1020,28 @@ $white+ { \s -> pure (Space Whitespace s) } "/=" { token SlashEqual } "^=" { token CaretEqual } "%=" { token PercentEqual } - - -"@" { token At } -"." { token Dot } -".." { token DotDot } -"..." { token DotDotDot } -"..=" { token DotDotEqual } -"," { token Comma } -";" { token Semicolon } + + +"@" { token At } +"." { token Dot } +".." { token DotDot } +"..." 
{ token DotDotDot } +"..=" { token DotDotEqual } +"," { token Comma } +";" { token Semicolon } ":" { token Colon } "::" { token ModSep } "->" { token RArrow } "<-" { token LArrow } "=>" { token FatArrow } -"(" { token (OpenDelim Paren) } -")" { token (CloseDelim Paren) } +"(" { token (OpenDelim Paren) } +")" { token (CloseDelim Paren) } "[" { token (OpenDelim Bracket) } "]" { token (CloseDelim Bracket) } -"{" { token (OpenDelim Brace) } -"}" { token (CloseDelim Brace) } -"#" { token Pound } -"$" { token Dollar } +"{" { token (OpenDelim Brace) } +"}" { token (CloseDelim Brace) } +"#" { token Pound } +"$" { token Dollar } @lit_integer { \i -> literal (IntegerTok i) } @lit_float { \f -> literal (FloatTok f) } @@ -1070,13 +1070,13 @@ $white+ { \s -> pure (Space Whitespace s) } @ident { \s -> pure (IdentTok (mkIdent s)) } \? { token Question } -@raw_ident { \s -> pure (IdentTok ((mkIdent (drop 2 s)){ raw = True })) } -@ident { \s -> pure (IdentTok (mkIdent s)) } +@raw_ident { \s -> pure (IdentTok ((mkIdent (drop 2 s)){ raw = True })) } +@ident { \s -> pure (IdentTok (mkIdent s)) } @lifetime { \s -> (pure (LifetimeTok (mkIdent (tail s))) :: P Token) } -@outer_doc_line { \c -> pure (Doc (drop 3 c) Outer False) } -@outer_doc_line \r { \c -> pure (Doc (drop 3 (init c)) Outer False) } +@outer_doc_line { \c -> pure (Doc (drop 3 c) Outer False) } +@outer_doc_line \r { \c -> pure (Doc (drop 3 (init c)) Outer False) } @outer_doc_inline / ( [^\*] | \r | \n ) { \_ -> Doc <$> nestedComment <*> pure Outer <*> pure True } @@ -1095,8 +1095,8 @@ token t _ = pure t -- | Given the first part of a literal, try to parse also a suffix. Even if -- the allowed suffixes are very well defined and only valid on integer and -- float literals, we need to put in the same token whatever suffix follows. --- This is for backwards compatibility if Rust decides to ever add suffixes. -literal :: LitTok -> P Token +-- This is for backwards compatibility if Rust decides to ever add suffixes. 
+literal :: LitTok -> P Token literal lit = do pos <- getPosition inp <- getInput @@ -1119,16 +1119,16 @@ rawString n = do case c_m of -- The string was never closed Nothing -> fail "Invalid raw (byte)string" - + -- The string has a chance of being closed Just '"' -> do n' <- greedyChar '#' n if n' == n then pure "" - else (('"' : replicate n' '#') ++) <$> rawString n + else (('"' : replicate n' '#') ++) <$> rawString n -- Just another character... - Just c -> ([c] ++) <$> rawString n + Just c -> ([c] ++) <$> rawString n -- | Consume a full inline comment (which may be nested). nestedComment :: P String @@ -1142,15 +1142,15 @@ nestedComment = go 1 "" Nothing -> fail "Unclosed comment" Just '*' -> do c' <- peekChar - case c' of + case c' of Nothing -> fail "Unclosed comment" Just '/' -> nextChar *> go (n-1) ('/':'*':s) Just _ -> go n ('*':s) Just '/' -> do c' <- peekChar - case c' of + case c' of Nothing -> fail "Unclosed comment" - Just '*' -> nextChar *> go (n+1) ('*':'/':s) + Just '*' -> nextChar *> go (n+1) ('*':'/':s) Just _ -> go n ('/':s) Just c' -> go n (c':s) @@ -1162,7 +1162,7 @@ nextChar :: P (Maybe Char) nextChar = do pos <- getPosition inp <- getInput - if inputStreamEmpty inp + if inputStreamEmpty inp then pure Nothing else let (c,inp') = takeChar inp pos' = alexMove pos c @@ -1173,7 +1173,7 @@ nextChar = do peekChar :: P (Maybe Char) peekChar = do inp <- getInput - if inputStreamEmpty inp + if inputStreamEmpty inp then pure Nothing else let (c,_) = takeChar inp in pure (Just c) @@ -1195,7 +1195,7 @@ lexicalError = do fail ("Lexical error: the character " ++ show c ++ " does not fit here") --- Functions required by Alex +-- Functions required by Alex -- | type passed around by Alex functions (required by Alex) type AlexInput = (Position, -- current position, @@ -1223,7 +1223,7 @@ alexMove pos '\n' = retPos pos alexMove pos '\r' = incOffset pos 1 alexMove pos _ = incPos pos 1 --- | Lexer for one 'Token'. 
The only token this cannot produce is 'Interpolated'. +-- | Lexer for one 'Token'. The only token this cannot produce is 'Interpolated'. lexToken :: P (Spanned Token) lexToken = do tok_maybe <- popToken diff --git a/test/unit-tests/LexerTest.hs b/test/unit-tests/LexerTest.hs index 12e7768..445d197 100644 --- a/test/unit-tests/LexerTest.hs +++ b/test/unit-tests/LexerTest.hs @@ -15,11 +15,11 @@ import Language.Rust.Data.InputStream lexerSuite :: Test lexerSuite = testGroup "lexer suite" [ commonCode, literals ] --- | This contains some random real-life code fragments. The purpose here is +-- | This contains some random real-life code fragments. The purpose here is -- primarily black-box testing. commonCode :: Test commonCode = testGroup "lexing common code fragments" - [ testCode "let span = $p.span;" + [ testCode "let span = $p.span;" [ IdentTok (mkIdent "let") , Space Whitespace " " , IdentTok (mkIdent "span") @@ -32,7 +32,7 @@ commonCode = testGroup "lexing common code fragments" , IdentTok (mkIdent "span") , Semicolon ] - , testCode "$(p.span),+" + , testCode "$(p.span),+" [ Dollar , OpenDelim Paren , IdentTok (mkIdent "p") @@ -94,7 +94,7 @@ commonCode = testGroup "lexing common code fragments" [ IdentTok (mkIdent "fn") , Space Whitespace " " , IdentTok (mkIdent "ܐ_ܐ") - , OpenDelim Paren + , OpenDelim Paren , CloseDelim Paren , Space Whitespace " " , OpenDelim Brace From 86a65407cd7c77594f51d7dc748132a551db6965 Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Wed, 28 Aug 2024 14:57:04 -0400 Subject: [PATCH 12/15] Lexer: Properly support Unicode 15.1.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous lexer implementation in `Language.Rust.Parser.Lexer` was broken for Unicode characters with sufficiently large codepoints, as the previous implementation incorrectly attempted to port UTF-16–encoded codepoints over to `alex`, which is UTF-8–encoded. 
Rather than try to fix the previous implementation (which was based on old `rustc` code that is no longer used), this ports the lexer to a new implementation that is based on the Rust `unicode-xid` crate (which is how modern versions of `rustc` lex Unicode characters). Specifically: * This adapts `unicode-xid`'s lexer generation script to generate an `alex`-based lexer instead of a Rust-based one. * The new lexer is generated to support codepoints from Unicode 15.1.0. (It is unclear which exact Unicode version the previous lexer targeted, but given that it was last updated in 2016, it was likely quite an old version.) * I have verified that the new lexer can lex exotic Unicode characters such as `𝑂` and `𐌝` by adding them as regression tests. Fixes #3. --- .gitignore | 4 + scripts/unicode.py | 167 ++++ src/Language/Rust/Parser/Lexer.x | 1432 +++++++++++++++++++++--------- test/unit-tests/LexerTest.hs | 4 + 4 files changed, 1196 insertions(+), 411 deletions(-) create mode 100755 scripts/unicode.py diff --git a/.gitignore b/.gitignore index 37fb369..165a17d 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,7 @@ sample-sources/ !sample-sources/statement-expressions.rs !sample-sources/statements.rs !sample-sources/types.rs + +# Unicode-related autogenerated files +DerivedCoreProperties.txt +UnicodeLexer.x diff --git a/scripts/unicode.py b/scripts/unicode.py new file mode 100755 index 0000000..39d954f --- /dev/null +++ b/scripts/unicode.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python +# +# Copyright 2011-2015 The Rust Project Developers +# 2024 Galois Inc. +# +# This script was originally created by the Rust Project Developers as part of +# the `unicode-xid` crate: +# +# https://github.com/unicode-rs/unicode-xid/blob/b3a2718b062da229c0a50d12281de0e5d8e8cff6/scripts/unicode.py +# +# See the COPYRIGHT file in the `unicode-xid` crate: +# +# https://github.com/unicode-rs/unicode-xid/blob/b3a2718b062da229c0a50d12281de0e5d8e8cff6/COPYRIGHT +# +# Galois Inc. 
has modified the script to generate an `alex`-based lexer instead +# of a Rust-based lexer. +# +# Licensed under the Apache License, Version 2.0 or the MIT license +# , at your +# option. This file may not be copied, modified, or distributed +# except according to those terms. + +import fileinput, re, os, sys + +unicode_version = (15, 1, 0) + +preamble = '''-- NOTE: The following code was generated by "scripts/unicode.py", do not edit directly +-- +-- If you need to update this code, perform the following steps: +-- +-- 1. (Optional) Update the value of `unicode_version` in "scripts/unicode.py". +-- 2. Run the "scripts/unicode.py" script. +-- 3. Copy the code (including the comments) in the autogenerated `UnicodeLexer.x` file. +-- 4. Replace the existing autogenerated code here. +''' + +postamble = '''-- End of code generated by "scripts/unicode.py". +''' + +def unicode_url(f): + return "http://www.unicode.org/Public/%s.%s.%s/ucd/%s" % (unicode_version + (f,)) + +def fetch(f): + if not os.path.exists(os.path.basename(f)): + os.system("curl -O %s" % unicode_url(f)) + + if not os.path.exists(os.path.basename(f)): + sys.stderr.write("cannot load %s" % f) + exit(1) + +def group_cat(cat): + cat_out = [] + letters = sorted(set(cat)) + cur_start = letters.pop(0) + cur_end = cur_start + for letter in letters: + assert letter > cur_end, \ + "cur_end: %s, letter: %s" % (hex(cur_end), hex(letter)) + if letter == cur_end + 1: + cur_end = letter + else: + cat_out.append((cur_start, cur_end)) + cur_start = cur_end = letter + cat_out.append((cur_start, cur_end)) + return cat_out + +def ungroup_cat(cat): + cat_out = [] + for (lo, hi) in cat: + while lo <= hi: + cat_out.append(lo) + lo += 1 + return cat_out + +def format_table_content(f, content, indent): + line = "" + first = True + for chunk in content.split("|"): + line += " " * indent + if first: + line += "= " + chunk + else: + line += "| " + chunk + line += "\n" + first = False + f.write(line + '\n') + +def load_properties(f, 
interestingprops): + fetch(f) + props = {} + re1 = re.compile("^ *([0-9A-F]+) *; *(\w+)") + re2 = re.compile("^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)") + + for line in fileinput.input(os.path.basename(f)): + prop = None + d_lo = 0 + d_hi = 0 + m = re1.match(line) + if m: + d_lo = m.group(1) + d_hi = m.group(1) + prop = m.group(2) + else: + m = re2.match(line) + if m: + d_lo = m.group(1) + d_hi = m.group(2) + prop = m.group(3) + else: + continue + if interestingprops and prop not in interestingprops: + continue + d_lo = int(d_lo, 16) + d_hi = int(d_hi, 16) + if prop not in props: + props[prop] = [] + props[prop].append((d_lo, d_hi)) + + # optimize if possible + for prop in props: + props[prop] = group_cat(ungroup_cat(props[prop])) + + return props + +def escape_char(c): + return "\\x%04x" % c + +def emit_table(f, name, t_data): + f.write("@%s\n" % name) + data = "" + first = True + for dat in t_data: + if not first: + data += "|" + first = False + if dat[0] == dat[1]: + data += "%s" % escape_char(dat[0]) + else: + data += "[%s-%s]" % (escape_char(dat[0]), escape_char(dat[1])) + format_table_content(f, data, 2) + +def emit_property_module(f, mod, tbl, emit): + for cat in emit: + emit_table(f, cat, tbl[cat]) + +if __name__ == "__main__": + r = "UnicodeLexer.x" + if os.path.exists(r): + os.remove(r) + with open(r, "w") as rf: + # write the file's preamble + rf.write(preamble) + + # download and parse all the data + rf.write(''' +-- Based on Unicode %s.%s.%s, using the following Unicode table: +-- %s + +''' % (unicode_version + (unicode_url("DerivedCoreProperties.txt"),))) + + want_derived = ["XID_Start", "XID_Continue"] + derived = load_properties("DerivedCoreProperties.txt", want_derived) + emit_property_module(rf, "derived_property", derived, want_derived) + + # write the file's postamble + rf.write(postamble) diff --git a/src/Language/Rust/Parser/Lexer.x b/src/Language/Rust/Parser/Lexer.x index 1b65ac4..3acbad4 100644 --- a/src/Language/Rust/Parser/Lexer.x +++
b/src/Language/Rust/Parser/Lexer.x @@ -56,47 +56,50 @@ import Data.Word ( Word8 ) -- Things to review: -- * improved error messages --- Based heavily on: --- * --- * --- * - } --- XID_START unicode character class -@xid_start +-- NOTE: The following code was generated by "scripts/unicode.py", do not edit directly +-- +-- If you need to update this code, perform the following steps: +-- +-- 1. (Optional) Update the value of `unicode_version` in "scripts/unicode.py". +-- 2. Run the "scripts/unicode.py" script. +-- 3. Copy the code (including the comments) in the autogenerated `UnicodeLexer.x` file. +-- 4. Replace the existing autogenerated code here. + +-- Based on Unicode 15.1.0, using the following Unicode table: +-- http://www.unicode.org/Public/15.1.0/ucd/DerivedCoreProperties.txt + +@XID_Start = [\x0041-\x005a] - | "_" | [\x0061-\x007a] | \x00aa | \x00b5 | \x00ba | [\x00c0-\x00d6] | [\x00d8-\x00f6] - | [\x00f8-\x0236] - | [\x0250-\x02c1] + | [\x00f8-\x02c1] | [\x02c6-\x02d1] | [\x02e0-\x02e4] + | \x02ec | \x02ee + | [\x0370-\x0374] + | [\x0376-\x0377] + | [\x037b-\x037d] + | \x037f | \x0386 | [\x0388-\x038a] | \x038c | [\x038e-\x03a1] - | [\x03a3-\x03ce] - | [\x03d0-\x03f5] - | [\x03f7-\x03fb] - | [\x0400-\x0481] - | [\x048a-\x04ce] - | [\x04d0-\x04f5] - | [\x04f8-\x04f9] - | [\x0500-\x050f] + | [\x03a3-\x03f5] + | [\x03f7-\x0481] + | [\x048a-\x052f] | [\x0531-\x0556] | \x0559 - | [\x0561-\x0587] + | [\x0560-\x0588] | [\x05d0-\x05ea] - | [\x05f0-\x05f2] - | [\x0621-\x063a] - | [\x0640-\x064a] + | [\x05ef-\x05f2] + | [\x0620-\x064a] | [\x066e-\x066f] | [\x0671-\x06d3] | \x06d5 @@ -106,13 +109,25 @@ import Data.Word ( Word8 ) | \x06ff | \x0710 | [\x0712-\x072f] - | [\x074d-\x074f] - | [\x0780-\x07a5] + | [\x074d-\x07a5] | \x07b1 + | [\x07ca-\x07ea] + | [\x07f4-\x07f5] + | \x07fa + | [\x0800-\x0815] + | \x081a + | \x0824 + | \x0828 + | [\x0840-\x0858] + | [\x0860-\x086a] + | [\x0870-\x0887] + | [\x0889-\x088e] + | [\x08a0-\x08c9] | [\x0904-\x0939] | \x093d | \x0950
| [\x0958-\x0961] + | [\x0971-\x0980] | [\x0985-\x098c] | [\x098f-\x0990] | [\x0993-\x09a8] @@ -120,9 +135,11 @@ import Data.Word ( Word8 ) | \x09b2 | [\x09b6-\x09b9] | \x09bd + | \x09ce | [\x09dc-\x09dd] | [\x09df-\x09e1] | [\x09f0-\x09f1] + | \x09fc | [\x0a05-\x0a0a] | [\x0a0f-\x0a10] | [\x0a13-\x0a28] @@ -142,6 +159,7 @@ import Data.Word ( Word8 ) | \x0abd | \x0ad0 | [\x0ae0-\x0ae1] + | \x0af9 | [\x0b05-\x0b0c] | [\x0b0f-\x0b10] | [\x0b13-\x0b28] @@ -161,27 +179,34 @@ import Data.Word ( Word8 ) | [\x0b9e-\x0b9f] | [\x0ba3-\x0ba4] | [\x0ba8-\x0baa] - | [\x0bae-\x0bb5] - | [\x0bb7-\x0bb9] + | [\x0bae-\x0bb9] + | \x0bd0 | [\x0c05-\x0c0c] | [\x0c0e-\x0c10] | [\x0c12-\x0c28] - | [\x0c2a-\x0c33] - | [\x0c35-\x0c39] + | [\x0c2a-\x0c39] + | \x0c3d + | [\x0c58-\x0c5a] + | \x0c5d | [\x0c60-\x0c61] + | \x0c80 | [\x0c85-\x0c8c] | [\x0c8e-\x0c90] | [\x0c92-\x0ca8] | [\x0caa-\x0cb3] | [\x0cb5-\x0cb9] | \x0cbd - | \x0cde + | [\x0cdd-\x0cde] | [\x0ce0-\x0ce1] - | [\x0d05-\x0d0c] + | [\x0cf1-\x0cf2] + | [\x0d04-\x0d0c] | [\x0d0e-\x0d10] - | [\x0d12-\x0d28] - | [\x0d2a-\x0d39] - | [\x0d60-\x0d61] + | [\x0d12-\x0d3a] + | \x0d3d + | \x0d4e + | [\x0d54-\x0d56] + | [\x0d5f-\x0d61] + | [\x0d7a-\x0d7f] | [\x0d85-\x0d96] | [\x0d9a-\x0db1] | [\x0db3-\x0dbb] @@ -192,83 +217,93 @@ import Data.Word ( Word8 ) | [\x0e40-\x0e46] | [\x0e81-\x0e82] | \x0e84 - | [\x0e87-\x0e88] - | \x0e8a - | \x0e8d - | [\x0e94-\x0e97] - | [\x0e99-\x0e9f] - | [\x0ea1-\x0ea3] + | [\x0e86-\x0e8a] + | [\x0e8c-\x0ea3] | \x0ea5 - | \x0ea7 - | [\x0eaa-\x0eab] - | [\x0ead-\x0eb0] + | [\x0ea7-\x0eb0] | \x0eb2 | \x0ebd | [\x0ec0-\x0ec4] | \x0ec6 - | [\x0edc-\x0edd] + | [\x0edc-\x0edf] | \x0f00 | [\x0f40-\x0f47] - | [\x0f49-\x0f6a] - | [\x0f88-\x0f8b] - | [\x1000-\x1021] - | [\x1023-\x1027] - | [\x1029-\x102a] + | [\x0f49-\x0f6c] + | [\x0f88-\x0f8c] + | [\x1000-\x102a] + | \x103f | [\x1050-\x1055] + | [\x105a-\x105d] + | \x1061 + | [\x1065-\x1066] + | [\x106e-\x1070] + | [\x1075-\x1081] + | \x108e | [\x10a0-\x10c5] - | 
[\x10d0-\x10f8] - | [\x1100-\x1159] - | [\x115f-\x11a2] - | [\x11a8-\x11f9] - | [\x1200-\x1206] - | [\x1208-\x1246] - | \x1248 + | \x10c7 + | \x10cd + | [\x10d0-\x10fa] + | [\x10fc-\x1248] | [\x124a-\x124d] | [\x1250-\x1256] | \x1258 | [\x125a-\x125d] - | [\x1260-\x1286] - | \x1288 + | [\x1260-\x1288] | [\x128a-\x128d] - | [\x1290-\x12ae] - | \x12b0 + | [\x1290-\x12b0] | [\x12b2-\x12b5] | [\x12b8-\x12be] | \x12c0 | [\x12c2-\x12c5] - | [\x12c8-\x12ce] - | [\x12d0-\x12d6] - | [\x12d8-\x12ee] - | [\x12f0-\x130e] - | \x1310 + | [\x12c8-\x12d6] + | [\x12d8-\x1310] | [\x1312-\x1315] - | [\x1318-\x131e] - | [\x1320-\x1346] - | [\x1348-\x135a] - | [\x13a0-\x13f4] + | [\x1318-\x135a] + | [\x1380-\x138f] + | [\x13a0-\x13f5] + | [\x13f8-\x13fd] | [\x1401-\x166c] - | [\x166f-\x1676] + | [\x166f-\x167f] | [\x1681-\x169a] | [\x16a0-\x16ea] - | [\x16ee-\x16f0] - | [\x1700-\x170c] - | [\x170e-\x1711] - | [\x1720-\x1731] + | [\x16ee-\x16f8] + | [\x1700-\x1711] + | [\x171f-\x1731] | [\x1740-\x1751] | [\x1760-\x176c] | [\x176e-\x1770] | [\x1780-\x17b3] | \x17d7 | \x17dc - | [\x1820-\x1877] + | [\x1820-\x1878] | [\x1880-\x18a8] - | [\x1900-\x191c] + | \x18aa + | [\x18b0-\x18f5] + | [\x1900-\x191e] | [\x1950-\x196d] | [\x1970-\x1974] - | [\x1d00-\x1d6b] - | [\x1e00-\x1e9b] - | [\x1ea0-\x1ef9] - | [\x1f00-\x1f15] + | [\x1980-\x19ab] + | [\x19b0-\x19c9] + | [\x1a00-\x1a16] + | [\x1a20-\x1a54] + | \x1aa7 + | [\x1b05-\x1b33] + | [\x1b45-\x1b4c] + | [\x1b83-\x1ba0] + | [\x1bae-\x1baf] + | [\x1bba-\x1be5] + | [\x1c00-\x1c23] + | [\x1c4d-\x1c4f] + | [\x1c5a-\x1c7d] + | [\x1c80-\x1c88] + | [\x1c90-\x1cba] + | [\x1cbd-\x1cbf] + | [\x1ce9-\x1cec] + | [\x1cee-\x1cf3] + | [\x1cf5-\x1cf6] + | \x1cfa + | [\x1d00-\x1dbf] + | [\x1e00-\x1f15] | [\x1f18-\x1f1d] | [\x1f20-\x1f45] | [\x1f48-\x1f4d] @@ -289,6 +324,7 @@ import Data.Word ( Word8 ) | [\x1ff6-\x1ffc] | \x2071 | \x207f + | [\x2090-\x209c] | \x2102 | \x2107 | [\x210a-\x2113] @@ -297,11 +333,28 @@ import Data.Word ( Word8 ) | \x2124 | \x2126 | 
\x2128 - | [\x212a-\x2131] - | [\x2133-\x2139] - | [\x213d-\x213f] + | [\x212a-\x2139] + | [\x213c-\x213f] | [\x2145-\x2149] - | [\x2160-\x2183] + | \x214e + | [\x2160-\x2188] + | [\x2c00-\x2ce4] + | [\x2ceb-\x2cee] + | [\x2cf2-\x2cf3] + | [\x2d00-\x2d25] + | \x2d27 + | \x2d2d + | [\x2d30-\x2d67] + | \x2d6f + | [\x2d80-\x2d96] + | [\x2da0-\x2da6] + | [\x2da8-\x2dae] + | [\x2db0-\x2db6] + | [\x2db8-\x2dbe] + | [\x2dc0-\x2dc6] + | [\x2dc8-\x2dce] + | [\x2dd0-\x2dd6] + | [\x2dd8-\x2dde] | [\x3005-\x3007] | [\x3021-\x3029] | [\x3031-\x3035] @@ -310,16 +363,69 @@ import Data.Word ( Word8 ) | [\x309d-\x309f] | [\x30a1-\x30fa] | [\x30fc-\x30ff] - | [\x3105-\x312c] + | [\x3105-\x312f] | [\x3131-\x318e] - | [\x31a0-\x31b7] + | [\x31a0-\x31bf] | [\x31f0-\x31ff] - | [\x3400-\x4db5] - | [\x4e00-\x9fa5] - | [\xa000-\xa48c] + | [\x3400-\x4dbf] + | [\x4e00-\xa48c] + | [\xa4d0-\xa4fd] + | [\xa500-\xa60c] + | [\xa610-\xa61f] + | [\xa62a-\xa62b] + | [\xa640-\xa66e] + | [\xa67f-\xa69d] + | [\xa6a0-\xa6ef] + | [\xa717-\xa71f] + | [\xa722-\xa788] + | [\xa78b-\xa7ca] + | [\xa7d0-\xa7d1] + | \xa7d3 + | [\xa7d5-\xa7d9] + | [\xa7f2-\xa801] + | [\xa803-\xa805] + | [\xa807-\xa80a] + | [\xa80c-\xa822] + | [\xa840-\xa873] + | [\xa882-\xa8b3] + | [\xa8f2-\xa8f7] + | \xa8fb + | [\xa8fd-\xa8fe] + | [\xa90a-\xa925] + | [\xa930-\xa946] + | [\xa960-\xa97c] + | [\xa984-\xa9b2] + | \xa9cf + | [\xa9e0-\xa9e4] + | [\xa9e6-\xa9ef] + | [\xa9fa-\xa9fe] + | [\xaa00-\xaa28] + | [\xaa40-\xaa42] + | [\xaa44-\xaa4b] + | [\xaa60-\xaa76] + | \xaa7a + | [\xaa7e-\xaaaf] + | \xaab1 + | [\xaab5-\xaab6] + | [\xaab9-\xaabd] + | \xaac0 + | \xaac2 + | [\xaadb-\xaadd] + | [\xaae0-\xaaea] + | [\xaaf2-\xaaf4] + | [\xab01-\xab06] + | [\xab09-\xab0e] + | [\xab11-\xab16] + | [\xab20-\xab26] + | [\xab28-\xab2e] + | [\xab30-\xab5a] + | [\xab5c-\xab69] + | [\xab70-\xabe2] | [\xac00-\xd7a3] - | [\xf900-\xfa2d] - | [\xfa30-\xfa6a] + | [\xd7b0-\xd7c6] + | [\xd7cb-\xd7fb] + | [\xf900-\xfa6d] + | [\xfa70-\xfad9] | [\xfb00-\xfb06] | 
[\xfb13-\xfb17] | \xfb1d @@ -350,99 +456,290 @@ import Data.Word ( Word8 ) | [\xffca-\xffcf] | [\xffd2-\xffd7] | [\xffda-\xffdc] - | \xd800 [\xdc00-\xdc0a] - | \xd800 [\xdc0d-\xdc25] - | \xd800 [\xdc28-\xdc39] - | \xd800 [\xdc3c-\xdc3c] - | \xd800 [\xdc3f-\xdc4c] - | \xd800 [\xdc50-\xdc5c] - | \xd800 [\xdc80-\xdcf9] - | \xd800 [\xdf00-\xdf1d] - | \xd800 [\xdf30-\xdf49] - | \xd800 [\xdf80-\xdf9c] - | \xd801 [\xe000-\xe09c] - | \xd802 [\xe400-\xe404] - | \xd802 \x0808 - | \xd802 [\xe40a-\xe434] - | \xd802 [\xe437-\xe437] - | \xd802 \x083c - | \xd802 \x083f - | \xd835 [\xb000-\xb053] - | \xd835 [\xb056-\xb09b] - | \xd835 [\xb09e-\xb09e] - | \xd835 \xd4a2 - | \xd835 [\xb0a5-\xb0a5] - | \xd835 [\xb0a9-\xb0ab] - | \xd835 [\xb0ae-\xb0b8] - | \xd835 \xd4bb - | \xd835 [\xb0bd-\xb0c2] - | \xd835 [\xb0c5-\xb104] - | \xd835 [\xb107-\xb109] - | \xd835 [\xb10d-\xb113] - | \xd835 [\xb116-\xb11b] - | \xd835 [\xb11e-\xb138] - | \xd835 [\xb13b-\xb13d] - | \xd835 [\xb140-\xb143] - | \xd835 \xd546 - | \xd835 [\xb14a-\xb14f] - | \xd835 [\xb152-\xb2a2] - | \xd835 [\xb2a8-\xb2bf] - | \xd835 [\xb2c2-\xb2d9] - | \xd835 [\xb2dc-\xb2f9] - | \xd835 [\xb2fc-\xb313] - | \xd835 [\xb316-\xb333] - | \xd835 [\xb336-\xb34d] - | \xd835 [\xb350-\xb36d] - | \xd835 [\xb370-\xb387] - | \xd835 [\xb38a-\xb3a7] - | \xd835 [\xb3aa-\xb3c1] - | \xd835 [\xb3c4-\xb3c8] - | \xd840 [\xdc00-\xdffe] - | \xd841 [\xe000-\xe3fe] - | \xd842 [\xe400-\xe7fe] - | \xd843 [\xe800-\xebfe] - | \xd844 [\xec00-\xeffe] - | \xd845 [\xf000-\xf3fe] - | \xd846 [\xf400-\xf7fe] - | \xd847 [\xf800-\xfbfe] - | \xd848 [\xfc00-\xfffe] - | \xd849 [\x0000-\x03fe] - | \xd84a [\x0400-\x07fe] - | \xd84b [\x0800-\x0bfe] - | \xd84c [\x0c00-\x0ffe] - | \xd84d [\x1000-\x13fe] - | \xd84e [\x1400-\x17fe] - | \xd84f [\x1800-\x1bfe] - | \xd850 [\x1c00-\x1ffe] - | \xd851 [\x2000-\x23fe] - | \xd852 [\x2400-\x27fe] - | \xd853 [\x2800-\x2bfe] - | \xd854 [\x2c00-\x2ffe] - | \xd855 [\x3000-\x33fe] - | \xd856 [\x3400-\x37fe] - | \xd857 [\x3800-\x3bfe] - | 
\xd858 [\x3c00-\x3ffe] - | \xd859 [\x4000-\x43fe] - | \xd85a [\x4400-\x47fe] - | \xd85b [\x4800-\x4bfe] - | \xd85c [\x4c00-\x4ffe] - | \xd85d [\x5000-\x53fe] - | \xd85e [\x5400-\x57fe] - | \xd85f [\x5800-\x5bfe] - | \xd860 [\x5c00-\x5ffe] - | \xd861 [\x6000-\x63fe] - | \xd862 [\x6400-\x67fe] - | \xd863 [\x6800-\x6bfe] - | \xd864 [\x6c00-\x6ffe] - | \xd865 [\x7000-\x73fe] - | \xd866 [\x7400-\x77fe] - | \xd867 [\x7800-\x7bfe] - | \xd868 [\x7c00-\x7ffe] - | \xd869 [\x8000-\x82d5] - | \xd87e [\xd400-\xd61c] - --- XID_CONTINUE unicode character class -@xid_continue + | [\x10000-\x1000b] + | [\x1000d-\x10026] + | [\x10028-\x1003a] + | [\x1003c-\x1003d] + | [\x1003f-\x1004d] + | [\x10050-\x1005d] + | [\x10080-\x100fa] + | [\x10140-\x10174] + | [\x10280-\x1029c] + | [\x102a0-\x102d0] + | [\x10300-\x1031f] + | [\x1032d-\x1034a] + | [\x10350-\x10375] + | [\x10380-\x1039d] + | [\x103a0-\x103c3] + | [\x103c8-\x103cf] + | [\x103d1-\x103d5] + | [\x10400-\x1049d] + | [\x104b0-\x104d3] + | [\x104d8-\x104fb] + | [\x10500-\x10527] + | [\x10530-\x10563] + | [\x10570-\x1057a] + | [\x1057c-\x1058a] + | [\x1058c-\x10592] + | [\x10594-\x10595] + | [\x10597-\x105a1] + | [\x105a3-\x105b1] + | [\x105b3-\x105b9] + | [\x105bb-\x105bc] + | [\x10600-\x10736] + | [\x10740-\x10755] + | [\x10760-\x10767] + | [\x10780-\x10785] + | [\x10787-\x107b0] + | [\x107b2-\x107ba] + | [\x10800-\x10805] + | \x10808 + | [\x1080a-\x10835] + | [\x10837-\x10838] + | \x1083c + | [\x1083f-\x10855] + | [\x10860-\x10876] + | [\x10880-\x1089e] + | [\x108e0-\x108f2] + | [\x108f4-\x108f5] + | [\x10900-\x10915] + | [\x10920-\x10939] + | [\x10980-\x109b7] + | [\x109be-\x109bf] + | \x10a00 + | [\x10a10-\x10a13] + | [\x10a15-\x10a17] + | [\x10a19-\x10a35] + | [\x10a60-\x10a7c] + | [\x10a80-\x10a9c] + | [\x10ac0-\x10ac7] + | [\x10ac9-\x10ae4] + | [\x10b00-\x10b35] + | [\x10b40-\x10b55] + | [\x10b60-\x10b72] + | [\x10b80-\x10b91] + | [\x10c00-\x10c48] + | [\x10c80-\x10cb2] + | [\x10cc0-\x10cf2] + | [\x10d00-\x10d23] + | 
[\x10e80-\x10ea9] + | [\x10eb0-\x10eb1] + | [\x10f00-\x10f1c] + | \x10f27 + | [\x10f30-\x10f45] + | [\x10f70-\x10f81] + | [\x10fb0-\x10fc4] + | [\x10fe0-\x10ff6] + | [\x11003-\x11037] + | [\x11071-\x11072] + | \x11075 + | [\x11083-\x110af] + | [\x110d0-\x110e8] + | [\x11103-\x11126] + | \x11144 + | \x11147 + | [\x11150-\x11172] + | \x11176 + | [\x11183-\x111b2] + | [\x111c1-\x111c4] + | \x111da + | \x111dc + | [\x11200-\x11211] + | [\x11213-\x1122b] + | [\x1123f-\x11240] + | [\x11280-\x11286] + | \x11288 + | [\x1128a-\x1128d] + | [\x1128f-\x1129d] + | [\x1129f-\x112a8] + | [\x112b0-\x112de] + | [\x11305-\x1130c] + | [\x1130f-\x11310] + | [\x11313-\x11328] + | [\x1132a-\x11330] + | [\x11332-\x11333] + | [\x11335-\x11339] + | \x1133d + | \x11350 + | [\x1135d-\x11361] + | [\x11400-\x11434] + | [\x11447-\x1144a] + | [\x1145f-\x11461] + | [\x11480-\x114af] + | [\x114c4-\x114c5] + | \x114c7 + | [\x11580-\x115ae] + | [\x115d8-\x115db] + | [\x11600-\x1162f] + | \x11644 + | [\x11680-\x116aa] + | \x116b8 + | [\x11700-\x1171a] + | [\x11740-\x11746] + | [\x11800-\x1182b] + | [\x118a0-\x118df] + | [\x118ff-\x11906] + | \x11909 + | [\x1190c-\x11913] + | [\x11915-\x11916] + | [\x11918-\x1192f] + | \x1193f + | \x11941 + | [\x119a0-\x119a7] + | [\x119aa-\x119d0] + | \x119e1 + | \x119e3 + | \x11a00 + | [\x11a0b-\x11a32] + | \x11a3a + | \x11a50 + | [\x11a5c-\x11a89] + | \x11a9d + | [\x11ab0-\x11af8] + | [\x11c00-\x11c08] + | [\x11c0a-\x11c2e] + | \x11c40 + | [\x11c72-\x11c8f] + | [\x11d00-\x11d06] + | [\x11d08-\x11d09] + | [\x11d0b-\x11d30] + | \x11d46 + | [\x11d60-\x11d65] + | [\x11d67-\x11d68] + | [\x11d6a-\x11d89] + | \x11d98 + | [\x11ee0-\x11ef2] + | \x11f02 + | [\x11f04-\x11f10] + | [\x11f12-\x11f33] + | \x11fb0 + | [\x12000-\x12399] + | [\x12400-\x1246e] + | [\x12480-\x12543] + | [\x12f90-\x12ff0] + | [\x13000-\x1342f] + | [\x13441-\x13446] + | [\x14400-\x14646] + | [\x16800-\x16a38] + | [\x16a40-\x16a5e] + | [\x16a70-\x16abe] + | [\x16ad0-\x16aed] + | [\x16b00-\x16b2f] + | 
[\x16b40-\x16b43] + | [\x16b63-\x16b77] + | [\x16b7d-\x16b8f] + | [\x16e40-\x16e7f] + | [\x16f00-\x16f4a] + | \x16f50 + | [\x16f93-\x16f9f] + | [\x16fe0-\x16fe1] + | \x16fe3 + | [\x17000-\x187f7] + | [\x18800-\x18cd5] + | [\x18d00-\x18d08] + | [\x1aff0-\x1aff3] + | [\x1aff5-\x1affb] + | [\x1affd-\x1affe] + | [\x1b000-\x1b122] + | \x1b132 + | [\x1b150-\x1b152] + | \x1b155 + | [\x1b164-\x1b167] + | [\x1b170-\x1b2fb] + | [\x1bc00-\x1bc6a] + | [\x1bc70-\x1bc7c] + | [\x1bc80-\x1bc88] + | [\x1bc90-\x1bc99] + | [\x1d400-\x1d454] + | [\x1d456-\x1d49c] + | [\x1d49e-\x1d49f] + | \x1d4a2 + | [\x1d4a5-\x1d4a6] + | [\x1d4a9-\x1d4ac] + | [\x1d4ae-\x1d4b9] + | \x1d4bb + | [\x1d4bd-\x1d4c3] + | [\x1d4c5-\x1d505] + | [\x1d507-\x1d50a] + | [\x1d50d-\x1d514] + | [\x1d516-\x1d51c] + | [\x1d51e-\x1d539] + | [\x1d53b-\x1d53e] + | [\x1d540-\x1d544] + | \x1d546 + | [\x1d54a-\x1d550] + | [\x1d552-\x1d6a5] + | [\x1d6a8-\x1d6c0] + | [\x1d6c2-\x1d6da] + | [\x1d6dc-\x1d6fa] + | [\x1d6fc-\x1d714] + | [\x1d716-\x1d734] + | [\x1d736-\x1d74e] + | [\x1d750-\x1d76e] + | [\x1d770-\x1d788] + | [\x1d78a-\x1d7a8] + | [\x1d7aa-\x1d7c2] + | [\x1d7c4-\x1d7cb] + | [\x1df00-\x1df1e] + | [\x1df25-\x1df2a] + | [\x1e030-\x1e06d] + | [\x1e100-\x1e12c] + | [\x1e137-\x1e13d] + | \x1e14e + | [\x1e290-\x1e2ad] + | [\x1e2c0-\x1e2eb] + | [\x1e4d0-\x1e4eb] + | [\x1e7e0-\x1e7e6] + | [\x1e7e8-\x1e7eb] + | [\x1e7ed-\x1e7ee] + | [\x1e7f0-\x1e7fe] + | [\x1e800-\x1e8c4] + | [\x1e900-\x1e943] + | \x1e94b + | [\x1ee00-\x1ee03] + | [\x1ee05-\x1ee1f] + | [\x1ee21-\x1ee22] + | \x1ee24 + | \x1ee27 + | [\x1ee29-\x1ee32] + | [\x1ee34-\x1ee37] + | \x1ee39 + | \x1ee3b + | \x1ee42 + | \x1ee47 + | \x1ee49 + | \x1ee4b + | [\x1ee4d-\x1ee4f] + | [\x1ee51-\x1ee52] + | \x1ee54 + | \x1ee57 + | \x1ee59 + | \x1ee5b + | \x1ee5d + | \x1ee5f + | [\x1ee61-\x1ee62] + | \x1ee64 + | [\x1ee67-\x1ee6a] + | [\x1ee6c-\x1ee72] + | [\x1ee74-\x1ee77] + | [\x1ee79-\x1ee7c] + | \x1ee7e + | [\x1ee80-\x1ee89] + | [\x1ee8b-\x1ee9b] + | [\x1eea1-\x1eea3] + | 
[\x1eea5-\x1eea9] + | [\x1eeab-\x1eebb] + | [\x20000-\x2a6df] + | [\x2a700-\x2b739] + | [\x2b740-\x2b81d] + | [\x2b820-\x2cea1] + | [\x2ceb0-\x2ebe0] + | [\x2ebf0-\x2ee5d] + | [\x2f800-\x2fa1d] + | [\x30000-\x3134a] + | [\x31350-\x323af] + +@XID_Continue = [\x0030-\x0039] | [\x0041-\x005a] | \x005f @@ -453,55 +750,53 @@ import Data.Word ( Word8 ) | \x00ba | [\x00c0-\x00d6] | [\x00d8-\x00f6] - | [\x00f8-\x0236] - | [\x0250-\x02c1] + | [\x00f8-\x02c1] | [\x02c6-\x02d1] | [\x02e0-\x02e4] + | \x02ec | \x02ee - | [\x0300-\x0357] - | [\x035d-\x036f] - | \x0386 - | [\x0388-\x038a] + | [\x0300-\x0374] + | [\x0376-\x0377] + | [\x037b-\x037d] + | \x037f + | [\x0386-\x038a] | \x038c | [\x038e-\x03a1] - | [\x03a3-\x03ce] - | [\x03d0-\x03f5] - | [\x03f7-\x03fb] - | [\x0400-\x0481] - | [\x0483-\x0486] - | [\x048a-\x04ce] - | [\x04d0-\x04f5] - | [\x04f8-\x04f9] - | [\x0500-\x050f] + | [\x03a3-\x03f5] + | [\x03f7-\x0481] + | [\x0483-\x0487] + | [\x048a-\x052f] | [\x0531-\x0556] | \x0559 - | [\x0561-\x0587] - | [\x0591-\x05a1] - | [\x05a3-\x05b9] - | [\x05bb-\x05bd] + | [\x0560-\x0588] + | [\x0591-\x05bd] | \x05bf | [\x05c1-\x05c2] - | \x05c4 + | [\x05c4-\x05c5] + | \x05c7 | [\x05d0-\x05ea] - | [\x05f0-\x05f2] - | [\x0610-\x0615] - | [\x0621-\x063a] - | [\x0640-\x0658] - | [\x0660-\x0669] + | [\x05ef-\x05f2] + | [\x0610-\x061a] + | [\x0620-\x0669] | [\x066e-\x06d3] | [\x06d5-\x06dc] | [\x06df-\x06e8] | [\x06ea-\x06fc] | \x06ff | [\x0710-\x074a] - | [\x074d-\x074f] - | [\x0780-\x07b1] - | [\x0901-\x0939] - | [\x093c-\x094d] - | [\x0950-\x0954] - | [\x0958-\x0963] + | [\x074d-\x07b1] + | [\x07c0-\x07f5] + | \x07fa + | \x07fd + | [\x0800-\x082d] + | [\x0840-\x085b] + | [\x0860-\x086a] + | [\x0870-\x0887] + | [\x0889-\x088e] + | [\x0898-\x08e1] + | [\x08e3-\x0963] | [\x0966-\x096f] - | [\x0981-\x0983] + | [\x0971-\x0983] | [\x0985-\x098c] | [\x098f-\x0990] | [\x0993-\x09a8] @@ -510,11 +805,13 @@ import Data.Word ( Word8 ) | [\x09b6-\x09b9] | [\x09bc-\x09c4] | [\x09c7-\x09c8] - | 
[\x09cb-\x09cd] + | [\x09cb-\x09ce] | \x09d7 | [\x09dc-\x09dd] | [\x09df-\x09e3] | [\x09e6-\x09f1] + | \x09fc + | \x09fe | [\x0a01-\x0a03] | [\x0a05-\x0a0a] | [\x0a0f-\x0a10] @@ -527,9 +824,10 @@ import Data.Word ( Word8 ) | [\x0a3e-\x0a42] | [\x0a47-\x0a48] | [\x0a4b-\x0a4d] + | \x0a51 | [\x0a59-\x0a5c] | \x0a5e - | [\x0a66-\x0a74] + | [\x0a66-\x0a75] | [\x0a81-\x0a83] | [\x0a85-\x0a8d] | [\x0a8f-\x0a91] @@ -543,6 +841,7 @@ import Data.Word ( Word8 ) | \x0ad0 | [\x0ae0-\x0ae3] | [\x0ae6-\x0aef] + | [\x0af9-\x0aff] | [\x0b01-\x0b03] | [\x0b05-\x0b0c] | [\x0b0f-\x0b10] @@ -550,12 +849,12 @@ import Data.Word ( Word8 ) | [\x0b2a-\x0b30] | [\x0b32-\x0b33] | [\x0b35-\x0b39] - | [\x0b3c-\x0b43] + | [\x0b3c-\x0b44] | [\x0b47-\x0b48] | [\x0b4b-\x0b4d] - | [\x0b56-\x0b57] + | [\x0b55-\x0b57] | [\x0b5c-\x0b5d] - | [\x0b5f-\x0b61] + | [\x0b5f-\x0b63] | [\x0b66-\x0b6f] | \x0b71 | [\x0b82-\x0b83] @@ -567,26 +866,26 @@ import Data.Word ( Word8 ) | [\x0b9e-\x0b9f] | [\x0ba3-\x0ba4] | [\x0ba8-\x0baa] - | [\x0bae-\x0bb5] - | [\x0bb7-\x0bb9] + | [\x0bae-\x0bb9] | [\x0bbe-\x0bc2] | [\x0bc6-\x0bc8] | [\x0bca-\x0bcd] + | \x0bd0 | \x0bd7 - | [\x0be7-\x0bef] - | [\x0c01-\x0c03] - | [\x0c05-\x0c0c] + | [\x0be6-\x0bef] + | [\x0c00-\x0c0c] | [\x0c0e-\x0c10] | [\x0c12-\x0c28] - | [\x0c2a-\x0c33] - | [\x0c35-\x0c39] - | [\x0c3e-\x0c44] + | [\x0c2a-\x0c39] + | [\x0c3c-\x0c44] | [\x0c46-\x0c48] | [\x0c4a-\x0c4d] | [\x0c55-\x0c56] - | [\x0c60-\x0c61] + | [\x0c58-\x0c5a] + | \x0c5d + | [\x0c60-\x0c63] | [\x0c66-\x0c6f] - | [\x0c82-\x0c83] + | [\x0c80-\x0c83] | [\x0c85-\x0c8c] | [\x0c8e-\x0c90] | [\x0c92-\x0ca8] @@ -596,21 +895,20 @@ import Data.Word ( Word8 ) | [\x0cc6-\x0cc8] | [\x0cca-\x0ccd] | [\x0cd5-\x0cd6] - | \x0cde - | [\x0ce0-\x0ce1] + | [\x0cdd-\x0cde] + | [\x0ce0-\x0ce3] | [\x0ce6-\x0cef] - | [\x0d02-\x0d03] - | [\x0d05-\x0d0c] + | [\x0cf1-\x0cf3] + | [\x0d00-\x0d0c] | [\x0d0e-\x0d10] - | [\x0d12-\x0d28] - | [\x0d2a-\x0d39] - | [\x0d3e-\x0d43] + | [\x0d12-\x0d44] | [\x0d46-\x0d48] - | 
[\x0d4a-\x0d4d] - | \x0d57 - | [\x0d60-\x0d61] + | [\x0d4a-\x0d4e] + | [\x0d54-\x0d57] + | [\x0d5f-\x0d63] | [\x0d66-\x0d6f] - | [\x0d82-\x0d83] + | [\x0d7a-\x0d7f] + | [\x0d81-\x0d83] | [\x0d85-\x0d96] | [\x0d9a-\x0db1] | [\x0db3-\x0dbb] @@ -620,28 +918,22 @@ import Data.Word ( Word8 ) | [\x0dcf-\x0dd4] | \x0dd6 | [\x0dd8-\x0ddf] + | [\x0de6-\x0def] | [\x0df2-\x0df3] | [\x0e01-\x0e3a] | [\x0e40-\x0e4e] | [\x0e50-\x0e59] | [\x0e81-\x0e82] | \x0e84 - | [\x0e87-\x0e88] - | \x0e8a - | \x0e8d - | [\x0e94-\x0e97] - | [\x0e99-\x0e9f] - | [\x0ea1-\x0ea3] + | [\x0e86-\x0e8a] + | [\x0e8c-\x0ea3] | \x0ea5 - | \x0ea7 - | [\x0eaa-\x0eab] - | [\x0ead-\x0eb9] - | [\x0ebb-\x0ebd] + | [\x0ea7-\x0ebd] | [\x0ec0-\x0ec4] | \x0ec6 - | [\x0ec8-\x0ecd] + | [\x0ec8-\x0ece] | [\x0ed0-\x0ed9] - | [\x0edc-\x0edd] + | [\x0edc-\x0edf] | \x0f00 | [\x0f18-\x0f19] | [\x0f20-\x0f29] @@ -649,81 +941,87 @@ import Data.Word ( Word8 ) | \x0f37 | \x0f39 | [\x0f3e-\x0f47] - | [\x0f49-\x0f6a] + | [\x0f49-\x0f6c] | [\x0f71-\x0f84] - | [\x0f86-\x0f8b] - | [\x0f90-\x0f97] + | [\x0f86-\x0f97] | [\x0f99-\x0fbc] | \x0fc6 - | [\x1000-\x1021] - | [\x1023-\x1027] - | [\x1029-\x102a] - | [\x102c-\x1032] - | [\x1036-\x1039] - | [\x1040-\x1049] - | [\x1050-\x1059] + | [\x1000-\x1049] + | [\x1050-\x109d] | [\x10a0-\x10c5] - | [\x10d0-\x10f8] - | [\x1100-\x1159] - | [\x115f-\x11a2] - | [\x11a8-\x11f9] - | [\x1200-\x1206] - | [\x1208-\x1246] - | \x1248 + | \x10c7 + | \x10cd + | [\x10d0-\x10fa] + | [\x10fc-\x1248] | [\x124a-\x124d] | [\x1250-\x1256] | \x1258 | [\x125a-\x125d] - | [\x1260-\x1286] - | \x1288 + | [\x1260-\x1288] | [\x128a-\x128d] - | [\x1290-\x12ae] - | \x12b0 + | [\x1290-\x12b0] | [\x12b2-\x12b5] | [\x12b8-\x12be] | \x12c0 | [\x12c2-\x12c5] - | [\x12c8-\x12ce] - | [\x12d0-\x12d6] - | [\x12d8-\x12ee] - | [\x12f0-\x130e] - | \x1310 + | [\x12c8-\x12d6] + | [\x12d8-\x1310] | [\x1312-\x1315] - | [\x1318-\x131e] - | [\x1320-\x1346] - | [\x1348-\x135a] + | [\x1318-\x135a] + | [\x135d-\x135f] | [\x1369-\x1371] - 
| [\x13a0-\x13f4] + | [\x1380-\x138f] + | [\x13a0-\x13f5] + | [\x13f8-\x13fd] | [\x1401-\x166c] - | [\x166f-\x1676] + | [\x166f-\x167f] | [\x1681-\x169a] | [\x16a0-\x16ea] - | [\x16ee-\x16f0] - | [\x1700-\x170c] - | [\x170e-\x1714] - | [\x1720-\x1734] + | [\x16ee-\x16f8] + | [\x1700-\x1715] + | [\x171f-\x1734] | [\x1740-\x1753] | [\x1760-\x176c] | [\x176e-\x1770] | [\x1772-\x1773] - | [\x1780-\x17b3] - | [\x17b6-\x17d3] + | [\x1780-\x17d3] | \x17d7 | [\x17dc-\x17dd] | [\x17e0-\x17e9] | [\x180b-\x180d] - | [\x1810-\x1819] - | [\x1820-\x1877] - | [\x1880-\x18a9] - | [\x1900-\x191c] + | [\x180f-\x1819] + | [\x1820-\x1878] + | [\x1880-\x18aa] + | [\x18b0-\x18f5] + | [\x1900-\x191e] | [\x1920-\x192b] | [\x1930-\x193b] | [\x1946-\x196d] | [\x1970-\x1974] - | [\x1d00-\x1d6b] - | [\x1e00-\x1e9b] - | [\x1ea0-\x1ef9] - | [\x1f00-\x1f15] + | [\x1980-\x19ab] + | [\x19b0-\x19c9] + | [\x19d0-\x19da] + | [\x1a00-\x1a1b] + | [\x1a20-\x1a5e] + | [\x1a60-\x1a7c] + | [\x1a7f-\x1a89] + | [\x1a90-\x1a99] + | \x1aa7 + | [\x1ab0-\x1abd] + | [\x1abf-\x1ace] + | [\x1b00-\x1b4c] + | [\x1b50-\x1b59] + | [\x1b6b-\x1b73] + | [\x1b80-\x1bf3] + | [\x1c00-\x1c37] + | [\x1c40-\x1c49] + | [\x1c4d-\x1c7d] + | [\x1c80-\x1c88] + | [\x1c90-\x1cba] + | [\x1cbd-\x1cbf] + | [\x1cd0-\x1cd2] + | [\x1cd4-\x1cfa] + | [\x1d00-\x1f15] | [\x1f18-\x1f1d] | [\x1f20-\x1f45] | [\x1f48-\x1f4d] @@ -742,13 +1040,15 @@ import Data.Word ( Word8 ) | [\x1fe0-\x1fec] | [\x1ff2-\x1ff4] | [\x1ff6-\x1ffc] + | [\x200c-\x200d] | [\x203f-\x2040] | \x2054 | \x2071 | \x207f + | [\x2090-\x209c] | [\x20d0-\x20dc] | \x20e1 - | [\x20e5-\x20ea] + | [\x20e5-\x20f0] | \x2102 | \x2107 | [\x210a-\x2113] @@ -757,11 +1057,28 @@ import Data.Word ( Word8 ) | \x2124 | \x2126 | \x2128 - | [\x212a-\x2131] - | [\x2133-\x2139] - | [\x213d-\x213f] + | [\x212a-\x2139] + | [\x213c-\x213f] | [\x2145-\x2149] - | [\x2160-\x2183] + | \x214e + | [\x2160-\x2188] + | [\x2c00-\x2ce4] + | [\x2ceb-\x2cf3] + | [\x2d00-\x2d25] + | \x2d27 + | \x2d2d + | 
[\x2d30-\x2d67] + | \x2d6f + | [\x2d7f-\x2d96] + | [\x2da0-\x2da6] + | [\x2da8-\x2dae] + | [\x2db0-\x2db6] + | [\x2db8-\x2dbe] + | [\x2dc0-\x2dc6] + | [\x2dc8-\x2dce] + | [\x2dd0-\x2dd6] + | [\x2dd8-\x2dde] + | [\x2de0-\x2dff] | [\x3005-\x3007] | [\x3021-\x302f] | [\x3031-\x3035] @@ -770,16 +1087,60 @@ import Data.Word ( Word8 ) | [\x3099-\x309a] | [\x309d-\x309f] | [\x30a1-\x30ff] - | [\x3105-\x312c] + | [\x3105-\x312f] | [\x3131-\x318e] - | [\x31a0-\x31b7] + | [\x31a0-\x31bf] | [\x31f0-\x31ff] - | [\x3400-\x4db5] - | [\x4e00-\x9fa5] - | [\xa000-\xa48c] + | [\x3400-\x4dbf] + | [\x4e00-\xa48c] + | [\xa4d0-\xa4fd] + | [\xa500-\xa60c] + | [\xa610-\xa62b] + | [\xa640-\xa66f] + | [\xa674-\xa67d] + | [\xa67f-\xa6f1] + | [\xa717-\xa71f] + | [\xa722-\xa788] + | [\xa78b-\xa7ca] + | [\xa7d0-\xa7d1] + | \xa7d3 + | [\xa7d5-\xa7d9] + | [\xa7f2-\xa827] + | \xa82c + | [\xa840-\xa873] + | [\xa880-\xa8c5] + | [\xa8d0-\xa8d9] + | [\xa8e0-\xa8f7] + | \xa8fb + | [\xa8fd-\xa92d] + | [\xa930-\xa953] + | [\xa960-\xa97c] + | [\xa980-\xa9c0] + | [\xa9cf-\xa9d9] + | [\xa9e0-\xa9fe] + | [\xaa00-\xaa36] + | [\xaa40-\xaa4d] + | [\xaa50-\xaa59] + | [\xaa60-\xaa76] + | [\xaa7a-\xaac2] + | [\xaadb-\xaadd] + | [\xaae0-\xaaef] + | [\xaaf2-\xaaf6] + | [\xab01-\xab06] + | [\xab09-\xab0e] + | [\xab11-\xab16] + | [\xab20-\xab26] + | [\xab28-\xab2e] + | [\xab30-\xab5a] + | [\xab5c-\xab69] + | [\xab70-\xabea] + | [\xabec-\xabed] + | [\xabf0-\xabf9] | [\xac00-\xd7a3] - | [\xf900-\xfa2d] - | [\xfa30-\xfa6a] + | [\xd7b0-\xd7c6] + | [\xd7cb-\xd7fb] + | [\xf900-\xfa6d] + | [\xfa70-\xfad9] | [\xfb00-\xfb06] | [\xfb13-\xfb17] | [\xfb1d-\xfb28] @@ -795,7 +1156,7 @@ import Data.Word ( Word8 ) | [\xfd92-\xfdc7] | [\xfdf0-\xfdf9] | [\xfe00-\xfe0f] - | [\xfe20-\xfe23] + | [\xfe20-\xfe2f] | [\xfe33-\xfe34] | [\xfe4d-\xfe4f] | \xfe71 @@ -814,106 +1175,355 @@ import Data.Word ( Word8 ) | [\xffca-\xffcf] | [\xffd2-\xffd7] | [\xffda-\xffdc] - | \xd800 [\xdc00-\xdc0a] - | \xd800 [\xdc0d-\xdc25] - | \xd800 [\xdc28-\xdc39] 
- | \xd800 [\xdc3c-\xdc3c] - | \xd800 [\xdc3f-\xdc4c] - | \xd800 [\xdc50-\xdc5c] - | \xd800 [\xdc80-\xdcf9] - | \xd800 [\xdf00-\xdf1d] - | \xd800 [\xdf30-\xdf49] - | \xd800 [\xdf80-\xdf9c] - | \xd801 [\xe000-\xe09c] - | \xd801 [\xe0a0-\xe0a8] - | \xd802 [\xe400-\xe404] - | \xd802 \x0808 - | \xd802 [\xe40a-\xe434] - | \xd802 [\xe437-\xe437] - | \xd802 \x083c - | \xd802 \x083f - | \xd834 [\xad65-\xad68] - | \xd834 [\xad6d-\xad71] - | \xd834 [\xad7b-\xad81] - | \xd834 [\xad85-\xad8a] - | \xd834 [\xadaa-\xadac] - | \xd835 [\xb000-\xb053] - | \xd835 [\xb056-\xb09b] - | \xd835 [\xb09e-\xb09e] - | \xd835 \xd4a2 - | \xd835 [\xb0a5-\xb0a5] - | \xd835 [\xb0a9-\xb0ab] - | \xd835 [\xb0ae-\xb0b8] - | \xd835 \xd4bb - | \xd835 [\xb0bd-\xb0c2] - | \xd835 [\xb0c5-\xb104] - | \xd835 [\xb107-\xb109] - | \xd835 [\xb10d-\xb113] - | \xd835 [\xb116-\xb11b] - | \xd835 [\xb11e-\xb138] - | \xd835 [\xb13b-\xb13d] - | \xd835 [\xb140-\xb143] - | \xd835 \xd546 - | \xd835 [\xb14a-\xb14f] - | \xd835 [\xb152-\xb2a2] - | \xd835 [\xb2a8-\xb2bf] - | \xd835 [\xb2c2-\xb2d9] - | \xd835 [\xb2dc-\xb2f9] - | \xd835 [\xb2fc-\xb313] - | \xd835 [\xb316-\xb333] - | \xd835 [\xb336-\xb34d] - | \xd835 [\xb350-\xb36d] - | \xd835 [\xb370-\xb387] - | \xd835 [\xb38a-\xb3a7] - | \xd835 [\xb3aa-\xb3c1] - | \xd835 [\xb3c4-\xb3c8] - | \xd835 [\xb3ce-\xb3fe] - | \xd840 [\xdc00-\xdffe] - | \xd841 [\xe000-\xe3fe] - | \xd842 [\xe400-\xe7fe] - | \xd843 [\xe800-\xebfe] - | \xd844 [\xec00-\xeffe] - | \xd845 [\xf000-\xf3fe] - | \xd846 [\xf400-\xf7fe] - | \xd847 [\xf800-\xfbfe] - | \xd848 [\xfc00-\xfffe] - | \xd849 [\x0000-\x03fe] - | \xd84a [\x0400-\x07fe] - | \xd84b [\x0800-\x0bfe] - | \xd84c [\x0c00-\x0ffe] - | \xd84d [\x1000-\x13fe] - | \xd84e [\x1400-\x17fe] - | \xd84f [\x1800-\x1bfe] - | \xd850 [\x1c00-\x1ffe] - | \xd851 [\x2000-\x23fe] - | \xd852 [\x2400-\x27fe] - | \xd853 [\x2800-\x2bfe] - | \xd854 [\x2c00-\x2ffe] - | \xd855 [\x3000-\x33fe] - | \xd856 [\x3400-\x37fe] - | \xd857 [\x3800-\x3bfe] - | \xd858 [\x3c00-\x3ffe] - 
| \xd859 [\x4000-\x43fe] - | \xd85a [\x4400-\x47fe] - | \xd85b [\x4800-\x4bfe] - | \xd85c [\x4c00-\x4ffe] - | \xd85d [\x5000-\x53fe] - | \xd85e [\x5400-\x57fe] - | \xd85f [\x5800-\x5bfe] - | \xd860 [\x5c00-\x5ffe] - | \xd861 [\x6000-\x63fe] - | \xd862 [\x6400-\x67fe] - | \xd863 [\x6800-\x6bfe] - | \xd864 [\x6c00-\x6ffe] - | \xd865 [\x7000-\x73fe] - | \xd866 [\x7400-\x77fe] - | \xd867 [\x7800-\x7bfe] - | \xd868 [\x7c00-\x7ffe] - | \xd869 [\x8000-\x82d5] - | \xd87e [\xd400-\xd61c] - | \xdb40 [\xdd00-\xddee] - -@ident = @xid_start @xid_continue* + | [\x10000-\x1000b] + | [\x1000d-\x10026] + | [\x10028-\x1003a] + | [\x1003c-\x1003d] + | [\x1003f-\x1004d] + | [\x10050-\x1005d] + | [\x10080-\x100fa] + | [\x10140-\x10174] + | \x101fd + | [\x10280-\x1029c] + | [\x102a0-\x102d0] + | \x102e0 + | [\x10300-\x1031f] + | [\x1032d-\x1034a] + | [\x10350-\x1037a] + | [\x10380-\x1039d] + | [\x103a0-\x103c3] + | [\x103c8-\x103cf] + | [\x103d1-\x103d5] + | [\x10400-\x1049d] + | [\x104a0-\x104a9] + | [\x104b0-\x104d3] + | [\x104d8-\x104fb] + | [\x10500-\x10527] + | [\x10530-\x10563] + | [\x10570-\x1057a] + | [\x1057c-\x1058a] + | [\x1058c-\x10592] + | [\x10594-\x10595] + | [\x10597-\x105a1] + | [\x105a3-\x105b1] + | [\x105b3-\x105b9] + | [\x105bb-\x105bc] + | [\x10600-\x10736] + | [\x10740-\x10755] + | [\x10760-\x10767] + | [\x10780-\x10785] + | [\x10787-\x107b0] + | [\x107b2-\x107ba] + | [\x10800-\x10805] + | \x10808 + | [\x1080a-\x10835] + | [\x10837-\x10838] + | \x1083c + | [\x1083f-\x10855] + | [\x10860-\x10876] + | [\x10880-\x1089e] + | [\x108e0-\x108f2] + | [\x108f4-\x108f5] + | [\x10900-\x10915] + | [\x10920-\x10939] + | [\x10980-\x109b7] + | [\x109be-\x109bf] + | [\x10a00-\x10a03] + | [\x10a05-\x10a06] + | [\x10a0c-\x10a13] + | [\x10a15-\x10a17] + | [\x10a19-\x10a35] + | [\x10a38-\x10a3a] + | \x10a3f + | [\x10a60-\x10a7c] + | [\x10a80-\x10a9c] + | [\x10ac0-\x10ac7] + | [\x10ac9-\x10ae6] + | [\x10b00-\x10b35] + | [\x10b40-\x10b55] + | [\x10b60-\x10b72] + | [\x10b80-\x10b91] + | 
[\x10c00-\x10c48] + | [\x10c80-\x10cb2] + | [\x10cc0-\x10cf2] + | [\x10d00-\x10d27] + | [\x10d30-\x10d39] + | [\x10e80-\x10ea9] + | [\x10eab-\x10eac] + | [\x10eb0-\x10eb1] + | [\x10efd-\x10f1c] + | \x10f27 + | [\x10f30-\x10f50] + | [\x10f70-\x10f85] + | [\x10fb0-\x10fc4] + | [\x10fe0-\x10ff6] + | [\x11000-\x11046] + | [\x11066-\x11075] + | [\x1107f-\x110ba] + | \x110c2 + | [\x110d0-\x110e8] + | [\x110f0-\x110f9] + | [\x11100-\x11134] + | [\x11136-\x1113f] + | [\x11144-\x11147] + | [\x11150-\x11173] + | \x11176 + | [\x11180-\x111c4] + | [\x111c9-\x111cc] + | [\x111ce-\x111da] + | \x111dc + | [\x11200-\x11211] + | [\x11213-\x11237] + | [\x1123e-\x11241] + | [\x11280-\x11286] + | \x11288 + | [\x1128a-\x1128d] + | [\x1128f-\x1129d] + | [\x1129f-\x112a8] + | [\x112b0-\x112ea] + | [\x112f0-\x112f9] + | [\x11300-\x11303] + | [\x11305-\x1130c] + | [\x1130f-\x11310] + | [\x11313-\x11328] + | [\x1132a-\x11330] + | [\x11332-\x11333] + | [\x11335-\x11339] + | [\x1133b-\x11344] + | [\x11347-\x11348] + | [\x1134b-\x1134d] + | \x11350 + | \x11357 + | [\x1135d-\x11363] + | [\x11366-\x1136c] + | [\x11370-\x11374] + | [\x11400-\x1144a] + | [\x11450-\x11459] + | [\x1145e-\x11461] + | [\x11480-\x114c5] + | \x114c7 + | [\x114d0-\x114d9] + | [\x11580-\x115b5] + | [\x115b8-\x115c0] + | [\x115d8-\x115dd] + | [\x11600-\x11640] + | \x11644 + | [\x11650-\x11659] + | [\x11680-\x116b8] + | [\x116c0-\x116c9] + | [\x11700-\x1171a] + | [\x1171d-\x1172b] + | [\x11730-\x11739] + | [\x11740-\x11746] + | [\x11800-\x1183a] + | [\x118a0-\x118e9] + | [\x118ff-\x11906] + | \x11909 + | [\x1190c-\x11913] + | [\x11915-\x11916] + | [\x11918-\x11935] + | [\x11937-\x11938] + | [\x1193b-\x11943] + | [\x11950-\x11959] + | [\x119a0-\x119a7] + | [\x119aa-\x119d7] + | [\x119da-\x119e1] + | [\x119e3-\x119e4] + | [\x11a00-\x11a3e] + | \x11a47 + | [\x11a50-\x11a99] + | \x11a9d + | [\x11ab0-\x11af8] + | [\x11c00-\x11c08] + | [\x11c0a-\x11c36] + | [\x11c38-\x11c40] + | [\x11c50-\x11c59] + | [\x11c72-\x11c8f] + | 
[\x11c92-\x11ca7] + | [\x11ca9-\x11cb6] + | [\x11d00-\x11d06] + | [\x11d08-\x11d09] + | [\x11d0b-\x11d36] + | \x11d3a + | [\x11d3c-\x11d3d] + | [\x11d3f-\x11d47] + | [\x11d50-\x11d59] + | [\x11d60-\x11d65] + | [\x11d67-\x11d68] + | [\x11d6a-\x11d8e] + | [\x11d90-\x11d91] + | [\x11d93-\x11d98] + | [\x11da0-\x11da9] + | [\x11ee0-\x11ef6] + | [\x11f00-\x11f10] + | [\x11f12-\x11f3a] + | [\x11f3e-\x11f42] + | [\x11f50-\x11f59] + | \x11fb0 + | [\x12000-\x12399] + | [\x12400-\x1246e] + | [\x12480-\x12543] + | [\x12f90-\x12ff0] + | [\x13000-\x1342f] + | [\x13440-\x13455] + | [\x14400-\x14646] + | [\x16800-\x16a38] + | [\x16a40-\x16a5e] + | [\x16a60-\x16a69] + | [\x16a70-\x16abe] + | [\x16ac0-\x16ac9] + | [\x16ad0-\x16aed] + | [\x16af0-\x16af4] + | [\x16b00-\x16b36] + | [\x16b40-\x16b43] + | [\x16b50-\x16b59] + | [\x16b63-\x16b77] + | [\x16b7d-\x16b8f] + | [\x16e40-\x16e7f] + | [\x16f00-\x16f4a] + | [\x16f4f-\x16f87] + | [\x16f8f-\x16f9f] + | [\x16fe0-\x16fe1] + | [\x16fe3-\x16fe4] + | [\x16ff0-\x16ff1] + | [\x17000-\x187f7] + | [\x18800-\x18cd5] + | [\x18d00-\x18d08] + | [\x1aff0-\x1aff3] + | [\x1aff5-\x1affb] + | [\x1affd-\x1affe] + | [\x1b000-\x1b122] + | \x1b132 + | [\x1b150-\x1b152] + | \x1b155 + | [\x1b164-\x1b167] + | [\x1b170-\x1b2fb] + | [\x1bc00-\x1bc6a] + | [\x1bc70-\x1bc7c] + | [\x1bc80-\x1bc88] + | [\x1bc90-\x1bc99] + | [\x1bc9d-\x1bc9e] + | [\x1cf00-\x1cf2d] + | [\x1cf30-\x1cf46] + | [\x1d165-\x1d169] + | [\x1d16d-\x1d172] + | [\x1d17b-\x1d182] + | [\x1d185-\x1d18b] + | [\x1d1aa-\x1d1ad] + | [\x1d242-\x1d244] + | [\x1d400-\x1d454] + | [\x1d456-\x1d49c] + | [\x1d49e-\x1d49f] + | \x1d4a2 + | [\x1d4a5-\x1d4a6] + | [\x1d4a9-\x1d4ac] + | [\x1d4ae-\x1d4b9] + | \x1d4bb + | [\x1d4bd-\x1d4c3] + | [\x1d4c5-\x1d505] + | [\x1d507-\x1d50a] + | [\x1d50d-\x1d514] + | [\x1d516-\x1d51c] + | [\x1d51e-\x1d539] + | [\x1d53b-\x1d53e] + | [\x1d540-\x1d544] + | \x1d546 + | [\x1d54a-\x1d550] + | [\x1d552-\x1d6a5] + | [\x1d6a8-\x1d6c0] + | [\x1d6c2-\x1d6da] + | [\x1d6dc-\x1d6fa] + | 
[\x1d6fc-\x1d714] + | [\x1d716-\x1d734] + | [\x1d736-\x1d74e] + | [\x1d750-\x1d76e] + | [\x1d770-\x1d788] + | [\x1d78a-\x1d7a8] + | [\x1d7aa-\x1d7c2] + | [\x1d7c4-\x1d7cb] + | [\x1d7ce-\x1d7ff] + | [\x1da00-\x1da36] + | [\x1da3b-\x1da6c] + | \x1da75 + | \x1da84 + | [\x1da9b-\x1da9f] + | [\x1daa1-\x1daaf] + | [\x1df00-\x1df1e] + | [\x1df25-\x1df2a] + | [\x1e000-\x1e006] + | [\x1e008-\x1e018] + | [\x1e01b-\x1e021] + | [\x1e023-\x1e024] + | [\x1e026-\x1e02a] + | [\x1e030-\x1e06d] + | \x1e08f + | [\x1e100-\x1e12c] + | [\x1e130-\x1e13d] + | [\x1e140-\x1e149] + | \x1e14e + | [\x1e290-\x1e2ae] + | [\x1e2c0-\x1e2f9] + | [\x1e4d0-\x1e4f9] + | [\x1e7e0-\x1e7e6] + | [\x1e7e8-\x1e7eb] + | [\x1e7ed-\x1e7ee] + | [\x1e7f0-\x1e7fe] + | [\x1e800-\x1e8c4] + | [\x1e8d0-\x1e8d6] + | [\x1e900-\x1e94b] + | [\x1e950-\x1e959] + | [\x1ee00-\x1ee03] + | [\x1ee05-\x1ee1f] + | [\x1ee21-\x1ee22] + | \x1ee24 + | \x1ee27 + | [\x1ee29-\x1ee32] + | [\x1ee34-\x1ee37] + | \x1ee39 + | \x1ee3b + | \x1ee42 + | \x1ee47 + | \x1ee49 + | \x1ee4b + | [\x1ee4d-\x1ee4f] + | [\x1ee51-\x1ee52] + | \x1ee54 + | \x1ee57 + | \x1ee59 + | \x1ee5b + | \x1ee5d + | \x1ee5f + | [\x1ee61-\x1ee62] + | \x1ee64 + | [\x1ee67-\x1ee6a] + | [\x1ee6c-\x1ee72] + | [\x1ee74-\x1ee77] + | [\x1ee79-\x1ee7c] + | \x1ee7e + | [\x1ee80-\x1ee89] + | [\x1ee8b-\x1ee9b] + | [\x1eea1-\x1eea3] + | [\x1eea5-\x1eea9] + | [\x1eeab-\x1eebb] + | [\x1fbf0-\x1fbf9] + | [\x20000-\x2a6df] + | [\x2a700-\x2b739] + | [\x2b740-\x2b81d] + | [\x2b820-\x2cea1] + | [\x2ceb0-\x2ebe0] + | [\x2ebf0-\x2ee5d] + | [\x2f800-\x2fa1d] + | [\x30000-\x3134a] + | [\x31350-\x323af] + | [\xe0100-\xe01ef] + +-- End of code generated by "scripts/unicode.py". 
+ +-- See https://github.com/rust-lang/rust/blob/ac77e88f7a84e20311f5518e34c806503d586c1c/compiler/rustc_lexer/src/lib.rs#L313-L326 +@id_start = "_" | @XID_Start +@id_continue = @XID_Continue + +@ident = @id_start @id_continue* @raw_ident = r \# @ident @lifetime = \' @ident diff --git a/test/unit-tests/LexerTest.hs b/test/unit-tests/LexerTest.hs index 445d197..10f796c 100644 --- a/test/unit-tests/LexerTest.hs +++ b/test/unit-tests/LexerTest.hs @@ -122,6 +122,10 @@ commonCode = testGroup "lexing common code fragments" , LiteralTok (IntegerTok "1") Nothing ] + -- Unicode characters that require surrogate pairs to encode in UTF-16. These + -- serve as regression tests for issue #3. + , testCode "𝑂_𝑂" [ IdentTok (mkIdent "𝑂_𝑂") ] + , testCode "𐌝" [ IdentTok (mkIdent "𐌝") ] ] From 1828c2418e307cf82ed49cf56cc21437d2fdc443 Mon Sep 17 00:00:00 2001 From: Iavor Diatchki Date: Fri, 18 Oct 2024 11:17:17 -0700 Subject: [PATCH 13/15] Restrict `happy` version to less than 2.1 --- language-rust.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/language-rust.cabal b/language-rust.cabal index 76ab2c6..5606bae 100644 --- a/language-rust.cabal +++ b/language-rust.cabal @@ -41,7 +41,7 @@ library -Wincomplete-uni-patterns -Wmissing-signatures - build-tools: alex >=3.1, happy >=1.19.8 + build-tools: alex >=3.1, happy >=1.19.8 && < 2.1 default-language: Haskell2010 exposed-modules: Language.Rust.Syntax From 73b8160f9fb5681d44c0ac6147721a6e31d5de8a Mon Sep 17 00:00:00 2001 From: Iavor Diatchki Date: Fri, 18 Oct 2024 13:27:58 -0700 Subject: [PATCH 14/15] Add a comment on the upper bound. 
--- language-rust.cabal | 3 +++ 1 file changed, 3 insertions(+) diff --git a/language-rust.cabal b/language-rust.cabal index 5606bae..6871c88 100644 --- a/language-rust.cabal +++ b/language-rust.cabal @@ -42,6 +42,9 @@ library -Wmissing-signatures build-tools: alex >=3.1, happy >=1.19.8 && < 2.1 + -- We restrict to < 2.1, because of + -- https://github.com/haskell/happy/issues/320 + default-language: Haskell2010 exposed-modules: Language.Rust.Syntax From fe4c7809ee8f65507c9be393cd8ea45d7b2b05d6 Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Mon, 6 Jan 2025 10:56:44 -0600 Subject: [PATCH 15/15] Allow building with happy-2.1.1 or later `happy-2.1.1` includes a fix for https://github.com/haskell/happy/issues/320, which was preventing `language-rust` from building. Now that this version of `happy` is on Hackage, we no longer need to include such a restrictive upper version bound on `happy`. --- language-rust.cabal | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/language-rust.cabal b/language-rust.cabal index 6871c88..0f30d1a 100644 --- a/language-rust.cabal +++ b/language-rust.cabal @@ -41,8 +41,8 @@ library -Wincomplete-uni-patterns -Wmissing-signatures - build-tools: alex >=3.1, happy >=1.19.8 && < 2.1 - -- We restrict to < 2.1, because of + build-tools: alex >=3.1, happy >=1.19.8 && < 2.1 || >= 2.1.1 + -- We restrict to < 2.1 || >= 2.1.1, because of -- https://github.com/haskell/happy/issues/320 default-language: Haskell2010