Skip to content

Commit 4305dab

Browse files
authored
Fix dhall-to-nix encoding of symbols with special keys (#2426)
* Fix dhall-to-nix encoding of symbols with special keys Symbols in nix can only consist of a very restricted amount of characters, whereas in dhall they can be basically anything. So let’s use an encoding scheme similar to what GHC uses to generate C symbols. Code slightly changed (some GHC-specific cases removed). I might have missed some cases of dhall symbols that are translated verbatim. * dhall-to-nix: Encode unions slightly differently for better symbol Symbols in nix can only consist of a very restricted amount of characters, whereas in dhall they can be basically anything. When you want to get a value of a union, before it was generated into ``` { Foo, Bar }: Foo ``` where `Foo` cannot be a complex symbol like `{ Foo/Baz, Bar }: Foo/Baz`, because nix does not allow it in anonymous record arguments. So now we generate it as ``` u: u."Foo/Baz" ``` which should always work and is equal (though it loses the information of what other fields are there in the nix code). Before I faultily encoded some of these symbols with a Z-encoding, but that was wrong, so it was undone. * dhall-to-nix: Quote field selection symbols Another one I missed, when you have a field selector, you want to quote it, in case it has some symbols nix does not know how to handle. `x.Foo/bar` will now be `x."Foo/bar"`, which is valid nix. * dhall-to-nix: prepare using Text.concatMap in Z-encoding We copied the Z-encoding functions from GHC more or less verbatim, but we can rewrite it in terms of `Text.concatMap`, which should perform better. In this first step, we change all the helper functions from `Char -> String` to `Char -> Text`, and apply hlint warnings. * dhall-to-nix: Z-encode symbols without going through String We can drop the extra `any needsEncoding` check, since it should be performant enough on its own when using `Text.concatMap` and simplifies the code a bit. * dhall-to-nix: Only double-quote symbols if they have symbols A small improvement in the generation logic. The hnix should really just do this for us.
1 parent 587c087 commit 4305dab

File tree

1 file changed

+140
-24
lines changed

1 file changed

+140
-24
lines changed

dhall-nix/src/Dhall/Nix.hs

Lines changed: 140 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,13 @@ import Data.Fix (Fix (..))
9999
import Data.Foldable (toList)
100100
import Data.List.NonEmpty (NonEmpty(..))
101101
import Data.Text (Text)
102+
import qualified Data.Text as Text
102103
import Data.Traversable (for)
103104
import Data.Typeable (Typeable)
104105
import Data.Void (Void, absurd)
105106
import Lens.Family (toListOf)
107+
import Numeric (showHex)
108+
import Data.Char (ord, isDigit, isAsciiLower, isAsciiUpper)
106109

107110
import Dhall.Core
108111
( Binding (..)
@@ -226,7 +229,7 @@ Nix
226229
$_ERROR: Cannot project by type
227230

228231
The ❰dhall-to-nix❱ compiler does not support projecting out a subset of a record
229-
by the expected type (i.e. ❰someRecord.(someType)❱
232+
by the expected type (i.e. ❰someRecord.(someType)❱
230233
|]
231234

232235
show CannotShowConstructor =
@@ -255,7 +258,7 @@ instance Exception CompileError
255258
{-| Convert a Dhall expression to the equivalent Nix expression
256259
257260
>>> :set -XOverloadedStrings
258-
>>> dhallToNix (Lam "x" Natural (Lam "y" Natural (NaturalPlus "x" "y")))
261+
>>> dhallToNix (Lam "x" Natural (Lam "y" Natural (NaturalPlus "x" "y")))
259262
Right (NAbs (Param "x") (NAbs (Param "y") (NBinary NPlus (NSym "x") (NSym "y"))))
260263
>>> fmap Nix.Pretty.prettyNix it
261264
Right x: y: x + y
@@ -336,21 +339,17 @@ dhallToNix e =
336339
Dhall.Optics.rewriteOf Dhall.Core.subExpressions renameShadowed
337340

338341
loop (Const _) = return untranslatable
339-
loop (Var (V a 0)) = return (Nix.mkSym a)
342+
loop (Var (V a 0)) = return (Nix.mkSym (zEncodeSymbol a))
340343
loop (Var a ) = Left (CannotReferenceShadowedVariable a)
341344
loop (Lam _ FunctionBinding { functionBindingVariable = a } c) = do
342345
c' <- loop c
343-
return (Param (VarName a) ==> c')
346+
return (Param (VarName $ zEncodeSymbol a) ==> c')
344347
loop (Pi _ _ _ _) = return untranslatable
345348
loop (App None _) =
346349
return Nix.mkNull
347-
loop (App (Field (Union kts) (Dhall.Core.fieldSelectionLabel -> k)) v) = do
350+
loop (App (Field (Union _kts) (Dhall.Core.fieldSelectionLabel -> k)) v) = do
348351
v' <- loop v
349-
let e0 = do
350-
k' <- Dhall.Map.keys kts
351-
return (k', Nothing)
352-
let e2 = Nix.mkSym k @@ v'
353-
return (Nix.mkParamset e0 False ==> e2)
352+
return (unionChoice (VarName k) (Just v'))
354353
loop (App a b) = do
355354
a' <- loop a
356355
b' <- loop b
@@ -359,7 +358,7 @@ dhallToNix e =
359358
let MultiLet bindings b = Dhall.Core.multiLet a0 b0
360359
bindings' <- for bindings $ \Binding{ variable, value } -> do
361360
value' <- loop value
362-
pure (variable, value')
361+
pure (zEncodeSymbol variable, value')
363362
b' <- loop b
364363
return (Nix.letsE (toList bindings') b')
365364
loop (Annot a _) = loop a
@@ -626,7 +625,7 @@ dhallToNix e =
626625
-- see https://github.com/dhall-lang/dhall-haskell/issues/2414
627626
nixAttrs pairs =
628627
Fix $ NSet NonRecursive $
629-
(\(key, val) -> NamedVar (DynamicKey (Plain (DoubleQuoted [Plain key])) :| []) val Nix.nullPos)
628+
(\(key, val) -> NamedVar ((mkDoubleQuotedIfNecessary (VarName key)) :| []) val Nix.nullPos)
630629
<$> pairs
631630
loop (Union _) = return untranslatable
632631
loop (Combine _ _ a b) = do
@@ -715,20 +714,11 @@ dhallToNix e =
715714
-- (here "x").
716715
--
717716
-- This translates `< Foo : T >.Foo` to `x: { Foo }: Foo x`
718-
Just (Just _) -> do
719-
let e0 = do
720-
k' <- Dhall.Map.keys kts
721-
return (k', Nothing)
722-
return ("x" ==> Nix.mkParamset e0 False ==> (Nix.mkSym k @@ "x"))
723-
724-
_ -> do
725-
let e0 = do
726-
k' <- Dhall.Map.keys kts
727-
return (k', Nothing)
728-
return (Nix.mkParamset e0 False ==> Nix.mkSym k)
717+
Just (Just _) -> return ("x" ==> (unionChoice (VarName k) (Just "x")))
718+
_ -> return (unionChoice (VarName k) Nothing)
729719
loop (Field a (Dhall.Core.fieldSelectionLabel -> b)) = do
730720
a' <- loop a
731-
return (a' @. b)
721+
return (Fix (Nix.NSelect Nothing a' (mkDoubleQuotedIfNecessary (VarName b) :| [])))
732722
loop (Project a (Left b)) = do
733723
a' <- loop a
734724
return (Nix.mkNonRecSet [ Nix.inheritFrom a' (fmap VarName b) ])
@@ -759,3 +749,129 @@ dhallToNix e =
759749
loop (ImportAlt a _) = loop a
760750
loop (Note _ b) = loop b
761751
loop (Embed x) = absurd x
752+
753+
-- | Previously we turned @<Foo | Bar>.Foo@ into @{ Foo, Bar }: Foo@,
754+
-- but this would not work with <Frob/Baz>.Frob/Baz (cause the slash is not a valid symbol char in nix)
755+
-- so we generate @union: union."Frob/Baz"@ instead.
756+
--
757+
-- If passArgument is @Just@, pass the argument to the union selector.
758+
unionChoice :: VarName -> Maybe NExpr -> NExpr
759+
unionChoice chosenKey passArgument =
760+
let selector = Fix (Nix.NSelect Nothing (Nix.mkSym "u") (mkDoubleQuotedIfNecessary chosenKey :| []))
761+
in Nix.Param "u" ==>
762+
case passArgument of
763+
Nothing -> selector
764+
Just arg -> selector @@ arg
765+
766+
767+
-- | Double-quote a field name (record or union). This makes sure it’s recognized as a valid name by nix, e.g. in
768+
--
769+
-- @{ "foo/bar" = 42; }."foo/bar" }@
770+
--
771+
-- where
772+
--
773+
-- @{ foo/bar = 42; }.foo/bar@ is not syntactically valid nix.
774+
--
775+
-- This is only done if necessary (where “necessary” is not super defined right now).
776+
mkDoubleQuotedIfNecessary :: VarName -> NKeyName r
777+
mkDoubleQuotedIfNecessary key@(VarName keyName) =
778+
if Text.all simpleChar keyName
779+
then StaticKey key
780+
else DynamicKey (Plain (DoubleQuoted [Plain keyName]))
781+
where
782+
simpleChar c = isAsciiLower c || isAsciiUpper c
783+
784+
785+
-- | Nix does not support symbols like @foo/bar@, but they are allowed in dhall.
786+
-- So if they happen, we need to encode them with an ASCII escaping scheme.
787+
--
788+
-- This is copied/inspired by the Z-Encoding scheme from GHC, see
789+
-- https://hackage.haskell.org/package/zenc-0.1.2/docs/Text-Encoding-Z.html
790+
--
791+
-- Original Source is BSD-3-Clause, Copyright (c)2011, Jason Dagit
792+
zEncodeSymbol :: Text -> Text
793+
zEncodeSymbol = zEncodeString
794+
795+
-- | The basic encoding scheme is this:
796+
797+
-- * Alphabetic characters (upper and lower) and digits
798+
-- all translate to themselves;
799+
-- except 'Z', which translates to 'ZZ'
800+
-- and 'z', which translates to 'zz'
801+
--
802+
-- * Most other printable characters translate to 'zx' or 'Zx' for some
803+
-- alphabetic character x
804+
--
805+
-- * The others translate as 'znnnU' where 'nnn' is the decimal number
806+
-- of the character
807+
--
808+
-- @
809+
-- Before After
810+
-- --------------------------
811+
-- Trak Trak
812+
-- foo-wib foozmwib
813+
-- \> zg
814+
-- \>1 zg1
815+
-- foo\# foozh
816+
-- foo\#\# foozhzh
817+
-- foo\#\#1 foozhzh1
818+
-- fooZ fooZZ
819+
-- :+ ZCzp
820+
-- @
821+
zEncodeString :: Text -> Text
822+
zEncodeString cs = case Text.uncons cs of
823+
Nothing -> Text.empty
824+
Just (c, cs') ->
825+
encodeDigitChar c
826+
<> Text.concatMap encodeChar cs'
827+
828+
-- | Whether the given characters needs to be z-encoded.
829+
needsEncoding :: Char -> Bool
830+
needsEncoding 'Z' = True
831+
needsEncoding 'z' = True
832+
needsEncoding c = not
833+
( isAsciiLower c
834+
|| isAsciiUpper c
835+
|| isDigit c )
836+
837+
-- If a digit is at the start of a symbol then we need to encode it.
838+
encodeDigitChar :: Char -> Text
839+
encodeDigitChar c | isDigit c = encodeAsUnicodeChar c
840+
encodeDigitChar c = encodeChar c
841+
842+
encodeChar :: Char -> Text
843+
encodeChar c | not (needsEncoding c) = [c] -- Common case first
844+
845+
encodeChar '(' = "ZL"
846+
encodeChar ')' = "ZR"
847+
encodeChar '[' = "ZM"
848+
encodeChar ']' = "ZN"
849+
encodeChar ':' = "ZC"
850+
encodeChar 'Z' = "ZZ"
851+
encodeChar 'z' = "zz"
852+
encodeChar '&' = "za"
853+
encodeChar '|' = "zb"
854+
encodeChar '^' = "zc"
855+
encodeChar '$' = "zd"
856+
encodeChar '=' = "ze"
857+
encodeChar '>' = "zg"
858+
encodeChar '#' = "zh"
859+
encodeChar '.' = "zi"
860+
encodeChar '<' = "zl"
861+
-- we can’t allow @-@, because it is not valid at the start of a symbol
862+
encodeChar '-' = "zm"
863+
encodeChar '!' = "zn"
864+
encodeChar '+' = "zp"
865+
encodeChar '\'' = "zq"
866+
encodeChar '\\' = "zr"
867+
encodeChar '/' = "zs"
868+
encodeChar '*' = "zt"
869+
-- We can allow @_@ because it can appear anywhere in a symbol
870+
-- encodeChar '_' = "zu"
871+
encodeChar '%' = "zv"
872+
encodeChar c = encodeAsUnicodeChar c
873+
874+
encodeAsUnicodeChar :: Char -> Text
875+
encodeAsUnicodeChar c = 'z' `Text.cons` if isDigit (Text.head hex_str) then hex_str
876+
else '0' `Text.cons` hex_str
877+
where hex_str = Text.pack $ showHex (ord c) "U"

0 commit comments

Comments
 (0)