Skip to content

Commit 79f3bc4

Browse files
committed
Add difference tests for pretty-printing
We now also do a round-trip difference test for pretty-printing. In the process, I found a bunch of pretty-printing bugs:

* string literals
  - newlines were getting flattened
  - missing braces in unicode escapes
  - 'rb' instead of 'br' for raw bytestring prefixes
  - '\xHH' characters had too wide a range (only up to '\x7F' is allowed)
  - unicode escape is at most 6 hex digits, not 8
* commas should never be present after '..' in struct patterns
* the content of doc comments should not be indented
* successive tuple field accessors need a space between them ('x.0 .0' instead of 'x.0.0')
* successive '..' also need space ('x .. .. y' instead of 'x....y')
1 parent 88d5e3b commit 79f3bc4

File tree

8 files changed

+130
-83
lines changed

8 files changed

+130
-83
lines changed

src/Language/Rust/Parser.hs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ sourceFile :: SourceFile Span
2323

2424
module Language.Rust.Parser (
2525
-- * Parsing
26-
parse, parse', readSourceFile, Parse(..), P, execParser, initPos, Span,
26+
parse, parse', readSourceFile, readTokens, Parse(..), P, execParser, initPos, Span,
2727
-- * Lexing
2828
lexToken, lexNonSpace, lexTokens, translateLit,
2929
-- * Input stream
@@ -32,9 +32,9 @@ module Language.Rust.Parser (
3232
lexicalError, parseError, ParseFail,
3333
) where
3434

35-
import Language.Rust.Syntax.AST
35+
import Language.Rust.Syntax
3636
import Language.Rust.Data.InputStream (InputStream, readInputStream, inputStreamToString, inputStreamFromString)
37-
import Language.Rust.Data.Position (Position, Span, initPos, prettyPosition)
37+
import Language.Rust.Data.Position (Position, Span, Spanned, initPos, prettyPosition)
3838
import Language.Rust.Parser.Internal
3939
import Language.Rust.Parser.Lexer (lexToken, lexNonSpace, lexTokens, lexicalError)
4040
import Language.Rust.Parser.Literals (translateLit)
@@ -59,6 +59,14 @@ parse' is = case execParser parser is initPos of
5959
readSourceFile :: FilePath -> IO (SourceFile Span)
6060
readSourceFile fileName = parse' <$> readInputStream fileName
6161

62+
-- | Given a path pointing to a Rust source file, read that file and lex it (ignoring whitespace)
63+
readTokens :: FilePath -> IO [Spanned Token]
64+
readTokens fileName = do
65+
inp <- readInputStream fileName
66+
case execParser (lexTokens lexNonSpace) inp initPos of
67+
Left (pos, msg) -> throw (ParseFail pos msg)
68+
Right x -> pure x
69+
6270
-- | Exceptions that occur during parsing
6371
data ParseFail = ParseFail Position String deriving (Eq, Typeable)
6472

src/Language/Rust/Parser/Lexer.x

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -917,16 +917,17 @@ $hexit = [0-9a-fA-F]
917917

918918
@char_escape
919919
= [nrt\\'"0]
920-
| [xX] $hexit $hexit
921-
| u $hexit $hexit $hexit $hexit
922-
| U $hexit $hexit $hexit $hexit $hexit $hexit $hexit $hexit
920+
| [xX] [0-7] $hexit
923921
| u\{ $hexit \}
924922
| u\{ $hexit $hexit \}
925923
| u\{ $hexit $hexit $hexit \}
926924
| u\{ $hexit $hexit $hexit $hexit \}
927925
| u\{ $hexit $hexit $hexit $hexit $hexit \}
928926
| u\{ $hexit $hexit $hexit $hexit $hexit $hexit \}
929927
928+
@byte_escape
929+
= [xX] $hexit $hexit
930+
| [nrt\\'"0]
930931

931932
-- literals
932933

@@ -938,8 +939,7 @@ $hexit = [0-9a-fA-F]
938939
\'
939940

940941
@lit_byte
941-
= b\' ( \\ ( [xX] $hexit $hexit
942-
| [nrt\\'"0] )
942+
= b\' ( \\ @byte_escape
943943
| [^\\'\n\t\r] [ \udc00-\udfff ]?
944944
)
945945
\'
@@ -956,8 +956,8 @@ $hexit = [0-9a-fA-F]
956956
@lit_float = [0-9][0-9_]* @decimal_suffix? @exponent_suffix?
957957
@lit_float2 = [0-9][0-9_]* \.
958958

959-
@lit_str = \" (\\\n | \\\r\n | \\ @char_escape | [^\\\"] | \n | \r)* \"
960-
@lit_byte_str = b @lit_str
959+
@lit_str = \" (\\\n | \\\r\n | \\ @char_escape | [^\\\"] | \n | \r)* \"
960+
@lit_byte_str = b \" (\\\n | \\\r\n | \\ @byte_escape | [^\\\"] | \n | \r)* \"
961961

962962
@lit_raw_str = r \#* \"
963963
@lit_raw_bstr = br \#* \"

src/Language/Rust/Pretty/Internal.hs

Lines changed: 36 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ printToken (CloseDelim Bracket) = "]"
208208
printToken (CloseDelim Brace) = "}"
209209
printToken (CloseDelim NoDelim) = ""
210210
-- Literals
211-
printToken (LiteralTok l s) = printLitTok l <> perhaps printName s
211+
printToken (LiteralTok l s) = noIndent $ printLitTok l <> perhaps printName s
212212
-- Name components
213213
printToken (IdentTok i) = printIdent i
214214
printToken Underscore = "_"
@@ -232,10 +232,11 @@ printLitTok (ByteTok n) = "b'" <> printName n <> "'"
232232
printLitTok (CharTok n) = "'" <> printName n <> "'"
233233
printLitTok (IntegerTok n) = printName n
234234
printLitTok (FloatTok n) = printName n
235-
printLitTok (StrTok n) = "\"" <> printName n <> "\""
236-
printLitTok (StrRawTok n m) = let pad = pretty (replicate m '#') in "r" <> pad <> "\"" <> printName n <> "\"" <> pad
237-
printLitTok (ByteStrTok n) = "b\"" <> printName n <> "\""
238-
printLitTok (ByteStrRawTok n m) = let pad = pretty (replicate m '#') in "rb" <> pad <> "\"" <> printName n <> "\"" <> pad
235+
printLitTok (StrTok n) = "\"" <> string hardline n <> "\""
236+
printLitTok (StrRawTok n m) = let pad = pretty (replicate m '#')
237+
in "r" <> pad <> "\"" <> string hardline n <> "\"" <> pad
238+
printLitTok (ByteStrTok n) = "b\"" <> string hardline n <> "\""
239+
printLitTok (ByteStrRawTok n m) = let pad = pretty (replicate m '#') in "br" <> pad <> "\"" <> string hardline n <> "\"" <> pad
239240

240241
-- | Print a nonterminal
241242
printNonterminal :: Nonterminal a -> Doc a
@@ -298,7 +299,7 @@ printExprOuterAttrStyle expr isInline = glue (printEitherAttrs (expressionAttrs
298299
Vec as exprs x -> annotate x (block Bracket True "," (printInnerAttrs as) (printExpr <$> exprs))
299300
Call _ func [arg] x -> annotate x (printExpr func <> parens (printExpr arg))
300301
Call _ func args x -> annotate x (printExpr func <> block Paren True "," mempty (printExpr <$> args))
301-
MethodCall{} -> chainedMethodCalls expr id
302+
MethodCall{} -> chainedMethodCalls expr False id
302303
TupExpr as [e] x -> annotate x (block Paren True "" (printInnerAttrs as) [ printExpr e <> "," ])
303304
TupExpr as es x -> annotate x (block Paren True "," (printInnerAttrs as) (printExpr <$> es))
304305
Binary _ op lhs rhs x -> annotate x (hsep [ printExpr lhs, printBinOp op, printExpr rhs ])
@@ -319,10 +320,10 @@ printExprOuterAttrStyle expr isInline = glue (printEitherAttrs (expressionAttrs
319320
Catch attrs blk x -> annotate x ("do catch" <+> printBlockWithAttrs True blk attrs)
320321
Assign _ lhs rhs x -> annotate x (hsep [ printExpr lhs, "=", printExpr rhs ])
321322
AssignOp _ op lhs rhs x -> annotate x (hsep [ printExpr lhs, printBinOp op <> "=", printExpr rhs ])
322-
FieldAccess{} -> chainedMethodCalls expr id
323-
TupField{} -> chainedMethodCalls expr id
324-
Index{} -> chainedMethodCalls expr id
325-
Range _ start end limits x -> annotate x (hcat [ perhaps printExpr start, printRangeLimits limits, perhaps printExpr end ])
323+
FieldAccess{} -> chainedMethodCalls expr False id
324+
TupField{} -> chainedMethodCalls expr False id
325+
Index{} -> chainedMethodCalls expr False id
326+
Range _ start end limits x -> annotate x (perhaps printExpr start <+> printRangeLimits limits <+> perhaps printExpr end)
326327
PathExpr _ Nothing path x -> annotate x (printPath path True)
327328
PathExpr _ (Just qs) path x -> annotate x (printQPath path qs True)
328329
AddrOf _ mut e x -> annotate x ("&" <> printMutability mut <+> printExpr e)
@@ -335,7 +336,7 @@ printExprOuterAttrStyle expr isInline = glue (printEitherAttrs (expressionAttrs
335336
in annotate x (printPath p True <+> block Brace True mempty (printInnerAttrs as) body)
336337
Repeat attrs e cnt x -> annotate x (brackets (printInnerAttrs attrs <+> printExpr e <> ";" <+> printExpr cnt))
337338
ParenExpr attrs e x -> annotate x (parens (printInnerAttrs attrs <+> printExpr e))
338-
Try{} -> chainedMethodCalls expr id
339+
Try{} -> chainedMethodCalls expr False id
339340
where
340341
printLbl = perhaps (\i -> printLifetime i <> ":")
341342
glue = if isInline then (<+>) else (</>)
@@ -352,23 +353,26 @@ printExprOuterAttrStyle expr isInline = glue (printEitherAttrs (expressionAttrs
352353
-- * try
353354
--
354355
chainedMethodCalls :: Expr a -- ^ expression
356+
-> Bool -- ^ last expression was a 'TupField' (if we have two
357+
-- successive 'TupField's, we need a space between them
358+
-- to prevent them from looking like a float literal)
355359
-> (Doc a -> Doc a) -- ^ suffix to the expression
356360
-> Doc a
357-
chainedMethodCalls (MethodCall _ s i ts' as x) fdoc
361+
chainedMethodCalls (MethodCall _ s i ts' as x) _ fdoc
358362
= let tys = perhaps (\ts -> "::<" <> commas ts printType <> ">") ts'
359363
as' = case as of
360364
[a] -> parens (printExpr a)
361365
_ -> block Paren True "," mempty (printExpr <$> as)
362-
in chainedMethodCalls s (annotate x . (<##> fdoc (indent n (hcat [ ".", printIdent i, tys, as' ]))))
363-
chainedMethodCalls (FieldAccess _ s i x) fdoc
364-
= chainedMethodCalls s (annotate x . (<##> fdoc (indent n (hcat [ ".", printIdent i ]))))
365-
chainedMethodCalls (Try _ s x) fdoc
366-
= chainedMethodCalls s (annotate x . (<> fdoc "?"))
367-
chainedMethodCalls (Index _ s i x) fdoc
368-
= chainedMethodCalls s (annotate x . (<> fdoc ("[" <> block NoDelim True mempty mempty [printExpr i] <> "]")))
369-
chainedMethodCalls (TupField _ s i x) fdoc
370-
= chainedMethodCalls s (annotate x . (<> fdoc ("." <> pretty i)))
371-
chainedMethodCalls e fdoc = group (fdoc (printExpr e))
366+
in chainedMethodCalls s False (annotate x . (<##> fdoc (indent n (hcat [ ".", printIdent i, tys, as' ]))))
367+
chainedMethodCalls (FieldAccess _ s i x) _ fdoc
368+
= chainedMethodCalls s False (annotate x . (<##> fdoc (indent n (hcat [ ".", printIdent i ]))))
369+
chainedMethodCalls (Try _ s x) _ fdoc
370+
= chainedMethodCalls s False (annotate x . (<> fdoc "?"))
371+
chainedMethodCalls (Index _ s i x) _ fdoc
372+
= chainedMethodCalls s False (annotate x . (<> fdoc ("[" <> block NoDelim True mempty mempty [printExpr i] <> "]")))
373+
chainedMethodCalls (TupField _ s i x) t fdoc
374+
= chainedMethodCalls s True (annotate x . (<> fdoc ("." <> pretty i <> when t " ")))
375+
chainedMethodCalls e _ fdoc = group (fdoc (printExpr e))
372376

373377
-- | Print a string literal
374378
printStr :: StrStyle -> String -> Doc a
@@ -508,12 +512,12 @@ printEitherAttrs attrs kind inline = unless (null attrs') (glue attrs')
508512

509513
-- | Print an attribute (@print_attribute_inline@ or @print_attribute@)
510514
printAttr :: Attribute a -> Bool -> Doc a
511-
printAttr (Attribute Inner p ts x) _ = annotate x ("#![" <> printPath p True <> printTokenStreamSp ts <> printTokenStream ts <> "]")
512-
printAttr (Attribute Outer p ts x) _ = annotate x ("#[" <> printPath p True <> printTokenStreamSp ts <> printTokenStream ts <> "]")
513-
printAttr (SugaredDoc Inner _ c x) True = annotate x ("/*!" <> pretty c <> "*/")
514-
printAttr (SugaredDoc Outer _ c x) True = annotate x ("/**" <> pretty c <> "*/")
515-
printAttr a@(SugaredDoc Inner _ c x) False = annotate x (flatAlt ("//!" <+> pretty c) (printAttr a True))
516-
printAttr a@(SugaredDoc Outer _ c x) False = annotate x (flatAlt ("///" <+> pretty c) (printAttr a True))
515+
printAttr (Attribute Inner p ts x) _ = annotate x ("#![" <> printPath p True <> printTokenStreamSp ts <> printTokenStream ts <> "]")
516+
printAttr (Attribute Outer p ts x) _ = annotate x ("#[" <> printPath p True <> printTokenStreamSp ts <> printTokenStream ts <> "]")
517+
printAttr (SugaredDoc Inner True c x) _ = annotate x (noIndent ("/*!" <> string hardline c <> "*/"))
518+
printAttr (SugaredDoc Outer True c x) _ = annotate x (noIndent ("/**" <> string hardline c <> "*/"))
519+
printAttr (SugaredDoc Inner False c x) _ = annotate x (flatAlt ("//!" <> pretty c) ("/*!" <> pretty c <> "*/"))
520+
printAttr (SugaredDoc Outer False c x) _ = annotate x (flatAlt ("///" <> pretty c) ("/**" <> pretty c <> "*/"))
517521

518522
-- | Print an identifier as is, or as cooked string if containing a hyphen
519523
printCookedIdent :: Ident -> Doc a
@@ -667,9 +671,9 @@ printVis InheritedV = mempty
667671
-- | Print a foreign item (@print_foreign_item@)
668672
printForeignItem :: ForeignItem a -> Doc a
669673
printForeignItem (ForeignFn attrs vis ident decl generics x) = annotate x $
670-
printOuterAttrs attrs <+> printFn decl Normal NotConst Rust (Just ident) generics vis Nothing
674+
printOuterAttrs attrs <#> printFn decl Normal NotConst Rust (Just ident) generics vis Nothing
671675
printForeignItem (ForeignStatic attrs vis ident ty mut x) = annotate x $
672-
printOuterAttrs attrs <+> printVis vis <+> "static" <+> printMutability mut <+> printIdent ident <> ":" <+> printType ty <> ";"
676+
printOuterAttrs attrs <#> printVis vis <+> "static" <+> printMutability mut <+> printIdent ident <> ":" <+> printType ty <> ";"
673677

674678
-- | Print a struct definition (@print_struct@)
675679
printStruct :: VariantData a -> Generics a -> Ident -> Bool -> Bool -> Doc a
@@ -776,8 +780,8 @@ printFullMutability Immutable = "const"
776780
printPat :: Pat a -> Doc a
777781
printPat (WildP x) = annotate x "_"
778782
printPat (IdentP bm p s x) = annotate x (printBindingMode bm <+> printIdent p <+> perhaps (\p' -> "@" <+> printPat p') s)
779-
printPat (StructP p fs b x) = annotate x (printPath p True <+> block Brace True "," mempty body)
780-
where body = (printFieldPat `map` fs) ++ [ ".." | b ]
783+
printPat (StructP p fs False x) = annotate x (printPath p True <+> block Brace True "," mempty (printFieldPat `map` fs))
784+
printPat (StructP p fs True x) = annotate x (printPath p True <+> block Brace True mempty mempty ([ printFieldPat f <> "," | f <- fs ] ++ [ ".." ]))
781785
printPat (TupleStructP p es Nothing x) = annotate x (printPath p True <> "(" <> commas es printPat <> ")")
782786
printPat (TupleStructP p es (Just d) x) = let (before,after) = splitAt d es
783787
in annotate x (printPath p True <> "(" <> commas before printPat <> when (d /= 0) ","

src/Language/Rust/Pretty/Literals.hs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,20 @@ module Language.Rust.Pretty.Literals (
1717
) where
1818

1919
import Language.Rust.Syntax.AST
20+
import Language.Rust.Pretty.Util
2021

21-
import Data.Text.Prettyprint.Doc (hcat, annotate, (<>), Doc, pretty)
22+
import Data.Text.Prettyprint.Doc (hcat, annotate, (<>), Doc, pretty, hardline)
2223

2324
import Data.Char (intToDigit, ord, chr)
2425
import Data.Word (Word8)
2526

2627
-- | Print a literal (@print_literal@)
2728
printLit :: Lit a -> Doc a
28-
printLit lit = case lit of
29+
printLit lit = noIndent $ case lit of
2930
(Str str Cooked s x) -> annotate x (hcat [ "\"", foldMap escapeChar str, "\"", suf s ])
30-
(Str str (Raw n) s x) -> annotate x (hcat [ "r", pad n, "\"", pretty str, "\"", pad n, suf s ])
31+
(Str str (Raw n) s x) -> annotate x (hcat [ "r", pad n, "\"", string hardline str, "\"", pad n, suf s ])
3132
(ByteStr str Cooked s x) -> annotate x (hcat [ "b\"", foldMap escapeByte str, "\"", suf s ])
32-
(ByteStr str (Raw n) s x) -> annotate x (hcat [ "br", pad n, "\"", pretty (map byte2Char str), "\"", pad n, suf s ])
33+
(ByteStr str (Raw n) s x) -> annotate x (hcat [ "br", pad n, "\"", string hardline (map byte2Char str), "\"", pad n, suf s ])
3334
(Char c s x) -> annotate x (hcat [ "'", escapeChar c, "'", suf s ])
3435
(Byte b s x) -> annotate x (hcat [ "b'", escapeByte b, "'", suf s ])
3536
(Int b i s x) -> annotate x (hcat [ printIntLit i b, suf s ])
@@ -92,9 +93,9 @@ escapeByte w8 = case byte2Char w8 of
9293

9394
-- | Escape a unicode character. Based on @std::ascii::escape_default@
9495
escapeChar :: Char -> Doc a
95-
escapeChar c | c <= '\xff' = escapeByte (char2Byte c)
96-
| c <= '\xffff' = "\\u" <> padHex 4 (ord c)
97-
| otherwise = "\\U" <> padHex 8 (ord c)
96+
escapeChar c | c <= '\x7f' = escapeByte (char2Byte c)
97+
| c <= '\xffff' = "\\u{" <> padHex 4 (ord c) <> "}"
98+
| otherwise = "\\u{" <> padHex 6 (ord c) <> "}"
9899

99100
-- | Convert a number to its padded hexadecimal form
100101
padHex :: Integral a => Int -> a -> Doc b

src/Language/Rust/Pretty/Util.hs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ module Language.Rust.Pretty.Util where
2626

2727
import Data.Monoid ((<>))
2828
import Data.List (mapAccumL)
29+
import Data.String (IsString(..))
2930

3031
import qualified Data.Text.Prettyprint.Doc as PP
3132
import Data.Text.Prettyprint.Doc.Internal.Type (Doc(..))
@@ -117,7 +118,13 @@ ungroup :: Doc a -> Doc a
117118
ungroup (Union _ x) = x
118119
ungroup y = y
119120

121+
-- | Remove all indentation from a document by nesting it by the negation of the current nesting level
122+
noIndent :: Doc a -> Doc a
123+
noIndent d = PP.nesting (\i -> PP.nest (negate i) d)
120124

125+
-- | Translate '\n' in a string using the provided 'Doc' instead of 'line'
126+
string :: Doc a -> String -> Doc a
127+
string new = foldMap (\c -> case c of { '\n' -> new; _ -> Char c })
121128

122129
-- | This is the most general function for printing blocks. It operates with any delimiter, any
123130
-- separator, an optional leading attribute doc (which isn't followed by a separator), and wraps a

0 commit comments

Comments
 (0)