diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index bdaf264e9adce..d7e4d6bb63aeb 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -4052,6 +4052,7 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) { // the sequence "<::" will be unconditionally treated as "[:". // Cf. Lexer::LexTokenInternal. LangOpts.Digraphs = SinceCpp11; + LangOpts.Trigraphs = Style.isCpp(); LangOpts.LineComment = 1; LangOpts.Bool = 1; diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 06f68ec8b0fc1..f9442265a8e59 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -1220,12 +1220,11 @@ static size_t countLeadingWhitespace(StringRef Text) { break; // Splice found, consume it. Cur = Lookahead + 1; - } else if (Cur[0] == '?' && Cur[1] == '?' && Cur[2] == '/' && - (Cur[3] == '\n' || Cur[3] == '\r')) { + } else if (End - Cur >= 4u && Cur[0] == '?' && Cur[1] == '?' && + Cur[2] == '/' && (Cur[3] == '\n' || Cur[3] == '\r')) { // Newlines can also be escaped by a '?' '?' '/' trigraph. By the way, the // characters are quoted individually in this comment because if we write // them together some compilers warn that we have a trigraph in the code. - assert(End - Cur >= 4); Cur += 4; } else { break; @@ -1300,8 +1299,11 @@ FormatToken *FormatTokenLexer::getNextToken() { case '\\': case '?': case '/': - // The text was entirely whitespace when this loop was entered. Thus - // this has to be an escape sequence. + // The code preceding the loop and in the countLeadingWhitespace + // function guarantees that Text is entirely whitespace, not including + // comments but including escaped newlines which may be escaped with a + // trigraph. So if one of these characters shows up, then it has to be in + // an escape sequence.
assert(Text.substr(i, 4) == "\?\?/\r" || Text.substr(i, 4) == "\?\?/\n" || (i >= 1 && (Text.substr(i - 1, 4) == "\?\?/\r" || diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 944e7c3fb152a..40b85f0dad355 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -6681,6 +6681,31 @@ TEST_F(FormatTest, EscapedNewlines) { " int x(int a);", AlignLeft); + // Escaped with a trigraph. + verifyFormat("#define A \\\n" + " int i; \\\n" + " int j;", + "#define A \?\?/\n" + "int i;\?\?/\n" + " int j;", + Narrow); + verifyFormat("#define A \\\r\n" + " int i; \\\r\n" + " int j;", + "#define A \?\?/\r\n" + "int i;\?\?/\r\n" + " int j;", + Narrow); + verifyFormat("#define A int i;", "#define A \?\?/\n" + "int i;"); + verifyFormat("#define A int i;", "#define A \?\?/\r\n" + "int i;"); + // In a language that does not support the trigraph, the program should not + // crash. + verifyNoCrash("#define A \?\?/\n" + "int i;", + getGoogleStyle(FormatStyle::LK_CSharp)); + // CRLF line endings verifyFormat("#define A \\\r\n int i; \\\r\n int j;", "#define A \\\r\nint i;\\\r\n int j;", Narrow);