Skip to content

Commit 8512c64

Browse files
trurlavaee
authored and committed
[clang][scandeps] Improve handling of rawstrings. (llvm#139504)
1 parent 0a93f63 commit 8512c64

File tree

3 files changed

+194
-12
lines changed

3 files changed

+194
-12
lines changed

clang/include/clang/Lex/Lexer.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,11 @@ class Lexer : public PreprocessorLexer {
585585
/// sequence.
586586
static bool isNewLineEscaped(const char *BufferStart, const char *Str);
587587

588+
/// getEscapedNewLineSize - Return the size of the specified escaped newline,
589+
/// or 0 if it is not an escaped newline. P[-1] is known to be a "\" on entry
590+
/// to this function.
591+
static unsigned getEscapedNewLineSize(const char *P);
592+
588593
/// Diagnose use of a delimited or named escape sequence.
589594
static void DiagnoseDelimitedOrNamedEscapeSequence(SourceLocation Loc,
590595
bool Named,
@@ -725,11 +730,6 @@ class Lexer : public PreprocessorLexer {
725730
/// method.
726731
SizedChar getCharAndSizeSlow(const char *Ptr, Token *Tok = nullptr);
727732

728-
/// getEscapedNewLineSize - Return the size of the specified escaped newline,
729-
/// or 0 if it is not an escaped newline. P[-1] is known to be a "\" on entry
730-
/// to this function.
731-
static unsigned getEscapedNewLineSize(const char *P);
732-
733733
/// SkipEscapedNewLines - If P points to an escaped newline (or a series of
734734
/// them), skip over them and return the first non-escaped-newline found,
735735
/// otherwise return P.

clang/lib/Lex/DependencyDirectivesScanner.cpp

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,30 @@ static void skipOverSpaces(const char *&First, const char *const End) {
206206
++First;
207207
}
208208

209+
// Move back by one character, skipping escaped newlines (backslash + \n)
210+
static char previousChar(const char *First, const char *&Current) {
211+
assert(Current > First);
212+
--Current;
213+
while (Current > First && isVerticalWhitespace(*Current)) {
214+
// Check if the previous character is a backslash
215+
if (Current > First && *(Current - 1) == '\\') {
216+
// Use Lexer's getEscapedNewLineSize to get the size of the escaped
217+
// newline
218+
unsigned EscapeSize = Lexer::getEscapedNewLineSize(Current);
219+
if (EscapeSize > 0) {
220+
// Skip back over the entire escaped newline sequence (backslash +
221+
// newline)
222+
Current -= (1 + EscapeSize);
223+
} else {
224+
break;
225+
}
226+
} else {
227+
break;
228+
}
229+
}
230+
return *Current;
231+
}
232+
209233
[[nodiscard]] static bool isRawStringLiteral(const char *First,
210234
const char *Current) {
211235
assert(First <= Current);
@@ -215,25 +239,27 @@ static void skipOverSpaces(const char *&First, const char *const End) {
215239
return false;
216240

217241
// Check for an "R".
218-
--Current;
219-
if (*Current != 'R')
242+
if (previousChar(First, Current) != 'R')
220243
return false;
221-
if (First == Current || !isAsciiIdentifierContinue(*--Current))
244+
if (First == Current ||
245+
!isAsciiIdentifierContinue(previousChar(First, Current)))
222246
return true;
223247

224248
// Check for a prefix of "u", "U", or "L".
225249
if (*Current == 'u' || *Current == 'U' || *Current == 'L')
226-
return First == Current || !isAsciiIdentifierContinue(*--Current);
250+
return First == Current ||
251+
!isAsciiIdentifierContinue(previousChar(First, Current));
227252

228253
// Check for a prefix of "u8".
229-
if (*Current != '8' || First == Current || *Current-- != 'u')
254+
if (*Current != '8' || First == Current ||
255+
previousChar(First, Current) != 'u')
230256
return false;
231-
return First == Current || !isAsciiIdentifierContinue(*--Current);
257+
return First == Current ||
258+
!isAsciiIdentifierContinue(previousChar(First, Current));
232259
}
233260

234261
static void skipRawString(const char *&First, const char *const End) {
235262
assert(First[0] == '"');
236-
assert(First[-1] == 'R');
237263

238264
const char *Last = ++First;
239265
while (Last != End && *Last != '(')
@@ -416,6 +442,14 @@ void Scanner::skipLine(const char *&First, const char *const End) {
416442
continue;
417443
}
418444

445+
// Continue on the same line if an EOL is preceded with backslash
446+
if (First + 1 < End && *First == '\\') {
447+
if (unsigned Len = isEOL(First + 1, End)) {
448+
First += 1 + Len;
449+
continue;
450+
}
451+
}
452+
419453
// Iterate over comments correctly.
420454
if (*First != '/' || End - First < 2) {
421455
LastTokenPtr = First;
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
// RUN: rm -rf %t
2+
// RUN: split-file %s %t
3+
// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json
4+
5+
//--- cdb.json.in
6+
[{
7+
"directory": "DIR",
8+
"command": "clang -c DIR/tu.c -o DIR/tu.o -IDIR/include",
9+
"file": "DIR/tu.c"
10+
}]
11+
//--- include/header.h
12+
//--- include/header2.h
13+
//--- include/header3.h
14+
//--- include/header4.h
15+
//--- include/header5.h
16+
//--- include/header6.h
17+
//--- include/header7.h
18+
//--- include/header8.h
19+
//--- include/header9.h
20+
//--- include/header10.h
21+
//--- include/header11.h
22+
//--- include/header12.h
23+
//--- include/header13.h
24+
//--- include/header14.h
25+
//--- tu.c
26+
#if 0
27+
R"x()x"
28+
#endif
29+
30+
#include "header.h"
31+
32+
#if 0
33+
R"y(";
34+
#endif
35+
#include "header2.h"
36+
37+
#if 0
38+
//")y"
39+
#endif
40+
41+
#if 0
42+
R"y(";
43+
R"z()y";
44+
#endif
45+
#include "header3.h"
46+
#if 0
47+
//")z"
48+
#endif
49+
50+
#if 0
51+
R\
52+
"y(";
53+
R"z()y";
54+
#endif
55+
#include "header4.h"
56+
#if 0
57+
//")z"
58+
#endif
59+
60+
// Test u8 prefix with escaped newline
61+
#if 0
62+
u8R\
63+
"prefix(test)prefix"
64+
#endif
65+
#include "header5.h"
66+
67+
// Test u prefix with multiple escaped newlines
68+
#if 0
69+
uR\
70+
\
71+
"multi(test)multi"
72+
#endif
73+
#include "header6.h"
74+
75+
// Test U prefix with escaped newline
76+
#if 0
77+
UR\
78+
"upper(test)upper"
79+
#endif
80+
#include "header7.h"
81+
82+
// Test L prefix with escaped newline
83+
#if 0
84+
LR\
85+
"wide(test)wide"
86+
#endif
87+
#include "header8.h"
88+
89+
// Test escaped newline with \r\n style
90+
#if 0
91+
R\
92+
"crlf(test)crlf"
93+
#endif
94+
#include "header9.h"
95+
96+
// Test multiple escaped newlines in different positions
97+
#if 0
98+
u\
99+
8\
100+
R\
101+
"complex(test)complex"
102+
#endif
103+
#include "header10.h"
104+
105+
// Test raw string that should NOT be treated as raw (no R prefix due to identifier continuation)
106+
#if 0
107+
identifierR"notraw(test)notraw"
108+
#endif
109+
#include "header11.h"
110+
111+
// Test raw string with whitespace before escaped newline
112+
#if 0
113+
R \
114+
"whitespace(test)whitespace"
115+
#endif
116+
#include "header12.h"
117+
118+
// Test nested raw strings in disabled code
119+
#if 0
120+
R"outer(
121+
R"inner(content)inner"
122+
)outer"
123+
#endif
124+
#include "header13.h"
125+
126+
// Test raw string with empty delimiter
127+
#if 0
128+
R\
129+
"(empty delimiter)";
130+
#endif
131+
#include "header14.h"
132+
133+
// RUN: clang-scan-deps -compilation-database %t/cdb.json -mode preprocess | FileCheck %s
134+
// RUN: clang-scan-deps -compilation-database %t/cdb.json -mode preprocess-dependency-directives | FileCheck %s
135+
// CHECK: tu.c
136+
// CHECK-NEXT: header.h
137+
// CHECK-NEXT: header3.h
138+
// CHECK-NEXT: header4.h
139+
// CHECK-NEXT: header5.h
140+
// CHECK-NEXT: header6.h
141+
// CHECK-NEXT: header7.h
142+
// CHECK-NEXT: header8.h
143+
// CHECK-NEXT: header9.h
144+
// CHECK-NEXT: header10.h
145+
// CHECK-NEXT: header11.h
146+
// CHECK-NEXT: header12.h
147+
// CHECK-NEXT: header13.h
148+
// CHECK-NEXT: header14.h

0 commit comments

Comments
 (0)