From 2c32e432874f2da0cf2cfe63cd181f7e3b1ae103 Mon Sep 17 00:00:00 2001 From: "Philippe Bruhat (BooK)" Date: Fri, 4 Jul 2025 19:30:32 +0200 Subject: [PATCH 1/2] add a new warning against using \ in qw() I've seen AI-generated code try to use qw() to create lists containing strings with embedded whitespace using qw and \ to "protect" the whitespace. Things like: my @list = qw( foo bar\ baz ); Just like occurrences of ',' and '#', I believe this should warn. Note that the warning will only be emitted when the \ is followed by actual whitespace, so code like the following (from lib/App/Cpan.pm) will not warn: my $epic_fail_words = join '|', qw( Error stop(?:ping)? problems force not unsupported fail(?:ed)? Cannot\s+install ); --- pod/perldiag.pod | 27 +++++++++++++++++++++++++++ t/lib/warnings/toke | 9 +++++++++ toke.c | 6 ++++++ 3 files changed, 42 insertions(+) diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 6c9948f861e9..7d28d1dc75c6 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -5467,6 +5467,33 @@ character class, just escape the square brackets with the backslash: "\[=" and "=\]". The S<<-- HERE> shows whereabouts in the regular expression the problem was discovered. See L. +=item Possible attempt to escape whitespace in qw() list + +(W qw) qw() lists contain items separated by whitespace; contrary to +what some might expect, backslash characters cannot be used to "protect" +whitespace from being split, but are instead treated as literal data. +(You may have used different delimiters than the parentheses shown here; +braces are also frequently used.) + +You probably wrote something like this: + + @list = qw( + a\ string + another + ); + +Expecting to get a two-element list containing the strings C<'a string'> +and C<'another'>. Instead the list will hold three elements: C<'a\'> +(with a literal backslash), C<'string'> and C<'another'>. 
+ +If you really want whitespace in your strings, build your list the +old-fashioned way, with quotes and commas: + + @list = ( 'a string', 'another' ); + +Note that this warning is I<only> emitted when the backslash is followed +by actual whitespace (that C<qw> splits on). + =item Possible attempt to put comments in qw() list (W qw) qw() lists contain items separated by whitespace; as with literal diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke index 6938174544fd..b3ade75122c5 100644 --- a/t/lib/warnings/toke +++ b/t/lib/warnings/toke @@ -49,6 +49,9 @@ toke.c AOK Possible attempt to put comments in qw() list @a = qw(a b # c) ; + Possible attempt to escape whitespace in qw() list + @a = qw( foo bar\ baz ) ; + %s (...) interpreted as function print ("") printf ("") @@ -366,6 +369,12 @@ Possible attempt to separate words with commas at - line 3. Possible attempt to put comments in qw() list at - line 3. ######## # toke.c +use warnings 'qw'; +@a = qw( foo bar\ baz ); +EXPECT +Possible attempt to escape whitespace in qw() list at - line 3. 
+######## +# toke.c use warnings 'syntax' ; print (""); print ("") and $x = 1; diff --git a/toke.c b/toke.c index 5759c7890d32..57ea5c5e4810 100644 --- a/toke.c +++ b/toke.c @@ -5821,6 +5821,7 @@ yyl_qw(pTHX_ char *s, STRLEN len) if (SvCUR(PL_lex_stuff)) { int warned_comma = !ckWARN(WARN_QW); int warned_comment = warned_comma; + int warned_escape = warned_comma; char *d = SvPV_force(PL_lex_stuff, len); while (len) { for (; isSPACE(*d) && len; --len, ++d) @@ -5840,6 +5841,11 @@ yyl_qw(pTHX_ char *s, STRLEN len) "Possible attempt to put comments in qw() list"); ++warned_comment; } + else if (!warned_escape && *d == '\\' && len > 1 && isSPACE(*(d+1)) ) { + warner(packWARN(WARN_QW), + "Possible attempt to escape whitespace in qw() list"); + ++warned_escape; + } } } else { From ee06f66cffe6d0c8442b7e1b6c580d853768f3e8 Mon Sep 17 00:00:00 2001 From: "Philippe Bruhat (BooK)" Date: Tue, 8 Jul 2025 18:30:41 +0200 Subject: [PATCH 2/2] add perldelta entry for the new qw warning --- pod/perldelta.pod | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 1b80c902c3d0..ea5770a8e27b 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -206,7 +206,14 @@ XXX L =item * -XXX L +L<Possible attempt to escape whitespace in qw() list|perldiag/"Possible attempt to escape whitespace in qw() list"> + +(W qw) qw() lists contain items separated by whitespace; contrary to +what some might expect, backslash characters cannot be used to "protect" +whitespace from being split, but are instead treated as literal data. + +Note that this warning is I<only> emitted when the backslash is followed +by actual whitespace (that C<qw> splits on). =back