Skip to content

Commit 964a38c

Browse files
committed
Tokenizer/PHP: bug fix - fix performance issue
Happened to come across this while investigating something else. As it was, as long as no open parenthesis or variable was encountered, this snippet would loop to the end of the file for each `T_ARRAY` token encountered, because it would only `break` in the `if/elseif` and there was no `else` clause. Basically, we only want the `array` keyword to be tokenized as `T_ARRAY` if it is an actual array declaration. In all other cases, it should be tokenized as `T_STRING`. This fixes the performance issue by only looping to the first non-empty token after the keyword, checking whether it is an open parenthesis, and retokenizing the `T_ARRAY` to `T_STRING` in all other cases. It also removes the need for the separate _return type_ retokenization of the array keyword. Includes unit tests specifically for the array keyword.
1 parent d2574b9 commit 964a38c

File tree

4 files changed

+225
-26
lines changed

4 files changed

+225
-26
lines changed

package.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ http://pear.php.net/dtd/package-2.0.xsd">
150150
<dir name="Tokenizer">
151151
<file baseinstalldir="" name="AnonClassParenthesisOwnerTest.inc" role="test" />
152152
<file baseinstalldir="" name="AnonClassParenthesisOwnerTest.php" role="test" />
153+
<file baseinstalldir="" name="ArrayKeywordTest.inc" role="test" />
154+
<file baseinstalldir="" name="ArrayKeywordTest.php" role="test" />
153155
<file baseinstalldir="" name="AttributesTest.inc" role="test" />
154156
<file baseinstalldir="" name="AttributesTest.php" role="test" />
155157
<file baseinstalldir="" name="BackfillFnTokenTest.inc" role="test" />
@@ -2098,6 +2100,8 @@ http://pear.php.net/dtd/package-2.0.xsd">
20982100
<install as="CodeSniffer/Core/Sniffs/AbstractArraySniffTestable.php" name="tests/Core/Sniffs/AbstractArraySniffTestable.php" />
20992101
<install as="CodeSniffer/Core/Tokenizer/AnonClassParenthesisOwnerTest.php" name="tests/Core/Tokenizer/AnonClassParenthesisOwnerTest.php" />
21002102
<install as="CodeSniffer/Core/Tokenizer/AnonClassParenthesisOwnerTest.inc" name="tests/Core/Tokenizer/AnonClassParenthesisOwnerTest.inc" />
2103+
<install as="CodeSniffer/Core/Tokenizer/ArrayKeywordTest.php" name="tests/Core/Tokenizer/ArrayKeywordTest.php" />
2104+
<install as="CodeSniffer/Core/Tokenizer/ArrayKeywordTest.inc" name="tests/Core/Tokenizer/ArrayKeywordTest.inc" />
21012105
<install as="CodeSniffer/Core/Tokenizer/AttributesTest.php" name="tests/Core/Tokenizer/AttributesTest.php" />
21022106
<install as="CodeSniffer/Core/Tokenizer/AttributesTest.inc" name="tests/Core/Tokenizer/AttributesTest.inc" />
21032107
<install as="CodeSniffer/Core/Tokenizer/BackfillFnTokenTest.php" name="tests/Core/Tokenizer/BackfillFnTokenTest.php" />
@@ -2186,6 +2190,8 @@ http://pear.php.net/dtd/package-2.0.xsd">
21862190
<install as="CodeSniffer/Core/Sniffs/AbstractArraySniffTestable.php" name="tests/Core/Sniffs/AbstractArraySniffTestable.php" />
21872191
<install as="CodeSniffer/Core/Tokenizer/AnonClassParenthesisOwnerTest.php" name="tests/Core/Tokenizer/AnonClassParenthesisOwnerTest.php" />
21882192
<install as="CodeSniffer/Core/Tokenizer/AnonClassParenthesisOwnerTest.inc" name="tests/Core/Tokenizer/AnonClassParenthesisOwnerTest.inc" />
2193+
<install as="CodeSniffer/Core/Tokenizer/ArrayKeywordTest.php" name="tests/Core/Tokenizer/ArrayKeywordTest.php" />
2194+
<install as="CodeSniffer/Core/Tokenizer/ArrayKeywordTest.inc" name="tests/Core/Tokenizer/ArrayKeywordTest.inc" />
21892195
<install as="CodeSniffer/Core/Tokenizer/AttributesTest.php" name="tests/Core/Tokenizer/AttributesTest.php" />
21902196
<install as="CodeSniffer/Core/Tokenizer/AttributesTest.inc" name="tests/Core/Tokenizer/AttributesTest.inc" />
21912197
<install as="CodeSniffer/Core/Tokenizer/BackfillFnTokenTest.php" name="tests/Core/Tokenizer/BackfillFnTokenTest.php" />

src/Tokenizers/PHP.php

Lines changed: 14 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1769,23 +1769,6 @@ function return types. We want to keep the parenthesis map clean,
17691769

17701770
break;
17711771
}//end for
1772-
1773-
// Any T_ARRAY tokens we find between here and the next
1774-
// token that can't be part of the return type, need to be
1775-
// converted to T_STRING tokens.
1776-
for ($x; $x < $numTokens; $x++) {
1777-
if ((is_array($tokens[$x]) === false && $tokens[$x] !== '|')
1778-
|| (is_array($tokens[$x]) === true && isset($allowed[$tokens[$x][0]]) === false)
1779-
) {
1780-
break;
1781-
} else if (is_array($tokens[$x]) === true && $tokens[$x][0] === T_ARRAY) {
1782-
$tokens[$x][0] = T_STRING;
1783-
1784-
if (PHP_CODESNIFFER_VERBOSITY > 1) {
1785-
echo "\t\t* token $x changed from T_ARRAY to T_STRING".PHP_EOL;
1786-
}
1787-
}
1788-
}
17891772
}//end if
17901773
}//end if
17911774
}//end if
@@ -2066,20 +2049,25 @@ function return types. We want to keep the parenthesis map clean,
20662049
}
20672050
}//end if
20682051

2069-
// This is a special condition for T_ARRAY tokens used for
2070-
// type hinting function arguments as being arrays. We want to keep
2071-
// the parenthesis map clean, so let's tag these tokens as
2052+
// This is a special condition for T_ARRAY tokens used for anything else
2053+
// but array declarations, like type hinting function arguments as
2054+
// being arrays.
2055+
// We want to keep the parenthesis map clean, so let's tag these tokens as
20722056
// T_STRING.
20732057
if ($newToken['code'] === T_ARRAY) {
2074-
for ($i = $stackPtr; $i < $numTokens; $i++) {
2075-
if ($tokens[$i] === '(') {
2076-
break;
2077-
} else if ($tokens[$i][0] === T_VARIABLE) {
2078-
$newToken['code'] = T_STRING;
2079-
$newToken['type'] = 'T_STRING';
2058+
for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
2059+
if (is_array($tokens[$i]) === false
2060+
|| isset(Util\Tokens::$emptyTokens[$tokens[$i][0]]) === false
2061+
) {
2062+
// Non-empty content.
20802063
break;
20812064
}
20822065
}
2066+
2067+
if ($tokens[$i] !== '(' && $i !== $numTokens) {
2068+
$newToken['code'] = T_STRING;
2069+
$newToken['type'] = 'T_STRING';
2070+
}
20832071
}
20842072

20852073
// This is a special case when checking PHP 5.5+ code in PHP < 5.5

tests/Core/Tokenizer/ArrayKeywordTest.inc

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
<?php
2+
3+
/* testEmptyArray */
4+
$var = array();
5+
6+
/* testArrayWithSpace */
7+
$var = array (1 => 10);
8+
9+
/* testArrayWithComment */
10+
$var = Array /*comment*/ (1 => 10);
11+
12+
/* testNestingArray */
13+
$var = array(
14+
/* testNestedArray */
15+
array(
16+
'key' => 'value',
17+
18+
/* testClosureReturnType */
19+
'closure' => function($a) use($global) : Array {},
20+
),
21+
);
22+
23+
/* testFunctionDeclarationParamType */
24+
function foo(array $a) {}
25+
26+
/* testFunctionDeclarationReturnType */
27+
function foo($a) : int|array|null {}
28+
29+
class Bar {
30+
/* testClassConst */
31+
const ARRAY = [];
32+
33+
/* testClassMethod */
34+
public function array() {}
35+
}

tests/Core/Tokenizer/ArrayKeywordTest.php

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
<?php
2+
/**
3+
* Tests that the array keyword is tokenized correctly.
4+
*
5+
* @author Juliette Reinders Folmer <phpcs_nospam@adviesenzo.nl>
6+
* @copyright 2021 Squiz Pty Ltd (ABN 77 084 670 600)
7+
* @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
8+
*/
9+
10+
namespace PHP_CodeSniffer\Tests\Core\Tokenizer;
11+
12+
use PHP_CodeSniffer\Tests\Core\AbstractMethodUnitTest;
13+
14+
class ArrayKeywordTest extends AbstractMethodUnitTest
{


    /**
     * Verify that a genuine array declaration keeps the `T_ARRAY` token
     * and is registered as the owner of its parentheses.
     *
     * @param string $testMarker  The comment prefacing the target token.
     * @param string $testContent Optional. The token content to look for.
     *
     * @dataProvider dataArrayKeyword
     * @covers       PHP_CodeSniffer\Tokenizers\PHP::tokenize
     * @covers       PHP_CodeSniffer\Tokenizers\Tokenizer::createTokenMap
     *
     * @return void
     */
    public function testArrayKeyword($testMarker, $testContent='array')
    {
        $details = $this->fetchTargetTokenDetails($testMarker, $testContent);
        $prefix  = 'Token tokenized as '.$details['type'].', not T_ARRAY ';

        $this->assertSame(T_ARRAY, $details['code'], $prefix.'(code)');
        $this->assertSame('T_ARRAY', $details['type'], $prefix.'(type)');

        // An array declaration must have been linked to its parentheses.
        foreach (['owner', 'opener', 'closer'] as $role) {
            $this->assertArrayHasKey('parenthesis_'.$role, $details, 'Parenthesis '.$role.' is not set');
        }

    }//end testArrayKeyword()


    /**
     * Data provider.
     *
     * @see testArrayKeyword()
     *
     * @return array
     */
    public function dataArrayKeyword()
    {
        $datasets = [];

        $datasets['empty array'] = ['/* testEmptyArray */'];
        $datasets['array with space before parenthesis']   = ['/* testArrayWithSpace */'];
        $datasets['array with comment before parenthesis'] = [
            '/* testArrayWithComment */',
            'Array',
        ];
        $datasets['nested: outer array'] = ['/* testNestingArray */'];
        $datasets['nested: inner array'] = ['/* testNestedArray */'];

        return $datasets;

    }//end dataArrayKeyword()


    /**
     * Verify that the array keyword used in a type declaration
     * is tokenized as `T_STRING` and owns no parentheses.
     *
     * @param string $testMarker  The comment prefacing the target token.
     * @param string $testContent Optional. The token content to look for.
     *
     * @dataProvider dataArrayType
     * @covers       PHP_CodeSniffer\Tokenizers\PHP::tokenize
     * @covers       PHP_CodeSniffer\Tokenizers\Tokenizer::createTokenMap
     *
     * @return void
     */
    public function testArrayType($testMarker, $testContent='array')
    {
        $details = $this->fetchTargetTokenDetails($testMarker, $testContent);
        $this->assertTokenizedAsString($details);

    }//end testArrayType()


    /**
     * Data provider.
     *
     * @see testArrayType()
     *
     * @return array
     */
    public function dataArrayType()
    {
        $datasets = [];

        $datasets['closure return type'] = [
            '/* testClosureReturnType */',
            'Array',
        ];
        $datasets['function param type']        = ['/* testFunctionDeclarationParamType */'];
        $datasets['function union return type'] = ['/* testFunctionDeclarationReturnType */'];

        return $datasets;

    }//end dataArrayType()


    /**
     * Verify that tokens with the content 'array' which are not the array keyword
     * (such as a constant or method name) are tokenized as `T_STRING`.
     *
     * @param string $testMarker  The comment prefacing the target token.
     * @param string $testContent Optional. The token content to look for.
     *
     * @dataProvider dataNotArrayKeyword
     * @covers       PHP_CodeSniffer\Tokenizers\PHP::tokenize
     * @covers       PHP_CodeSniffer\Tokenizers\Tokenizer::createTokenMap
     *
     * @return void
     */
    public function testNotArrayKeyword($testMarker, $testContent='array')
    {
        $details = $this->fetchTargetTokenDetails($testMarker, $testContent);
        $this->assertTokenizedAsString($details);

    }//end testNotArrayKeyword()


    /**
     * Data provider.
     *
     * @see testNotArrayKeyword()
     *
     * @return array
     */
    public function dataNotArrayKeyword()
    {
        $datasets = [];

        $datasets['class-constant-name'] = [
            '/* testClassConst */',
            'ARRAY',
        ];
        $datasets['class-method-name']   = ['/* testClassMethod */'];

        return $datasets;

    }//end dataNotArrayKeyword()


    /**
     * Look up the first `T_ARRAY`/`T_STRING` token with the given content
     * after a test marker and return its entry from the token stack.
     *
     * @param string $testMarker  The comment prefacing the target token.
     * @param string $testContent The token content to look for.
     *
     * @return array
     */
    private function fetchTargetTokenDetails($testMarker, $testContent)
    {
        $stackPtr = $this->getTargetToken($testMarker, [T_ARRAY, T_STRING], $testContent);
        $stack    = self::$phpcsFile->getTokens();

        return $stack[$stackPtr];

    }//end fetchTargetTokenDetails()


    /**
     * Assert that a token was (re)tokenized as `T_STRING`
     * and has no parenthesis indexes set.
     *
     * @param array $details The token stack entry to examine.
     *
     * @return void
     */
    private function assertTokenizedAsString($details)
    {
        $prefix = 'Token tokenized as '.$details['type'].', not T_STRING ';

        $this->assertSame(T_STRING, $details['code'], $prefix.'(code)');
        $this->assertSame('T_STRING', $details['type'], $prefix.'(type)');

        // A plain string token must not be linked to any parentheses.
        foreach (['owner', 'opener', 'closer'] as $role) {
            $this->assertArrayNotHasKey('parenthesis_'.$role, $details, 'Parenthesis '.$role.' is set');
        }

    }//end assertTokenizedAsString()


}//end class

0 commit comments

Comments
 (0)