Skip to content

Commit 94375b8

Browse files
authored
Merge pull request #218 from ohader/comment-handling
Address comment handling issues
2 parents d3786d4 + c2aeec0 commit 94375b8

File tree

3 files changed

+20
-7
lines changed

3 files changed

+20
-7
lines changed

src/HTML5/Parser/Scanner.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ public function position()
104104
*/
105105
public function peek()
106106
{
107-
if (($this->char + 1) <= $this->EOF) {
107+
if (($this->char + 1) < $this->EOF) {
108108
return $this->data[$this->char + 1];
109109
}
110110

src/HTML5/Parser/Tokenizer.php

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -712,18 +712,24 @@ protected function isCommentEnd()
712712
return true;
713713
}
714714

715-
// If it doesn't start with -, not the end.
716-
if ('-' != $tok) {
715+
// If next two tokens are not '--', not the end.
716+
if ('-' != $tok || '-' != $this->scanner->peek()) {
717717
return false;
718718
}
719719

720-
// Advance one, and test for '->'
721-
if ('-' == $this->scanner->next() && '>' == $this->scanner->peek()) {
720+
$this->scanner->consume(2); // Consume the two '-' characters; current() is now the character after '--'
721+
722+
// Test for '>'
723+
if ('>' == $this->scanner->current()) {
724+
return true;
725+
}
726+
// Test for '!>'
727+
if ('!' == $this->scanner->current() && '>' == $this->scanner->peek()) {
722728
$this->scanner->consume(); // Consume the last '>'
723729
return true;
724730
}
725-
// Unread '-';
726-
$this->scanner->unconsume(1);
731+
// Not a comment end: unread the two '-' characters consumed above.
732+
$this->scanner->unconsume(2);
727733

728734
return false;
729735
}

test/HTML5/Parser/TokenizerTest.php

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,11 +197,17 @@ public function testComment()
197197
{
198198
$good = array(
199199
'<!--easy-->' => 'easy',
200+
'<!--easy--!>' => 'easy',
200201
'<!-- 1 > 0 -->' => ' 1 > 0 ',
202+
'<!-- 1 > 0 --!>' => ' 1 > 0 ',
201203
'<!-- --$i -->' => ' --$i ',
204+
'<!-- --$i --!>' => ' --$i ',
202205
'<!----$i-->' => '--$i',
206+
'<!----$i--!>' => '--$i',
203207
"<!--\nHello World.\na-->" => "\nHello World.\na",
208+
"<!--\nHello World.\na--!>" => "\nHello World.\na",
204209
'<!-- <!-- -->' => ' <!-- ',
210+
'<!-- <!-- --!>' => ' <!-- ',
205211
);
206212
foreach ($good as $test => $expected) {
207213
$events = $this->parse($test);
@@ -213,6 +219,7 @@ public function testComment()
213219
'<!--Hello' => 'Hello',
214220
"<!--\0Hello" => UTF8Utils::FFFD . 'Hello',
215221
'<!--' => '',
222+
'<!--<!--' => '<!--',
216223
);
217224
foreach ($fail as $test => $expected) {
218225
$events = $this->parse($test);

0 commit comments

Comments
 (0)