Skip to content

Commit 5b26e33

Browse files
klausi authored and nicolas-grekas committed
[DomCrawler] Allow pipe (|) character in link tags when using Xpath expressions
1 parent ef48f59 commit 5b26e33

File tree

2 files changed

+46
-3
lines changed

2 files changed

+46
-3
lines changed

src/Symfony/Component/DomCrawler/Crawler.php

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -856,13 +856,12 @@ private function relativize($xpath)
856856
{
857857
$expressions = array();
858858

859-
$unionPattern = '/\|(?![^\[]*\])/';
860859
// An expression which will never match to replace expressions which cannot match in the crawler
861860
// We cannot simply drop
862861
$nonMatchingExpression = 'a[name() = "b"]';
863862

864863
// Split any unions into individual expressions.
865-
foreach (preg_split($unionPattern, $xpath) as $expression) {
864+
foreach ($this->splitUnionParts($xpath) as $expression) {
866865
$expression = trim($expression);
867866
$parenthesis = '';
868867

@@ -912,6 +911,47 @@ private function relativize($xpath)
912911
return implode(' | ', $expressions);
913912
}
914913

914+
/**
 * Breaks an XPath expression apart at every union operator (|) that is
 * not nested in a predicate or a string literal.
 *
 * A pipe only acts as a separator when it sits outside of single- or
 * double-quoted string literals and outside of square brackets. The
 * separators themselves are dropped and the parts are returned untrimmed,
 * in their original order. Parentheses are not taken into account.
 *
 * @param string $xpath
 *
 * @return string[]
 */
private function splitUnionParts($xpath)
{
    $parts = array();
    $partStart = 0;
    $depth = 0;
    $length = strlen($xpath);
    $offset = 0;

    while ($offset < $length) {
        // Jump straight to the next character that can change the parser state.
        $offset += strcspn($xpath, '\'"[]|', $offset);
        if ($offset >= $length) {
            break;
        }

        $current = $xpath[$offset];
        switch ($current) {
            case "'":
            case '"':
                // A literal runs up to the next occurrence of the same quote
                // character; nothing inside it is interpreted.
                $closing = strpos($xpath, $current, $offset + 1);
                if (false === $closing) {
                    // Unterminated literal: the remainder of the expression
                    // belongs to it, so no further split can happen.
                    break 2;
                }
                $offset = $closing;
                break;

            case '[':
                ++$depth;
                break;

            case ']':
                --$depth;
                break;

            default:
                // Only the pipe is left; it separates parts at bracket depth 0.
                if (0 === $depth) {
                    $parts[] = substr($xpath, $partStart, $offset - $partStart);
                    $partStart = $offset + 1;
                }
        }

        ++$offset;
    }

    // Whatever follows the last separator (possibly everything) is the final part.
    $parts[] = substr($xpath, $partStart);

    return $parts;
}
954+
915955
/**
916956
* @param int $position
917957
*

src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ public function testFilterXpathComplexQueries()
387387
$this->assertCount(5, $crawler->filterXPath('(//a | //div)//img'));
388388
$this->assertCount(7, $crawler->filterXPath('((//a | //div)//img | //ul)'));
389389
$this->assertCount(7, $crawler->filterXPath('( ( //a | //div )//img | //ul )'));
390+
$this->assertCount(1, $crawler->filterXPath("//a[./@href][((./@id = 'Klausi|Claudiu' or normalize-space(string(.)) = 'Klausi|Claudiu' or ./@title = 'Klausi|Claudiu' or ./@rel = 'Klausi|Claudiu') or .//img[./@alt = 'Klausi|Claudiu'])]"));
390391
}
391392

392393
public function testFilterXPath()
@@ -548,7 +549,7 @@ public function testFilterXPathWithSelfAxes()
548549

549550
$this->assertCount(0, $crawler->filterXPath('self::a'), 'The fake root node has no "real" element name');
550551
$this->assertCount(0, $crawler->filterXPath('self::a/img'), 'The fake root node has no "real" element name');
551-
$this->assertCount(9, $crawler->filterXPath('self::*/a'));
552+
$this->assertCount(10, $crawler->filterXPath('self::*/a'));
552553
}
553554

554555
public function testFilter()
@@ -969,6 +970,8 @@ public function createTestCrawler($uri = null)
969970
970971
<a href="?get=param">GetLink</a>
971972
973+
<a href="/example">Klausi|Claudiu</a>
974+
972975
<form action="foo" id="FooFormId">
973976
<input type="text" value="TextValue" name="TextName" />
974977
<input type="submit" value="FooValue" name="FooName" id="FooId" />

0 commit comments

Comments
 (0)