Skip to content

Commit 8e15765

Browse files
Merge branch '4.4'
* 4.4: cs fix bis; cs fix; [DomCrawler] add a value() method, normalize whitespaces
2 parents b6fb11d + 117563a commit 8e15765

File tree

3 files changed

+24
-4
lines changed

3 files changed

+24
-4
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ CHANGELOG
1414
* Added `Crawler::matches()` method.
1515
* Added `Crawler::closest()` method.
1616
* Added `Crawler::outerHtml()` method.
17+
* Added an argument to the `Crawler::text()` method to opt in to whitespace normalization.
1718

1819
4.3.0
1920
-----

Crawler.php

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -569,15 +569,16 @@ public function nodeName()
569569
}
570570

571571
/**
572-
* Returns the node value of the first node of the list.
572+
* Returns the text of the first node of the list.
573573
*
574-
* @param mixed $default When provided and the current node is empty, this value is returned and no exception is thrown
574+
* @param mixed $default When provided and the current node is empty, this value is returned and no exception is thrown
575+
* @param bool $normalizeWhitespace Whether whitespaces should be trimmed and normalized to single spaces
575576
*
576577
* @return string The text of the first node, trimmed and whitespace-normalized when $normalizeWhitespace is true
577578
*
578579
* @throws \InvalidArgumentException When current node is empty
579580
*/
580-
public function text($default = null)
581+
public function text($default = null, bool $normalizeWhitespace = false)
581582
{
582583
if (!$this->nodes) {
583584
if (0 < \func_num_args()) {
@@ -587,7 +588,13 @@ public function text($default = null)
587588
throw new \InvalidArgumentException('The current node list is empty.');
588589
}
589590

590-
return $this->getNode(0)->nodeValue;
591+
$text = $this->getNode(0)->nodeValue;
592+
593+
if (\func_num_args() > 1 && func_get_arg(1)) {
594+
return trim(preg_replace('/(?:\s{2,}+|[^\S ])/', ' ', $text));
595+
}
596+
597+
return $text;
591598
}
592599

593600
/**

Tests/AbstractCrawlerTest.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,14 @@ public function testEq()
253253
$this->assertCount(0, $crawler->eq(100), '->eq() returns an empty crawler if the nth node does not exist');
254254
}
255255

256+
public function testNormalizeWhiteSpace()
257+
{
258+
$crawler = $this->createTestCrawler()->filterXPath('//p');
259+
$this->assertSame('Elsa <3', $crawler->text(null, true), '->text(null, true) returns the text with normalized whitespace');
260+
$this->assertNotSame('Elsa <3', $crawler->text(null, false));
261+
$this->assertNotSame('Elsa <3', $crawler->text());
262+
}
263+
256264
public function testEach()
257265
{
258266
$data = $this->createTestCrawler()->filterXPath('//ul[1]/li')->each(function ($node, $i) {
@@ -1235,6 +1243,10 @@ public function createTestCrawler($uri = null)
12351243
<li>Two Bis</li>
12361244
<li>Three Bis</li>
12371245
</ul>
1246+
<p class="whitespace">
1247+
Elsa
1248+
&lt;3
1249+
</p>
12381250
<div id="parent">
12391251
<div id="child"></div>
12401252
<div id="child2" xmlns:foo="http://example.com"></div>

0 commit comments

Comments
 (0)