Skip to content

Commit ae2c3ea

Browse files
authored
Merge pull request #4106 from oleibman/issue1284
Html Reader Preserve Unicode Whitespace Characters
2 parents a3c5c9e + 09584d2 commit ae2c3ea

File tree

2 files changed

+40
-1
lines changed

2 files changed

+40
-1
lines changed

src/PhpSpreadsheet/Reader/Html.php

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -627,7 +627,10 @@ protected function processDomElement(DOMNode $element, Worksheet $sheet, int &$r
627 627
{
628 628
foreach ($element->childNodes as $child) {
629 629
if ($child instanceof DOMText) {
630-
$domText = (string) preg_replace('/\s+/u', ' ', trim($child->nodeValue ?? ''));
630+
$domText = (string) preg_replace('/\s+/', ' ', trim($child->nodeValue ?? ''));
631+
if ($domText === "\u{a0}") {
632+
$domText = '';
633+
}
631 634
if (is_string($cellContent)) {
632 635
// simply append the text if the cell content is a plain text string
633 636
$cellContent .= $domText;
tests/PhpSpreadsheetTests/Reader/Html/Issue1284Test.php

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace PhpOffice\PhpSpreadsheetTests\Reader\Html;
6+
7+
use PhpOffice\PhpSpreadsheet\Spreadsheet;
8+
use PhpOffice\PhpSpreadsheetTests\Functional\AbstractFunctional;
9+
10+
class Issue1284Test extends AbstractFunctional
11+
{
12+
public function testIssue1284(): void
13+
{
14+
$ideographicSpace = "\u{3000}";
15+
$nbsp = "\u{a0}";
16+
$spreadsheetOld = new Spreadsheet();
17+
$osheet = $spreadsheetOld->getActiveSheet();
18+
$osheet->getCell('A1')->setValue('# item 1');
19+
$osheet->getCell('A2')->setValue("$ideographicSpace# item 2");
20+
$osheet->getCell('A3')->setValue("$ideographicSpace$ideographicSpace# item 3");
21+
$osheet->getCell('A4')->setValue("$nbsp# item\t4");
22+
$osheet->getCell('A5')->setValue("$nbsp$nbsp# item 5");
23+
24+
$spreadsheet = $this->writeAndReload($spreadsheetOld, 'Html');
25+
$spreadsheetOld->disconnectWorksheets();
26+
27+
$sheet = $spreadsheet->getActiveSheet();
28+
self::assertSame('# item 1', $sheet->getCell('A1')->getValue(), 'nothing changed');
29+
self::assertSame("$ideographicSpace# item 2", $sheet->getCell('A2')->getValue(), 'nothing changed including 1 ideographic space');
30+
self::assertSame("$ideographicSpace$ideographicSpace# item 3", $sheet->getCell('A3')->getValue(), 'nothing changed including 2 ideographic spaces');
31+
self::assertSame("$nbsp# item 4", $sheet->getCell('A4')->getValue(), 'nbsp unchanged, 2 spaces reduced to 1, tab changed to space');
32+
self::assertSame("$nbsp$nbsp# item 5", $sheet->getCell('A5')->getValue(), 'many spaces reduced to 1');
33+
34+
$spreadsheet->disconnectWorksheets();
35+
}
36+
}

0 commit comments

Comments
 (0)