Skip to content

Commit 4a7fa14

Browse files
authored
Merge pull request #4007 from oleibman/issue4001
Xml Reader Rich Text
2 parents 8485cd1 + d2a69ae commit 4a7fa14

File tree

4 files changed

+189
-7
lines changed

4 files changed

+189
-7
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ and this project adheres to [Semantic Versioning](https://semver.org).
4444
- Protect Sheet But Allow Sort [Issue #3951](https://github.com/PHPOffice/PhpSpreadsheet/issues/3951) [PR #3956](https://github.com/PHPOffice/PhpSpreadsheet/pull/3956)
4545
- Default Value for Conditional::$text [PR #3946](https://github.com/PHPOffice/PhpSpreadsheet/pull/3946)
4646
- Table Filter Buttons [Issue #3988](https://github.com/PHPOffice/PhpSpreadsheet/issues/3988) [PR #3992](https://github.com/PHPOffice/PhpSpreadsheet/pull/3992)
47-
- Improvements to Xml Reader [Issue #3999](https://github.com/PHPOffice/PhpSpreadsheet/issues/3999) [Issue #4000](https://github.com/PHPOffice/PhpSpreadsheet/issues/4000) [Issue #4002](https://github.com/PHPOffice/PhpSpreadsheet/issues/4002) [PR #4003](https://github.com/PHPOffice/PhpSpreadsheet/pull/4003)
47+
- Improvements to Xml Reader [Issue #3999](https://github.com/PHPOffice/PhpSpreadsheet/issues/3999) [Issue #4000](https://github.com/PHPOffice/PhpSpreadsheet/issues/4000) [Issue #4001](https://github.com/PHPOffice/PhpSpreadsheet/issues/4001) [Issue #4002](https://github.com/PHPOffice/PhpSpreadsheet/issues/4002) [PR #4003](https://github.com/PHPOffice/PhpSpreadsheet/pull/4003) [PR #4007](https://github.com/PHPOffice/PhpSpreadsheet/pull/4007)
4848

4949
## 2.0.0 - 2024-01-04
5050

src/PhpSpreadsheet/Helper/Html.php

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,8 @@ class Html
595595

596596
private RichText $richTextObject;
597597

598+
private bool $preserveWhiteSpace = false;
599+
598600
private function initialise(): void
599601
{
600602
$this->face = $this->size = $this->color = null;
@@ -608,7 +610,7 @@ private function initialise(): void
608610
/**
609611
* Parse HTML formatting and return the resulting RichText.
610612
*/
611-
public function toRichTextObject(string $html): RichText
613+
public function toRichTextObject(string $html, bool $preserveWhiteSpace = false): RichText
612614
{
613615
$this->initialise();
614616

@@ -622,7 +624,9 @@ public function toRichTextObject(string $html): RichText
622624
$dom->preserveWhiteSpace = false;
623625

624626
$this->richTextObject = new RichText();
627+
$this->preserveWhiteSpace = $preserveWhiteSpace;
625628
$this->parseElements($dom);
629+
$this->preserveWhiteSpace = false;
626630

627631
// Clean any further spurious whitespace
628632
$this->cleanWhitespace();
@@ -706,6 +710,7 @@ protected function startFontTag(DOMElement $tag): void
706710
if ($attrs !== null) {
707711
foreach ($attrs as $attribute) {
708712
$attributeName = strtolower($attribute->name);
713+
$attributeName = preg_replace('/^html:/', '', $attributeName) ?? $attributeName; // in case from Xml spreadsheet
709714
$attributeValue = $attribute->value;
710715

711716
if ($attributeName == 'color') {
@@ -795,11 +800,15 @@ public function breakTag(): void
795800

796801
private function parseTextNode(DOMText $textNode): void
797802
{
798-
$domText = (string) preg_replace(
799-
'/\s+/u',
800-
' ',
801-
str_replace(["\r", "\n"], ' ', $textNode->nodeValue ?? '')
802-
);
803+
if ($this->preserveWhiteSpace) {
804+
$domText = $textNode->nodeValue ?? '';
805+
} else {
806+
$domText = (string) preg_replace(
807+
'/\s+/u',
808+
' ',
809+
str_replace(["\r", "\n"], ' ', $textNode->nodeValue ?? '')
810+
);
811+
}
803812
$this->stringData .= $domText;
804813
$this->buildTextRun();
805814
}

src/PhpSpreadsheet/Reader/Xml.php

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
99
use PhpOffice\PhpSpreadsheet\Cell\DataType;
1010
use PhpOffice\PhpSpreadsheet\DefinedName;
11+
use PhpOffice\PhpSpreadsheet\Helper\Html as HelperHtml;
1112
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
1213
use PhpOffice\PhpSpreadsheet\Reader\Xlsx\Namespaces;
1314
use PhpOffice\PhpSpreadsheet\Reader\Xml\PageSettings;
@@ -459,6 +460,14 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet, boo
459460
*/
460461
case 'String':
461462
$type = DataType::TYPE_STRING;
463+
$rich = $cellData->children('http://www.w3.org/TR/REC-html40');
464+
if ($rich) {
465+
// in case of HTML content we extract the payload
466+
// and convert it into a rich text object
467+
$content = $cellData->asXML() ?: '';
468+
$html = new HelperHtml();
469+
$cellValue = $html->toRichTextObject($content, true);
470+
}
462471

463472
break;
464473
case 'Number':

tests/PhpSpreadsheetTests/Reader/Xml/XmlRichTextTest.php

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace PhpOffice\PhpSpreadsheetTests\Reader\Xml;
6+
7+
use PhpOffice\PhpSpreadsheet\Reader\Xml;
8+
use PhpOffice\PhpSpreadsheet\RichText\RichText;
9+
use PhpOffice\PhpSpreadsheet\RichText\Run;
10+
use PHPUnit\Framework\TestCase;
11+
12+
class XmlRichTextTest extends TestCase
13+
{
14+
public function testBreakTag(): void
15+
{
16+
$xmldata = <<< 'EOT'
17+
<?xml version="1.0"?>
18+
<?mso-application progid="Excel.Sheet"?>
19+
<Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet"
20+
xmlns:o="urn:schemas-microsoft-com:office:office"
21+
xmlns:x="urn:schemas-microsoft-com:office:excel"
22+
xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"
23+
xmlns:html="http://www.w3.org/TR/REC-html40">
24+
<Worksheet ss:Name="Test">
25+
<ss:Table>
26+
<ss:Row>
27+
<ss:Cell>
28+
<ss:Data ss:Type="String" xmlns="http://www.w3.org/TR/REC-html40"><I>italic</I><B>bold</B><BR />second line</ss:Data>
29+
</ss:Cell>
30+
</ss:Row>
31+
</ss:Table>
32+
</Worksheet>
33+
</Workbook>
34+
EOT;
35+
$reader = new Xml();
36+
$spreadsheet = $reader->loadSpreadsheetFromString($xmldata);
37+
self::assertEquals(1, $spreadsheet->getSheetCount());
38+
39+
$sheet = $spreadsheet->getActiveSheet();
40+
self::assertEquals('Test', $sheet->getTitle());
41+
$richText = $sheet->getCell('A1')->getValue();
42+
self::assertInstanceOf(RichText::class, $richText);
43+
$elements = $richText->getRichTextElements();
44+
self::assertCount(3, $elements);
45+
$run = $elements[0];
46+
self::assertInstanceOf(Run::class, $run);
47+
self::assertSame('italic', $run->getText());
48+
self::assertNotNull($run->getFont());
49+
self::assertTrue($run->getFont()->getItalic());
50+
self::assertFalse($run->getFont()->getBold());
51+
52+
$run = $elements[1];
53+
self::assertInstanceOf(Run::class, $run);
54+
self::assertSame('bold', $run->getText());
55+
self::assertNotNull($run->getFont());
56+
self::assertFalse($run->getFont()->getItalic());
57+
self::assertTrue($run->getFont()->getBold());
58+
59+
$run = $elements[2];
60+
self::assertInstanceOf(Run::class, $run);
61+
self::assertSame("\nsecond line", $run->getText());
62+
63+
$spreadsheet->disconnectWorksheets();
64+
}
65+
66+
public function testNewlineAndFontTag(): void
67+
{
68+
$xmldata = <<< 'EOT'
69+
<?xml version="1.0"?>
70+
<?mso-application progid="Excel.Sheet"?>
71+
<Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet"
72+
xmlns:o="urn:schemas-microsoft-com:office:office"
73+
xmlns:x="urn:schemas-microsoft-com:office:excel"
74+
xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"
75+
xmlns:html="http://www.w3.org/TR/REC-html40">
76+
<DocumentProperties xmlns="urn:schemas-microsoft-com:office:office">
77+
<LastAuthor>Owen Leibman</LastAuthor>
78+
<Created>2024-04-28T06:03:14Z</Created>
79+
<Version>16.00</Version>
80+
</DocumentProperties>
81+
<OfficeDocumentSettings xmlns="urn:schemas-microsoft-com:office:office">
82+
<AllowPNG/>
83+
</OfficeDocumentSettings>
84+
<ExcelWorkbook xmlns="urn:schemas-microsoft-com:office:excel">
85+
<WindowHeight>6510</WindowHeight>
86+
<WindowWidth>19200</WindowWidth>
87+
<WindowTopX>32767</WindowTopX>
88+
<WindowTopY>32767</WindowTopY>
89+
<ProtectStructure>False</ProtectStructure>
90+
<ProtectWindows>False</ProtectWindows>
91+
</ExcelWorkbook>
92+
<Styles>
93+
<Style ss:ID="Default" ss:Name="Normal">
94+
<Alignment ss:Vertical="Bottom"/>
95+
<Borders/>
96+
<Font ss:FontName="Aptos Narrow" x:Family="Swiss" ss:Size="11"
97+
ss:Color="#000000"/>
98+
<Interior/>
99+
<NumberFormat/>
100+
<Protection/>
101+
</Style>
102+
<Style ss:ID="s63">
103+
<Alignment ss:Vertical="Bottom" ss:WrapText="1"/>
104+
<Borders/>
105+
<Font ss:FontName="Aptos Narrow" x:Family="Swiss" ss:Size="11" ss:Italic="1"/>
106+
<Interior/>
107+
<NumberFormat/>
108+
<Protection/>
109+
</Style>
110+
</Styles>
111+
<Worksheet ss:Name="Test">
112+
<Table ss:ExpandedColumnCount="1" ss:ExpandedRowCount="1" x:FullColumns="1"
113+
x:FullRows="1" ss:DefaultRowHeight="14.5">
114+
<Row ss:AutoFitHeight="0" ss:Height="47.5">
115+
<Cell ss:StyleID="s63"><ss:Data ss:Type="String"
116+
xmlns="http://www.w3.org/TR/REC-html40"><I>italic</I><B>bold&#10;</B><Font
117+
html:Color="#FF0000">second</Font><Font> line</Font></ss:Data></Cell>
118+
</Row>
119+
</Table>
120+
<WorksheetOptions xmlns="urn:schemas-microsoft-com:office:excel">
121+
<Unsynced/>
122+
<Selected/>
123+
<ProtectObjects>False</ProtectObjects>
124+
<ProtectScenarios>False</ProtectScenarios>
125+
</WorksheetOptions>
126+
</Worksheet>
127+
</Workbook>
128+
EOT;
129+
$reader = new Xml();
130+
$spreadsheet = $reader->loadSpreadsheetFromString($xmldata);
131+
self::assertEquals(1, $spreadsheet->getSheetCount());
132+
133+
$sheet = $spreadsheet->getActiveSheet();
134+
self::assertEquals('Test', $sheet->getTitle());
135+
$richText = $sheet->getCell('A1')->getValue();
136+
self::assertInstanceOf(RichText::class, $richText);
137+
$elements = $richText->getRichTextElements();
138+
self::assertCount(4, $elements);
139+
$run = $elements[0];
140+
self::assertInstanceOf(Run::class, $run);
141+
self::assertSame('italic', $run->getText());
142+
self::assertNotNull($run->getFont());
143+
self::assertTrue($run->getFont()->getItalic());
144+
self::assertFalse($run->getFont()->getBold());
145+
146+
$run = $elements[1];
147+
self::assertInstanceOf(Run::class, $run);
148+
self::assertSame("bold\n", $run->getText());
149+
self::assertNotNull($run->getFont());
150+
self::assertFalse($run->getFont()->getItalic());
151+
self::assertTrue($run->getFont()->getBold());
152+
153+
$run = $elements[2];
154+
self::assertInstanceOf(Run::class, $run);
155+
self::assertSame('second', $run->getText());
156+
self::assertSame('FF0000', $run->getFont()?->getColor()->getRgb());
157+
158+
$run = $elements[3];
159+
self::assertInstanceOf(Run::class, $run);
160+
self::assertSame(' line', $run->getText());
161+
162+
$spreadsheet->disconnectWorksheets();
163+
}
164+
}

0 commit comments

Comments
 (0)