Skip to content

Commit e76b701

Browse files
authored
Merge pull request #2507 from oleibman/word2493b
ODText Reader : Improve Section Reader
2 parents b0e1e41 + bdcd104 commit e76b701

File tree

6 files changed

+171
-11
lines changed

6 files changed

+171
-11
lines changed

docs/changes/1.x/1.2.0.md

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
- Added Support for Language, both for document overall and individual text elements
3232
- Template : Set a checkbox by [@nxtpge](https://github.com/nxtpge) in [#2509](https://github.com/PHPOffice/PHPWord/pull/2509)
3333
- ODText / RTF / Word2007 Writer : Add field FILENAME by [@milkyway-git](https://github.com/milkyway-git) in [#2510](https://github.com/PHPOffice/PHPWord/pull/2510)
34+
- ODText Reader : Improve Section Reader by [@oleibman](https://github.com/oleibman) in [#2507](https://github.com/PHPOffice/PHPWord/pull/2507)
3435

3536
### Bug fixes
3637

phpstan-baseline.neon

-5
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,6 @@ parameters:
165165
count: 1
166166
path: src/PhpWord/Reader/HTML.php
167167

168-
-
169-
message: "#^Call to an undefined method DOMNode\\:\\:getAttribute\\(\\)\\.$#"
170-
count: 2
171-
path: src/PhpWord/Reader/ODText/Content.php
172-
173168
-
174169
message: "#^Offset 'textNodes' on array\\{changed\\: PhpOffice\\\\PhpWord\\\\Element\\\\TrackChange, textNodes\\: DOMNodeList\\<DOMElement\\>\\} in isset\\(\\) always exists and is not nullable\\.$#"
175170
count: 1

src/PhpWord/Element/TextRun.php

+12
Original file line numberDiff line numberDiff line change
@@ -78,4 +78,16 @@ public function setParagraphStyle($style = null)
7878

7979
return $this->paragraphStyle;
8080
}
81+
82+
/**
 * Concatenate the text of every direct child that is a Text element.
 *
 * Children of any other type are skipped.
 *
 * @return string the combined text; an empty string when there is no Text child
 */
public function getText(): string
{
    $pieces = [];
    foreach ($this->getElements() as $child) {
        if (!($child instanceof Text)) {
            continue;
        }
        $pieces[] = $child->getText();
    }

    return implode('', $pieces);
}
8193
}

src/PhpWord/Reader/ODText/Content.php

+75-6
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@
1818
namespace PhpOffice\PhpWord\Reader\ODText;
1919

2020
use DateTime;
21+
use DOMElement;
22+
use DOMNodeList;
2123
use PhpOffice\Math\Reader\MathML;
24+
use PhpOffice\PhpWord\Element\Section;
2225
use PhpOffice\PhpWord\Element\TrackChange;
2326
use PhpOffice\PhpWord\PhpWord;
2427
use PhpOffice\PhpWord\Shared\XMLReader;
@@ -30,6 +33,9 @@
3033
*/
3134
class Content extends AbstractPart
3235
{
36+
/** @var ?Section */
37+
private $section;
38+
3339
/**
3440
* Read content.xml.
3541
*/
@@ -41,17 +47,28 @@ public function read(PhpWord $phpWord): void
4147
$trackedChanges = [];
4248

4349
$nodes = $xmlReader->getElements('office:body/office:text/*');
50+
$this->section = null;
51+
$this->processNodes($nodes, $xmlReader, $phpWord);
52+
$this->section = null;
53+
}
54+
55+
/** @param DOMNodeList<DOMElement> $nodes */
56+
public function processNodes(DOMNodeList $nodes, XMLReader $xmlReader, PhpWord $phpWord): void
57+
{
4458
if ($nodes->length > 0) {
45-
$section = $phpWord->addSection();
4659
foreach ($nodes as $node) {
4760
// $styleName = $xmlReader->getAttribute('text:style-name', $node);
4861
switch ($node->nodeName) {
4962
case 'text:h': // Heading
5063
$depth = $xmlReader->getAttribute('text:outline-level', $node);
51-
$section->addTitle($node->nodeValue, $depth);
64+
$this->getSection($phpWord)->addTitle($node->nodeValue, $depth);
5265

5366
break;
5467
case 'text:p': // Paragraph
68+
$styleName = $xmlReader->getAttribute('text:style-name', $node);
69+
if (substr($styleName, 0, 2) === 'SB') {
70+
break;
71+
}
5572
$element = $xmlReader->getElement('draw:frame/draw:object', $node);
5673
if ($element) {
5774
$mathFile = str_replace('./', '', $element->getAttribute('xlink:href')) . '/content.xml';
@@ -65,11 +82,13 @@ public function read(PhpWord $phpWord): void
6582
$reader = new MathML();
6683
$math = $reader->read($mathXML);
6784

68-
$section->addFormula($math);
85+
$this->getSection($phpWord)->addFormula($math);
6986
}
7087
}
7188
} else {
7289
$children = $node->childNodes;
90+
$spans = false;
91+
/** @var DOMElement $child */
7392
foreach ($children as $child) {
7493
switch ($child->nodeName) {
7594
case 'text:change-start':
@@ -89,16 +108,49 @@ public function read(PhpWord $phpWord): void
89108
$changed = $trackedChanges[$changeId];
90109
}
91110

111+
break;
112+
case 'text:span':
113+
$spans = true;
114+
92115
break;
93116
}
94117
}
95118

96-
$element = $section->addText($node->nodeValue);
119+
if ($spans) {
120+
$element = $this->getSection($phpWord)->addTextRun();
121+
foreach ($children as $child) {
122+
switch ($child->nodeName) {
123+
case 'text:span':
124+
/** @var DOMElement $child2 */
125+
foreach ($child->childNodes as $child2) {
126+
switch ($child2->nodeName) {
127+
case '#text':
128+
$element->addText($child2->nodeValue);
129+
130+
break;
131+
case 'text:tab':
132+
$element->addText("\t");
133+
134+
break;
135+
case 'text:s':
136+
$spaces = (int) $child2->getAttribute('text:c') ?: 1;
137+
$element->addText(str_repeat(' ', $spaces));
138+
139+
break;
140+
}
141+
}
142+
143+
break;
144+
}
145+
}
146+
} else {
147+
$element = $this->getSection($phpWord)->addText($node->nodeValue);
148+
}
97149
if (isset($changed) && is_array($changed)) {
98150
$element->setTrackChange($changed['changed']);
99151
if (isset($changed['textNodes'])) {
100152
foreach ($changed['textNodes'] as $changedNode) {
101-
$element = $section->addText($changedNode->nodeValue);
153+
$element = $this->getSection($phpWord)->addText($changedNode->nodeValue);
102154
$element->setTrackChange($changed['changed']);
103155
}
104156
}
@@ -110,7 +162,7 @@ public function read(PhpWord $phpWord): void
110162
$listItems = $xmlReader->getElements('text:list-item/text:p', $node);
111163
foreach ($listItems as $listItem) {
112164
// $listStyleName = $xmlReader->getAttribute('text:style-name', $listItem);
113-
$section->addListItem($listItem->nodeValue, 0);
165+
$this->getSection($phpWord)->addListItem($listItem->nodeValue, 0);
114166
}
115167

116168
break;
@@ -129,9 +181,26 @@ public function read(PhpWord $phpWord): void
129181
$trackedChanges[$changedRegion->getAttribute('text:id')] = ['changed' => $changed, 'textNodes' => $textNodes];
130182
}
131183

184+
break;
185+
case 'text:section': // Section
186+
// $sectionStyleName = $xmlReader->getAttribute('text:style-name', $listItem);
187+
$this->section = $phpWord->addSection();
188+
$children = $node->childNodes;
189+
$this->processNodes($children, $xmlReader, $phpWord);
190+
132191
break;
133192
}
134193
}
135194
}
136195
}
196+
197+
/**
 * Return the section currently being filled, creating one on first use.
 *
 * When no section has been stored on this reader yet, a new one is added
 * to the document and remembered for subsequent calls.
 *
 * @param PhpWord $phpWord document that receives a new section when needed
 */
private function getSection(PhpWord $phpWord): Section
{
    if ($this->section === null) {
        $this->section = $phpWord->addSection();
    }

    return $this->section;
}
137206
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
<?php
2+
/**
3+
* This file is part of PHPWord - A pure PHP library for reading and writing
4+
* word processing documents.
5+
*
6+
* PHPWord is free software distributed under the terms of the GNU Lesser
7+
* General Public License version 3 as published by the Free Software Foundation.
8+
*
9+
* For the full copyright and license information, please read the LICENSE
10+
* file that was distributed with this source code. For the full list of
11+
* contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
12+
*
13+
* @see https://github.com/PHPOffice/PHPWord
14+
*
15+
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
16+
*/
17+
18+
namespace PhpOffice\PhpWordTests\Reader\ODText;
19+
20+
use PhpOffice\PhpWord\IOFactory;
21+
use PhpOffice\PhpWord\PhpWord;
22+
use PhpOffice\PhpWord\Settings;
23+
24+
/**
 * Tests for section handling in the ODText reader: one write-then-read
 * round trip and one read of a stored document fixture.
 */
class ODTextSectionTest extends \PHPUnit\Framework\TestCase
{
    /** @var string Path of the temporary file written by a test; '' when none */
    private $filename = '';

    /** @var bool Output-escaping setting captured before each test */
    private $outputEscapingEnabled = false;

    /**
     * Remember the global output-escaping setting so tearDown can restore it.
     */
    protected function setUp(): void
    {
        $this->outputEscapingEnabled = Settings::isOutputEscapingEnabled();
    }

    /**
     * Restore global settings and remove the temporary file, if any.
     */
    protected function tearDown(): void
    {
        // Settings is static state shared by the whole test run; put it back.
        Settings::setOutputEscapingEnabled($this->outputEscapingEnabled);

        if ($this->filename !== '') {
            // The file may not exist if the writer failed before creating it.
            if (is_file($this->filename)) {
                unlink($this->filename);
            }
            $this->filename = '';
        }
    }

    /**
     * Write a document with the ODText writer, read it back, and check that
     * the text of the elements is unchanged.
     */
    public function testWriteThenReadSection(): void
    {
        $dir = 'tests/PhpWordTests/_files';
        Settings::setOutputEscapingEnabled(true);
        $phpWord = new PhpWord();
        $section = $phpWord->addSection();
        $inputText = ['days', 'monday', 'tuesday'];
        $inputText[] = "Tab\tthen two spaces then done.";
        foreach ($inputText as $text) {
            $section->addText($text);
        }
        $writer = IOFactory::createWriter($phpWord, 'ODText');
        $this->filename = "$dir/sectiontest.odt";
        $writer->save($this->filename);

        $reader = IOFactory::createReader('ODText');
        $phpWord2 = $reader->load($this->filename);

        self::assertSame($inputText, $this->collectText($phpWord2));
    }

    /**
     * Read a stored document fixture and check the text of its elements.
     */
    public function testReadNoSections(): void
    {
        $dir = 'tests/PhpWordTests/_files/documents';
        $inputText = ['days', 'monday', 'tuesday'];

        $reader = IOFactory::createReader('ODText');
        $filename = "$dir/word.2493.nosection.odt";
        $phpWord2 = $reader->load($filename);

        self::assertSame($inputText, $this->collectText($phpWord2));
    }

    /**
     * Gather getText() results from every element of every section, in order.
     *
     * Elements that do not expose a getText() method are skipped.
     *
     * @return array<int, mixed> one entry per element that has getText()
     */
    private function collectText(PhpWord $phpWord): array
    {
        $outputText = [];
        foreach ($phpWord->getSections() as $section) {
            foreach ($section->getElements() as $element) {
                if (is_object($element) && method_exists($element, 'getText')) {
                    $outputText[] = $element->getText();
                }
            }
        }

        return $outputText;
    }
}
Binary file not shown.

0 commit comments

Comments
 (0)