Skip to content

Commit 0c2708b

Browse files
committed
Xlsx Reader Optionally Ignore Rows With No Cells
Fix #3982. A number of issues submitted about Xlsx read performance have a common theme: row 1,048,576 and a few rows before it are defined in the worksheet XML with no cells attached to them. These might be the work of a third-party product. While these extraneous rows do not cause any problems for the cells that are actually used on the worksheet, they can lead to excessive memory use. This PR provides an option for the application to ignore rows with no cells when loading.

Recent changes to the load logic had already made a significant difference to memory consumption and load time. For the spreadsheet attached to issue 3982, which had caused out-of-memory errors on the user's system, peak memory usage was already reduced to 40-odd MB. With the new option, this is drastically reduced again, to just over 9 MB.

Specifying the new option is very easy:

```php
$reader->setIgnoreRowsWithNoCells(true);
```

Note that there are cases where you might not want this (non-default) behavior. For example, if you set a row height on a row with no cells, the height would be lost with this option. Unfortunately, the extraneous row definitions in the problematic spreadsheets claim to have a custom height, so I can't just use "no custom row styles" as an additional filter.
1 parent 2ed696f commit 0c2708b

File tree

6 files changed

+71
-6
lines changed

6 files changed

+71
-6
lines changed

src/PhpSpreadsheet/Reader/BaseReader.php

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ abstract class BaseReader implements IReader
3939
*/
4040
protected ?array $loadSheetsOnly = null;
4141

42+
/**
43+
* Ignore rows with no cells?
44+
* Identifies whether the Reader should ignore rows with no cells.
45+
* Currently implemented only for Xlsx.
46+
*/
47+
protected bool $ignoreRowsWithNoCells = false;
48+
4249
/**
4350
* IReadFilter instance.
4451
*/
@@ -78,6 +85,18 @@ public function setReadEmptyCells(bool $readEmptyCells): self
7885
return $this;
7986
}
8087

88+
public function getIgnoreRowsWithNoCells(): bool
89+
{
90+
return $this->ignoreRowsWithNoCells;
91+
}
92+
93+
public function setIgnoreRowsWithNoCells(bool $ignoreRowsWithNoCells): self
94+
{
95+
$this->ignoreRowsWithNoCells = $ignoreRowsWithNoCells;
96+
97+
return $this;
98+
}
99+
81100
public function getIncludeCharts(): bool
82101
{
83102
return $this->includeCharts;
@@ -150,6 +169,9 @@ protected function processFlags(int $flags): void
150169
if (((bool) ($flags & self::SKIP_EMPTY_CELLS) || (bool) ($flags & self::IGNORE_EMPTY_CELLS)) === true) {
151170
$this->setReadEmptyCells(false);
152171
}
172+
if (((bool) ($flags & self::IGNORE_ROWS_WITH_NO_CELLS)) === true) {
173+
$this->setIgnoreRowsWithNoCells(true);
174+
}
153175
}
154176

155177
protected function loadSpreadsheetFromFile(string $filename): Spreadsheet

src/PhpSpreadsheet/Reader/IReader.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ interface IReader
1313
public const SKIP_EMPTY_CELLS = 4;
1414
public const IGNORE_EMPTY_CELLS = 4;
1515

16+
public const IGNORE_ROWS_WITH_NO_CELLS = 8;
17+
1618
public function __construct();
1719

1820
/**

src/PhpSpreadsheet/Reader/Xlsx.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -796,10 +796,10 @@ protected function loadSpreadsheetFromFile(string $filename): Spreadsheet
796796
}
797797

798798
$sheetViewOptions = new SheetViewOptions($docSheet, $xmlSheetNS);
799-
$sheetViewOptions->load($this->getReadDataOnly(), $this->styleReader);
799+
$sheetViewOptions->load($this->readDataOnly, $this->styleReader);
800800

801801
(new ColumnAndRowAttributes($docSheet, $xmlSheetNS))
802-
->load($this->getReadFilter(), $this->getReadDataOnly());
802+
->load($this->getReadFilter(), $this->readDataOnly, $this->ignoreRowsWithNoCells);
803803
}
804804

805805
$holdSelectedCells = $docSheet->getSelectedCells();

src/PhpSpreadsheet/Reader/Xlsx/ColumnAndRowAttributes.php

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ private function setRowAttributes(int $rowNumber, array $rowAttributes): void
7272
}
7373
}
7474

75-
public function load(?IReadFilter $readFilter = null, bool $readDataOnly = false): void
75+
public function load(?IReadFilter $readFilter = null, bool $readDataOnly = false, bool $ignoreRowsWithNoCells = false): void
7676
{
7777
if ($this->worksheetXml === null) {
7878
return;
@@ -85,7 +85,7 @@ public function load(?IReadFilter $readFilter = null, bool $readDataOnly = false
8585
}
8686

8787
if ($this->worksheetXml->sheetData && $this->worksheetXml->sheetData->row) {
88-
$rowsAttributes = $this->readRowAttributes($this->worksheetXml->sheetData->row, $readDataOnly);
88+
$rowsAttributes = $this->readRowAttributes($this->worksheetXml->sheetData->row, $readDataOnly, $ignoreRowsWithNoCells);
8989
}
9090

9191
if ($readFilter !== null && $readFilter::class === DefaultReadFilter::class) {
@@ -189,13 +189,13 @@ private function isFilteredRow(IReadFilter $readFilter, int $rowCoordinate, arra
189189
return false;
190190
}
191191

192-
private function readRowAttributes(SimpleXMLElement $worksheetRow, bool $readDataOnly): array
192+
private function readRowAttributes(SimpleXMLElement $worksheetRow, bool $readDataOnly, bool $ignoreRowsWithNoCells): array
193193
{
194194
$rowAttributes = [];
195195

196196
foreach ($worksheetRow as $rowx) {
197197
$row = $rowx->attributes();
198-
if ($row !== null) {
198+
if ($row !== null && (!$ignoreRowsWithNoCells || isset($rowx->c))) {
199199
if (isset($row['ht']) && !$readDataOnly) {
200200
$rowAttributes[(int) $row['r']]['rowHeight'] = (float) $row['ht'];
201201
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace PhpOffice\PhpSpreadsheetTests\Reader\Xlsx;
6+
7+
use PhpOffice\PhpSpreadsheet\IOFactory;
8+
use PhpOffice\PhpSpreadsheet\Reader\IReader;
9+
use PhpOffice\PhpSpreadsheet\Reader\Xlsx as XlsxReader;
10+
11+
class Issue3982Test extends \PHPUnit\Framework\TestCase
12+
{
13+
private static string $testbook = 'tests/data/Reader/XLSX/issue.3982.xlsx';
14+
15+
public function testLoadAllRows(): void
16+
{
17+
$spreadsheet = IOFactory::load(self::$testbook);
18+
$sheet = $spreadsheet->getActiveSheet();
19+
$data = $sheet->toArray(null, true, false, true);
20+
self::assertCount(1048576, $data);
21+
$spreadsheet->disconnectWorksheets();
22+
}
23+
24+
public function testIgnoreCellsWithNoRows(): void
25+
{
26+
$spreadsheet = IOFactory::load(self::$testbook, IReader::IGNORE_ROWS_WITH_NO_CELLS);
27+
$sheet = $spreadsheet->getActiveSheet();
28+
$data = $sheet->toArray(null, true, false, true);
29+
self::assertSame([1, 2, 3, 4, 5, 6], array_keys($data));
30+
$spreadsheet->disconnectWorksheets();
31+
}
32+
33+
public function testDefaultSetting(): void
34+
{
35+
$reader = new XlsxReader();
36+
self::assertFalse($reader->getIgnoreRowsWithNoCells());
37+
self::assertFalse($reader->getReadDataOnly());
38+
self::assertFalse($reader->getIncludeCharts());
39+
self::assertTrue($reader->getReadEmptyCells());
40+
}
41+
}
20.5 KB
Binary file not shown.

0 commit comments

Comments
 (0)