Skip to content

Commit a096fcc

Browse files
committed
Formula Misidentifying Text as Cell After Insertion/Deletion
Fix #3907. After row/column insertion/deletion, PhpSpreadsheet updates formulas which include cells which have moved. However, it can mis-identify cell addresses within the formula. Examples: - `=SUM(A2,'F1 (SETTINGS)'!A1:B1)` It identifes F1 as a cell address. - `=SUM(A2,'x F1 (SETTINGS)'!A1:B1)` It identifes F1 as a cell address. (This looks the same as the above, but, for technical reasons, it's different.) - `=SUM(A2,definedname1A1)` It identifes A1 as a cell address. - Sheet names in formulas are compared case-sensitively, and should be compared insensitively. This can make a difference if the formula includes its own sheet name, e.g. on sheet `Data`, formula `=SUM(DATA!A1:A2)` might have to change, but it will not do so with the existing logic. The defined name part is fairly straightforward. The regular expressions that identify a cell address just have to be a bit more robust. It was doing a negative look-behind for an alphabetic character or dollar sign; underscore, period, and digits, all of which can be part of a defined name, need to be added to that list. The other situations need a bit of a kludge, but not one so bad that I'm ashamed of it. The formulas will be altered before analysis so that sheet names are replaced with Unicode FFFD (sheetname does not match current sheet), FFFC (sheetname, enclosed in apostrophes, matches current sheet), and FFFB (sheetname, not enclosed in apostrophes, matches current sheet). This prevents the existing regular expressions from finding a cell address within a sheet name, and makes it easy to restore the original, with or without apostrophes, when the sheet name matches the current sheet and the cell(s) which it qualifies have to be changed. Tests are added for all the situations mentioned above. No existing tests required changes.
1 parent d620497 commit a096fcc

File tree

2 files changed

+88
-28
lines changed

2 files changed

+88
-28
lines changed

src/PhpSpreadsheet/ReferenceHelper.php

Lines changed: 40 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,12 @@ class ReferenceHelper
1515
{
1616
/** Constants */
1717
/** Regular Expressions */
18-
const REFHELPER_REGEXP_CELLREF = '((\w*|\'[^!]*\')!)?(?<![:a-z\$])(\$?[a-z]{1,3}\$?\d+)(?=[^:!\d\'])';
19-
const REFHELPER_REGEXP_CELLRANGE = '((\w*|\'[^!]*\')!)?(\$?[a-z]{1,3}\$?\d+):(\$?[a-z]{1,3}\$?\d+)';
20-
const REFHELPER_REGEXP_ROWRANGE = '((\w*|\'[^!]*\')!)?(\$?\d+):(\$?\d+)';
21-
const REFHELPER_REGEXP_COLRANGE = '((\w*|\'[^!]*\')!)?(\$?[a-z]{1,3}):(\$?[a-z]{1,3})';
18+
private const SHEETNAME_PART = '((\w*|\'[^!]*\')!)';
19+
private const SHEETNAME_PART_WITH_SLASHES = '/' . self::SHEETNAME_PART . '/';
20+
const REFHELPER_REGEXP_CELLREF = self::SHEETNAME_PART . '?(?<![:a-z1-9_\.\$])(\$?[a-z]{1,3}\$?\d+)(?=[^:!\d\'])';
21+
const REFHELPER_REGEXP_CELLRANGE = self::SHEETNAME_PART . '?(\$?[a-z]{1,3}\$?\d+):(\$?[a-z]{1,3}\$?\d+)';
22+
const REFHELPER_REGEXP_ROWRANGE = self::SHEETNAME_PART . '?(\$?\d+):(\$?\d+)';
23+
const REFHELPER_REGEXP_COLRANGE = self::SHEETNAME_PART . '?(\$?[a-z]{1,3}):(\$?[a-z]{1,3})';
2224

2325
/**
2426
* Instance of this class.
@@ -545,6 +547,18 @@ public function insertNewBefore(
545547
$worksheet->garbageCollect();
546548
}
547549

550+
private static function matchSheetName(?string $match, string $worksheetName): bool
551+
{
552+
return $match === null || $match === '' || $match === "'\u{fffc}'" || $match === "'\u{fffb}'" || strcasecmp(trim($match, "'"), $worksheetName) === 0;
553+
}
554+
555+
private static function sheetnameBeforeCells(string $match, string $worksheetName, string $cells): string
556+
{
557+
$toString = ($match > '') ? "$match!" : '';
558+
559+
return str_replace(["\u{fffc}", "'\u{fffb}'"], $worksheetName, $toString) . $cells;
560+
}
561+
548562
/**
549563
* Update references within formulas.
550564
*
@@ -565,6 +579,7 @@ public function updateFormulaReferences(
565579
bool $includeAbsoluteReferences = false,
566580
bool $onlyAbsoluteReferences = false
567581
): string {
582+
$callback = fn (array $matches): string => (strcasecmp(trim($matches[2], "'"), $worksheetName) === 0) ? (($matches[2][0] === "'") ? "'\u{fffc}'!" : "'\u{fffb}'!") : "'\u{fffd}'!";
568583
if (
569584
$this->cellReferenceHelper === null
570585
|| $this->cellReferenceHelper->refreshRequired($beforeCellAddress, $numberOfColumns, $numberOfRows)
@@ -582,18 +597,17 @@ public function updateFormulaReferences(
582597
$adjustCount = 0;
583598
$newCellTokens = $cellTokens = [];
584599
// Search for row ranges (e.g. 'Sheet1'!3:5 or 3:5) with or without $ absolutes (e.g. $3:5)
585-
$matchCount = preg_match_all('/' . self::REFHELPER_REGEXP_ROWRANGE . '/mui', ' ' . $formulaBlock . ' ', $matches, PREG_SET_ORDER);
600+
$formulaBlockx = ' ' . (preg_replace_callback(self::SHEETNAME_PART_WITH_SLASHES, $callback, $formulaBlock) ?? $formulaBlock) . ' ';
601+
$matchCount = preg_match_all('/' . self::REFHELPER_REGEXP_ROWRANGE . '/mui', $formulaBlockx, $matches, PREG_SET_ORDER);
586602
if ($matchCount > 0) {
587603
foreach ($matches as $match) {
588-
$fromString = ($match[2] > '') ? $match[2] . '!' : '';
589-
$fromString .= $match[3] . ':' . $match[4];
604+
$fromString = self::sheetnameBeforeCells($match[2], $worksheetName, "{$match[3]}:{$match[4]}");
590605
$modified3 = substr($this->updateCellReference('$A' . $match[3], $includeAbsoluteReferences, $onlyAbsoluteReferences), 2);
591606
$modified4 = substr($this->updateCellReference('$A' . $match[4], $includeAbsoluteReferences, $onlyAbsoluteReferences), 2);
592607

593608
if ($match[3] . ':' . $match[4] !== $modified3 . ':' . $modified4) {
594-
if (($match[2] == '') || (trim($match[2], "'") == $worksheetName)) {
595-
$toString = ($match[2] > '') ? $match[2] . '!' : '';
596-
$toString .= $modified3 . ':' . $modified4;
609+
if (self::matchSheetName($match[2], $worksheetName)) {
610+
$toString = self::sheetnameBeforeCells($match[2], $worksheetName, "$modified3:$modified4");
597611
// Max worksheet size is 1,048,576 rows by 16,384 columns in Excel 2007, so our adjustments need to be at least one digit more
598612
$column = 100000;
599613
$row = 10000000 + (int) trim($match[3], '$');
@@ -607,18 +621,17 @@ public function updateFormulaReferences(
607621
}
608622
}
609623
// Search for column ranges (e.g. 'Sheet1'!C:E or C:E) with or without $ absolutes (e.g. $C:E)
610-
$matchCount = preg_match_all('/' . self::REFHELPER_REGEXP_COLRANGE . '/mui', ' ' . $formulaBlock . ' ', $matches, PREG_SET_ORDER);
624+
$formulaBlockx = ' ' . (preg_replace_callback(self::SHEETNAME_PART_WITH_SLASHES, $callback, $formulaBlock) ?? $formulaBlock) . ' ';
625+
$matchCount = preg_match_all('/' . self::REFHELPER_REGEXP_COLRANGE . '/mui', $formulaBlockx, $matches, PREG_SET_ORDER);
611626
if ($matchCount > 0) {
612627
foreach ($matches as $match) {
613-
$fromString = ($match[2] > '') ? $match[2] . '!' : '';
614-
$fromString .= $match[3] . ':' . $match[4];
628+
$fromString = self::sheetnameBeforeCells($match[2], $worksheetName, "{$match[3]}:{$match[4]}");
615629
$modified3 = substr($this->updateCellReference($match[3] . '$1', $includeAbsoluteReferences, $onlyAbsoluteReferences), 0, -2);
616630
$modified4 = substr($this->updateCellReference($match[4] . '$1', $includeAbsoluteReferences, $onlyAbsoluteReferences), 0, -2);
617631

618632
if ($match[3] . ':' . $match[4] !== $modified3 . ':' . $modified4) {
619-
if (($match[2] == '') || (trim($match[2], "'") == $worksheetName)) {
620-
$toString = ($match[2] > '') ? $match[2] . '!' : '';
621-
$toString .= $modified3 . ':' . $modified4;
633+
if (self::matchSheetName($match[2], $worksheetName)) {
634+
$toString = self::sheetnameBeforeCells($match[2], $worksheetName, "$modified3:$modified4");
622635
// Max worksheet size is 1,048,576 rows by 16,384 columns in Excel 2007, so our adjustments need to be at least one digit more
623636
$column = Coordinate::columnIndexFromString(trim($match[3], '$')) + 100000;
624637
$row = 10000000;
@@ -632,18 +645,17 @@ public function updateFormulaReferences(
632645
}
633646
}
634647
// Search for cell ranges (e.g. 'Sheet1'!A3:C5 or A3:C5) with or without $ absolutes (e.g. $A1:C$5)
635-
$matchCount = preg_match_all('/' . self::REFHELPER_REGEXP_CELLRANGE . '/mui', ' ' . $formulaBlock . ' ', $matches, PREG_SET_ORDER);
648+
$formulaBlockx = ' ' . (preg_replace_callback(self::SHEETNAME_PART_WITH_SLASHES, $callback, "$formulaBlock") ?? "$formulaBlock") . ' ';
649+
$matchCount = preg_match_all('/' . self::REFHELPER_REGEXP_CELLRANGE . '/mui', $formulaBlockx, $matches, PREG_SET_ORDER);
636650
if ($matchCount > 0) {
637651
foreach ($matches as $match) {
638-
$fromString = ($match[2] > '') ? $match[2] . '!' : '';
639-
$fromString .= $match[3] . ':' . $match[4];
652+
$fromString = self::sheetnameBeforeCells($match[2], $worksheetName, "{$match[3]}:{$match[4]}");
640653
$modified3 = $this->updateCellReference($match[3], $includeAbsoluteReferences, $onlyAbsoluteReferences);
641654
$modified4 = $this->updateCellReference($match[4], $includeAbsoluteReferences, $onlyAbsoluteReferences);
642655

643656
if ($match[3] . $match[4] !== $modified3 . $modified4) {
644-
if (($match[2] == '') || (trim($match[2], "'") == $worksheetName)) {
645-
$toString = ($match[2] > '') ? $match[2] . '!' : '';
646-
$toString .= $modified3 . ':' . $modified4;
657+
if (self::matchSheetName($match[2], $worksheetName)) {
658+
$toString = self::sheetnameBeforeCells($match[2], $worksheetName, "$modified3:$modified4");
647659
[$column, $row] = Coordinate::coordinateFromString($match[3]);
648660
// Max worksheet size is 1,048,576 rows by 16,384 columns in Excel 2007, so our adjustments need to be at least one digit more
649661
$column = Coordinate::columnIndexFromString(trim($column, '$')) + 100000;
@@ -658,18 +670,18 @@ public function updateFormulaReferences(
658670
}
659671
}
660672
// Search for cell references (e.g. 'Sheet1'!A3 or C5) with or without $ absolutes (e.g. $A1 or C$5)
661-
$matchCount = preg_match_all('/' . self::REFHELPER_REGEXP_CELLREF . '/mui', ' ' . $formulaBlock . ' ', $matches, PREG_SET_ORDER);
673+
674+
$formulaBlockx = ' ' . (preg_replace_callback(self::SHEETNAME_PART_WITH_SLASHES, $callback, $formulaBlock) ?? $formulaBlock) . ' ';
675+
$matchCount = preg_match_all('/' . self::REFHELPER_REGEXP_CELLREF . '/mui', $formulaBlockx, $matches, PREG_SET_ORDER);
662676

663677
if ($matchCount > 0) {
664678
foreach ($matches as $match) {
665-
$fromString = ($match[2] > '') ? $match[2] . '!' : '';
666-
$fromString .= $match[3];
679+
$fromString = self::sheetnameBeforeCells($match[2], $worksheetName, "{$match[3]}");
667680

668681
$modified3 = $this->updateCellReference($match[3], $includeAbsoluteReferences, $onlyAbsoluteReferences);
669682
if ($match[3] !== $modified3) {
670-
if (($match[2] == '') || (trim($match[2], "'") == $worksheetName)) {
671-
$toString = ($match[2] > '') ? $match[2] . '!' : '';
672-
$toString .= $modified3;
683+
if (self::matchSheetName($match[2], $worksheetName)) {
684+
$toString = self::sheetnameBeforeCells($match[2], $worksheetName, "$modified3");
673685
[$column, $row] = Coordinate::coordinateFromString($match[3]);
674686
$columnAdditionalIndex = $column[0] === '$' ? 1 : 0;
675687
$rowAdditionalIndex = $row[0] === '$' ? 1 : 0;
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace PhpOffice\PhpSpreadsheetTests;
6+
7+
use PhpOffice\PhpSpreadsheet\Spreadsheet;
8+
use PHPUnit\Framework\TestCase;
9+
10+
class ReferenceHelper4Test extends TestCase
11+
{
12+
/**
13+
* @dataProvider dataProvider
14+
*/
15+
public function testIssue3907(string $expectedResult, string $settingsTitle, string $formula, string $dataTitle = 'DATA'): void
16+
{
17+
$spreadsheet = new Spreadsheet();
18+
$dataSheet = $spreadsheet->getActiveSheet();
19+
$dataSheet->setTitle($dataTitle);
20+
$settingsSheet = $spreadsheet->createSheet();
21+
$settingsSheet->setTitle($settingsTitle);
22+
$settingsSheet->getCell('A1')->setValue(10);
23+
$settingsSheet->getCell('B1')->setValue(20);
24+
$dataSheet->getCell('A5')->setValue($formula);
25+
$dataSheet->getCell('A2')->setValue(1);
26+
$dataSheet->insertNewColumnBefore('A');
27+
self::assertSame($expectedResult, $dataSheet->getCell('B5')->getValue());
28+
$spreadsheet->disconnectWorksheets();
29+
}
30+
31+
public static function dataProvider(): array
32+
{
33+
return [
34+
["=SUM(B2, 'F1 (SETTINGS)'!A1:B1)", 'F1 (SETTINGS)', "=SUM(A2, 'F1 (SETTINGS)'!A1:B1)"],
35+
["=SUM(B2, 'x F1 (SETTINGS)'!A1:B1)", 'x F1 (SETTINGS)', "=SUM(A2, 'x F1 (SETTINGS)'!A1:B1)"],
36+
["=SUM(B2, 'DATA'!B1)", 'F1 (SETTINGS)', "=SUM(A2, 'DATA'!A1)"],
37+
["=SUM(B2, 'DATA'!C1)", 'F1 (SETTINGS)', "=SUM(A2, 'data'!B1)"],
38+
['=SUM(B2, DATA!D1)', 'F1 (SETTINGS)', '=SUM(A2, data!C1)'],
39+
["=SUM(B2, 'DATA'!B1:C1)", 'F1 (SETTINGS)', "=SUM(A2, 'Data'!A1:B1)"],
40+
['=SUM(B2, DATA!B1:C1)', 'F1 (SETTINGS)', '=SUM(A2, DAta!A1:B1)'],
41+
["=SUM(B2, 'F1 Data'!C1)", 'F1 (SETTINGS)', "=SUM(A2, 'F1 Data'!B1)", 'F1 Data'],
42+
["=SUM(B2, 'x F1 Data'!C1)", 'F1 (SETTINGS)', "=SUM(A2, 'x F1 Data'!B1)", 'x F1 Data'],
43+
["=SUM(B2, 'x F1 Data'!C1)", 'F1 (SETTINGS)', "=SUM(A2, 'x F1 Data'!B1)", 'x F1 Data'],
44+
["=SUM(B2, 'x F1 Data'!C1:D2)", 'F1 (SETTINGS)', "=SUM(A2, 'x F1 Data'!B1:C2)", 'x F1 Data'],
45+
['=SUM(B2, definedname1A1)', 'F1 (SETTINGS)', '=SUM(A2, definedname1A1)', 'x F1 Data'],
46+
];
47+
}
48+
}

0 commit comments

Comments
 (0)