diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b8534ef61..d5e64bfc18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org). ### Added -- Nothing yet. +- Address Excel Inappropriate Number Format Substitution. [PR #4532](https://github.com/PHPOffice/PhpSpreadsheet/pull/4532) ### Removed diff --git a/docs/topics/Excel Anomalies.md b/docs/topics/Excel Anomalies.md new file mode 100644 index 0000000000..d41337a5f5 --- /dev/null +++ b/docs/topics/Excel Anomalies.md @@ -0,0 +1,46 @@ +# Excel Anomalies + +This is documentation for some behavior in Excel itself which we +just do not understand, or which may come as a surprise to the user. + +## Date Number Format + +My system short date format is set to `yyyy-mm-dd`. Excel, for a very long time, did not include that amongst its formatting choices for dates, so it needed to be added as a custom format - no big deal. It has recently been added to the list of date formats, but ... + +I used Excel to create a spreadsheet, and included some dates, specifying `yyyy-mm-dd` formatting. When I looked at the resulting spreadsheet, I was surprised to see that Excel had stored the style not as `yyyy-mm-dd`, but rather as builtin style 14 (system short date format). Apparently the fact that the Excel styling matched my system choice was sufficient for it to override my choice! This is an astonishingly user-hostile implementation. Even though there are formats which, by design, "respond to changes in regional date and time settings", and even though the format I selected was not among those, Excel decided it was appropriate to vary the display even when I said I wanted an unvarying format. I assume, but have not confirmed, that this applies to formats other than `yyyy-mm-dd`. + +Note that this is not a problem when using PhpSpreadsheet to set the style, only when you let Excel do it. 
And, in that case, after a little experimentation, I figured out a format that Excel doesn't sabotage: `[Black]yyyy-mm-dd`. + +If you have a spreadsheet that has been altered in this way, it can be fixed with the following PhpSpreadsheet code: +```php + foreach ($spreadsheet->getCellXfCollection() as $style) { + $numberFormat = $style->getNumberFormat(); + // okay to use NumberFormat::SHORT_DATE_INDEX below + if ($numberFormat->getBuiltInFormatCode() === 14) { + $numberFormat->setFormatCode('yyyy-mm-dd'); + } + } +``` +Starting with PhpSpreadsheet 4.5.0, this can be simplified to: +```php + $spreadsheet->replaceBuiltinNumberFormat( + \PhpOffice\PhpSpreadsheet\Style\NumberFormat::SHORT_DATE_INDEX, + 'yyyy-mm-dd' + ); +``` + +## Negative Time Intervals + +You have a time in one cell, and a time in another, and you want to subtract and display the result in `h:mm` format. No problem if the result is positive. But, if it's negative, Excel just fills the cell with `#`. There is a solution of sorts. If you use a 1904 base date (default on Mac), the negative interval will work just fine. Alas, no dice if you use a 1900 base date (default on Windows). No idea why they can't fix that - the existing implementation can't really be something that anybody actually wants. Note that it is *not* safe to change the base date for an existing spreadsheet, so, if this is something you want to do, make sure you change the base date before populating any data. + +## Long-ago Dates + +Excel does not support dates before either 1900-01-01 (Windows default) or 1904-01-01 (Mac default). For the 1900 base year, there is the additional problem that the non-existent date 1900-02-29 is squeezed between 1900-02-28 and 1900-03-01. + +## Weird Fractions + +Similar fraction formats have inconsistent results in Excel. For example, if a cell contains the value 1 and the cell's format is `0 0/0`, it will display as `1 0/1`. But, if the cell's format is `? ??/???`, it will display as `1`. 
See [this issue](https://github.com/PHPOffice/PhpSpreadsheet/issues/3625), which remains open because, in the absence of usable documentation, we aren't sure how to handle things. + +## COUNTIF and Text Cells + +In Excel, COUNTIF appears to ignore text cells, behavior which doesn't seem to be documented anywhere. See [this issue](https://github.com/PHPOffice/PhpSpreadsheet/issues/3802), which remains open because, in the absence of usable documentation, we aren't sure how to handle things. \ No newline at end of file diff --git a/docs/topics/The Dating Game.md b/docs/topics/The Dating Game.md index 5b0c2812ff..cbe46ccaeb 100644 --- a/docs/topics/The Dating Game.md +++ b/docs/topics/The Dating Game.md @@ -184,7 +184,7 @@ MS Excel allows any separator character between hours/minutes/seconds; PhpSpread ### Duration (Elapsed Time) -Excel also supports formatting a value as a duration; a total number of hours, minutes or seconds rather than a time of day. +Excel also supports formatting a value as a duration; a total number of hours, minutes or seconds rather than a time of day. However, please note that negative durations are supported only if using base year 1904 (Mac default). | Code | Description | Displays as | |---------|----------------------------------------------------------------|-------------| diff --git a/src/PhpSpreadsheet/Spreadsheet.php b/src/PhpSpreadsheet/Spreadsheet.php index 656f3b5593..d035f402a5 100644 --- a/src/PhpSpreadsheet/Spreadsheet.php +++ b/src/PhpSpreadsheet/Spreadsheet.php @@ -1784,4 +1784,21 @@ public function mergeDrawingCellsForPdf(): void } } } + + /** + * Excel will sometimes replace user's formatting choice + * with a built-in choice that it thinks is equivalent. + * Its choice is often not equivalent after all. + * Such treatment is astonishingly user-hostile. + * This function will undo such changes. 
+ */ + public function replaceBuiltinNumberFormat(int $builtinFormatIndex, string $formatCode): void + { + foreach ($this->cellXfCollection as $style) { + $numberFormat = $style->getNumberFormat(); + if ($numberFormat->getBuiltInFormatCode() === $builtinFormatIndex) { + $numberFormat->setFormatCode($formatCode); + } + } + } } diff --git a/tests/PhpSpreadsheetTests/Reader/Xlsx/ReplaceBuiltinNumberFormatTest.php b/tests/PhpSpreadsheetTests/Reader/Xlsx/ReplaceBuiltinNumberFormatTest.php new file mode 100644 index 0000000000..59b5cf3c3f --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/Xlsx/ReplaceBuiltinNumberFormatTest.php @@ -0,0 +1,69 @@ +spreadsheet !== null) { + $this->spreadsheet->disconnectWorksheets(); + $this->spreadsheet = null; + } + if ($this->reloadedSpreadsheet !== null) { + $this->reloadedSpreadsheet->disconnectWorksheets(); + $this->reloadedSpreadsheet = null; + } + } + + public function testReplaceBuiltinNumberFormat(): void + { + $spreadsheet = $this->spreadsheet = new Spreadsheet(); + $sheet = $this->spreadsheet->getActiveSheet(); + $sheet->fromArray([45486, 1023, 45487, 45488, 45489]); + $sheet->getStyle('A1')->getNumberFormat() + ->setBuiltInFormatCode(NumberFormat::SHORT_DATE_INDEX); + $sheet->getStyle('B1')->getNumberFormat() + ->setFormatCode('#,##0.00'); + $sheet->getStyle('C1')->getNumberFormat() + ->setBuiltInFormatCode(NumberFormat::SHORT_DATE_INDEX); + $sheet->getStyle('D1')->getNumberFormat() + ->setFormatCode('dd-MMM-yyyy'); + $sheet->getStyle('E1')->getNumberFormat() + ->setBuiltInFormatCode(16); + $values = $sheet->toArray(); + $expected = [[ + '7/13/2024', // builtin style 14 + '1,023.00', // #,##0.00 + '7/14/2024', // builtin style 14 + '15-Jul-2024', // dd-MMM-yyyy + '16-Jul', // builtin style 16 + ]]; + self::assertSame($expected, $values); + $this->reloadedSpreadsheet = $this->writeAndReload($spreadsheet, 'Xlsx'); + $this->reloadedSpreadsheet->replaceBuiltinNumberFormat( + NumberFormat::SHORT_DATE_INDEX, + 'yyyy-mm-dd' + ); + 
$rsheet = $this->reloadedSpreadsheet->getActiveSheet(); + $newValues = $rsheet->toArray(); + $newExpected = [[ + '2024-07-13', // yyyy-mm-dd changed from builtin style 14 + '1,023.00', // unchanged #,##0.00 + '2024-07-14', // yyyy-mm-dd changed from builtin style 14 + '15-Jul-2024', // unchanged dd-MMM-yyyy + '16-Jul', // unchanged builtin style 16 + ]]; + self::assertSame($newExpected, $newValues); + } +}