Skip to content

Commit 8de218f

Browse files
authored
Problems Formatting Very Small and Very Large Numbers (#3152)
* Problems Formatting Very Small and Very Large Numbers. Fix #3128 (the author found a workaround, but the problem remains). For some complex masks, when a cast of the cell value from float to string results in the use of scientific notation, the result of the formatting is unusable. I believe this PR solves the problem for numbers close to zero (PHP's cast from float to string uses scientific notation starting with 1E-5), and for a range of large numbers which are not handled correctly now. However, I have not found a way to ensure that the results match Excel for very large numbers (1E18 or larger); this change at least ensures that the resulting string is an accurate rendition of the number (which is not the case now) even if it doesn't match Excel. As an example, if you use the mask reported in the original issue, `0 000.0`, and enter a value of 1E90 into the cell, Excel will show it as 1 followed by 87 zeros, a space, 3 more zeros, a decimal point, and a zero. I have not figured out how to get PhpSpreadsheet to do that; for now, it will just return the formatted value as 1 followed by 90 zeros instead (I might have chosen to go with scientific notation instead). I will continue to think about those cases, but I do not feel it is worth delaying the improvements in this ticket while I do so. The affected section of code previously truncated to the appropriate precision; it now rounds, as Excel does. This seemed to be an area of code where problems might arise on 32-bit systems, and, indeed, I found something in the formatting code which had to be changed for 32-bit to work correctly. As long as I was doing that anyhow, I ran the full test suite on 32-bit, and found that PHP 8.1 had introduced some new stringencies which caused problems in a handful of places. All were found in the Xls Reader, and all are corrected now. * Remove Dead Assignment. Scrutinizer will be happy now.
1 parent e05e354 commit 8de218f

File tree

6 files changed

+115
-12
lines changed

6 files changed

+115
-12
lines changed

samples/templates/47_xlsfill.xls

512 Bytes
Binary file not shown.

src/PhpSpreadsheet/Reader/Xls.php

+6-6
Original file line numberDiff line numberDiff line change
@@ -2278,7 +2278,7 @@ private function readXf(): void
22782278
$diagonalDown = (0x40000000 & self::getInt4d($recordData, 10)) >> 30 ? true : false;
22792279

22802280
// bit: 31; mask: 0x80000000; 1 = diagonal line from bottom left to top right
2281-
$diagonalUp = (0x80000000 & self::getInt4d($recordData, 10)) >> 31 ? true : false;
2281+
$diagonalUp = ((int) 0x80000000 & self::getInt4d($recordData, 10)) >> 31 ? true : false;
22822282

22832283
if ($diagonalUp === false) {
22842284
if ($diagonalDown == false) {
@@ -2308,7 +2308,7 @@ private function readXf(): void
23082308
}
23092309

23102310
// bit: 31-26; mask: 0xFC000000 fill pattern
2311-
if ($fillType = Xls\Style\FillPattern::lookup((0xFC000000 & self::getInt4d($recordData, 14)) >> 26)) {
2311+
if ($fillType = Xls\Style\FillPattern::lookup(((int) 0xFC000000 & self::getInt4d($recordData, 14)) >> 26)) {
23122312
$objStyle->getFill()->setFillType($fillType);
23132313
}
23142314
// offset: 18; size: 2; pattern and background colour
@@ -2360,7 +2360,7 @@ private function readXf(): void
23602360
$objStyle->getBorders()->getBottom()->setBorderStyle(Xls\Style\Border::lookup((0x01C00000 & $borderAndBackground) >> 22));
23612361

23622362
// bit: 31-25; mask: 0xFE000000; bottom line color
2363-
$objStyle->getBorders()->getBottom()->colorIndex = (0xFE000000 & $borderAndBackground) >> 25;
2363+
$objStyle->getBorders()->getBottom()->colorIndex = ((int) 0xFE000000 & $borderAndBackground) >> 25;
23642364

23652365
// offset: 12; size: 4; cell border lines
23662366
$borderLines = self::getInt4d($recordData, 12);
@@ -7699,10 +7699,10 @@ private static function extractNumber($data)
76997699
{
77007700
$rknumhigh = self::getInt4d($data, 4);
77017701
$rknumlow = self::getInt4d($data, 0);
7702-
$sign = ($rknumhigh & 0x80000000) >> 31;
7702+
$sign = ($rknumhigh & (int) 0x80000000) >> 31;
77037703
$exp = (($rknumhigh & 0x7ff00000) >> 20) - 1023;
77047704
$mantissa = (0x100000 | ($rknumhigh & 0x000fffff));
7705-
$mantissalow1 = ($rknumlow & 0x80000000) >> 31;
7705+
$mantissalow1 = ($rknumlow & (int) 0x80000000) >> 31;
77067706
$mantissalow2 = ($rknumlow & 0x7fffffff);
77077707
$value = $mantissa / 2 ** (20 - $exp);
77087708

@@ -7733,7 +7733,7 @@ private static function getIEEE754($rknum)
77337733
// The RK format calls for using only the most significant 30 bits
77347734
// of the 64 bit floating point value. The other 34 bits are assumed
77357735
// to be 0 so we use the upper 30 bits of $rknum as follows...
7736-
$sign = ($rknum & 0x80000000) >> 31;
7736+
$sign = ($rknum & (int) 0x80000000) >> 31;
77377737
$exp = ($rknum & 0x7ff00000) >> 20;
77387738
$mantissa = (0x100000 | ($rknum & 0x000ffffc));
77397739
$value = $mantissa / 2 ** (20 - ($exp - 1023));

src/PhpSpreadsheet/Reader/Xls/MD5.php

+2-2
Original file line numberDiff line numberDiff line change
@@ -190,9 +190,9 @@ private static function i(int $X, int $Y, int $Z): int
190190
private static function step(callable $func, int &$A, int $B, int $C, int $D, int $M, int $s, $t): void
191191
{
192192
$t = self::signedInt($t);
193-
$A = ($A + call_user_func($func, $B, $C, $D) + $M + $t) & self::$allOneBits;
193+
$A = (int) ($A + call_user_func($func, $B, $C, $D) + $M + $t) & self::$allOneBits;
194194
$A = self::rotate($A, $s);
195-
$A = ($B + $A) & self::$allOneBits;
195+
$A = (int) ($B + $A) & self::$allOneBits;
196196
}
197197

198198
/** @param float|int $result may be float on 32-bit system */

src/PhpSpreadsheet/Style/NumberFormat/NumberFormatter.php

+61-4
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,18 @@ private static function processComplexNumberFormatMask($number, string $mask): s
6868
*/
6969
private static function complexNumberFormatMask($number, string $mask, bool $splitOnPoint = true): string
7070
{
71-
$sign = ($number < 0.0) ? '-' : '';
7271
/** @var float */
7372
$numberFloat = $number;
74-
$number = (string) abs($numberFloat);
73+
if ($splitOnPoint) {
74+
$masks = explode('.', $mask);
75+
if (count($masks) <= 2) {
76+
$decmask = $masks[1] ?? '';
77+
$decpos = substr_count($decmask, '0');
78+
$numberFloat = round($numberFloat, $decpos);
79+
}
80+
}
81+
$sign = ($numberFloat < 0.0) ? '-' : '';
82+
$number = self::f2s(abs($numberFloat));
7583

7684
if ($splitOnPoint && strpos($mask, '.') !== false && strpos($number, '.') !== false) {
7785
$numbers = explode('.', $number);
@@ -80,16 +88,56 @@ private static function complexNumberFormatMask($number, string $mask, bool $spl
8088
$masks = self::mergeComplexNumberFormatMasks($numbers, $masks);
8189
}
8290
$integerPart = self::complexNumberFormatMask($numbers[0], $masks[0], false);
91+
$numlen = strlen($numbers[1]);
92+
$msklen = strlen($masks[1]);
93+
if ($numlen < $msklen) {
94+
$numbers[1] .= str_repeat('0', $msklen - $numlen);
95+
}
8396
$decimalPart = strrev(self::complexNumberFormatMask(strrev($numbers[1]), strrev($masks[1]), false));
97+
$decimalPart = substr($decimalPart, 0, $msklen);
8498

8599
return "{$sign}{$integerPart}.{$decimalPart}";
86100
}
87101

102+
if (strlen($number) < strlen($mask)) {
103+
$number = str_repeat('0', strlen($mask) - strlen($number)) . $number;
104+
}
88105
$result = self::processComplexNumberFormatMask($number, $mask);
89106

90107
return "{$sign}{$result}";
91108
}
92109

110+
public static function f2s(float $f): string
111+
{
112+
return self::floatStringConvertScientific((string) $f);
113+
}
114+
115+
public static function floatStringConvertScientific(string $s): string
116+
{
117+
// convert only normalized form of scientific notation:
118+
// optional sign, single digit 1-9,
119+
// decimal point and digits (allowed to be omitted),
120+
// E (e permitted), optional sign, one or more digits
121+
if (preg_match('/^([+-])?([1-9])([.]([0-9]+))?[eE]([+-]?[0-9]+)$/', $s, $matches) === 1) {
122+
$exponent = (int) $matches[5];
123+
$sign = ($matches[1] === '-') ? '-' : '';
124+
if ($exponent >= 0) {
125+
$exponentPlus1 = $exponent + 1;
126+
$out = $matches[2] . $matches[4];
127+
$len = strlen($out);
128+
if ($len < $exponentPlus1) {
129+
$out .= str_repeat('0', $exponentPlus1 - $len);
130+
}
131+
$out = substr($out, 0, $exponentPlus1) . ((strlen($out) === $exponentPlus1) ? '' : ('.' . substr($out, $exponentPlus1)));
132+
$s = "$sign$out";
133+
} else {
134+
$s = $sign . '0.' . str_repeat('0', -$exponent - 1) . $matches[2] . $matches[4];
135+
}
136+
}
137+
138+
return $s;
139+
}
140+
93141
/**
94142
* @param mixed $value
95143
*/
@@ -118,11 +166,20 @@ private static function formatStraightNumericValue($value, string $format, array
118166
// Scientific format
119167
return sprintf('%5.2E', $valueFloat);
120168
} elseif (preg_match('/0([^\d\.]+)0/', $format) || substr_count($format, '.') > 1) {
121-
if ($value == (int) $valueFloat && substr_count($format, '.') === 1) {
169+
if ($valueFloat == floor($valueFloat) && substr_count($format, '.') === 1) {
122170
$value *= 10 ** strlen(explode('.', $format)[1]);
123171
}
124172

125-
return self::complexNumberFormatMask($value, $format);
173+
$result = self::complexNumberFormatMask($value, $format);
174+
if (strpos($result, 'E') !== false) {
175+
// This is a hack and doesn't match Excel.
176+
// It will, at least, be an accurate representation,
177+
// even if formatted incorrectly.
178+
// This is needed for absolute values >=1E18.
179+
$result = self::f2s($valueFloat);
180+
}
181+
182+
return $result;
126183
}
127184

128185
$sprintf_pattern = "%0$minWidth." . strlen($right) . 'f';

tests/PhpSpreadsheetTests/Style/NumberFormatTest.php

+32
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
66
use PhpOffice\PhpSpreadsheet\Style\NumberFormat;
7+
use PhpOffice\PhpSpreadsheet\Style\NumberFormat\NumberFormatter;
78
use PHPUnit\Framework\TestCase;
89

910
class NumberFormatTest extends TestCase
@@ -59,6 +60,7 @@ public function providerNumberFormat(): array
5960
* @dataProvider providerNumberFormatDates
6061
*
6162
* @param mixed $expectedResult
63+
* @param mixed $args
6264
*/
6365
public function testFormatValueWithMaskDate($expectedResult, ...$args): void
6466
{
@@ -84,4 +86,34 @@ public function testCurrencyCode(): void
8486
self::assertEquals($rslt, '$ 12,345.679');
8587
StringHelper::setCurrencyCode($cur);
8688
}
89+
90+
/**
91+
* @dataProvider providerNoScientific
92+
*/
93+
public function testNoScientific(string $expectedResult, string $numericString): void
94+
{
95+
$result = NumberFormatter::floatStringConvertScientific($numericString);
96+
self::assertSame($expectedResult, $result);
97+
}
98+
99+
public function providerNoScientific(): array
100+
{
101+
return [
102+
'large number' => ['92' . str_repeat('0', 16), '9.2E+17'],
103+
'no decimal portion' => ['16', '1.6E1'],
104+
'retain decimal 0 if supplied in string' => ['16.0', '1.60E1'],
105+
'exponent 0' => ['2.3', '2.3E0'],
106+
'whole and decimal' => ['16.5', '1.65E1'],
107+
'plus signs' => ['165000', '+1.65E+5'],
108+
'e2 one decimal' => ['489.7', '4.897E2'],
109+
'e2 no decimal' => ['-489', '-4.89E2'],
110+
'e2 fill units position' => ['480', '4.8E+2'],
111+
'no scientific notation' => ['3.14159', '3.14159'],
112+
'non-zero in first decimal' => ['0.165', '1.65E-1'],
113+
'one leading zero in decimal' => ['0.0165', '1.65E-2'],
114+
'four leading zeros in decimal' => ['-0.0000165', '-1.65E-5'],
115+
'small number' => ['0.' . str_repeat('0', 16) . '1', '1E-17'],
116+
'very small number' => ['0.' . str_repeat('0', 69) . '1', '1E-70'],
117+
];
118+
}
87119
}

tests/data/Style/NumberFormat.php

+14
Original file line numberDiff line numberDiff line change
@@ -1491,4 +1491,18 @@
14911491
'percent with leading 0' => ['06.2%', 0.062, '00.0%'],
14921492
'percent lead0 no decimal' => ['06%', 0.062, '00%'],
14931493
'percent nolead0 no decimal' => ['6%', 0.062, '##%'],
1494+
'scientific small complex mask discard all decimals' => ['0 000.0', 1e-17, '0 000.0'],
1495+
'scientific small complex mask keep some decimals' => ['-0 000.000027', -2.7e-5, '0 000.000000'],
1496+
'scientific small complex mask keep some decimals trailing zero' => ['-0 000.000040', -4e-5, '0 000.000000'],
1497+
'scientific large complex mask' => ['92' . str_repeat('0', 13) . ' 000.0', 9.2e17, '0 000.0'],
1498+
'scientific very large complex mask PhpSpreadsheet does not match Excel' => ['1' . str_repeat('0', 18), 1e18, '0 000.0'],
1499+
'scientific even larger complex mask PhpSpreadsheet does not match Excel' => ['43' . str_repeat('0', 89), 4.3e90, '0 000.0'],
1500+
'scientific many decimal positions' => ['000 0.000 01', 1e-5, '000 0.000 00'],
1501+
'round with scientific notation' => ['000 0.000 02', 1.6e-5, '000 0.000 00'],
1502+
'round with no decimals' => ['009 8', 97.7, '000 0'],
1503+
'round to 1 decimal' => ['009 7.2', 97.15, '000 0.0'],
1504+
'truncate with no decimals' => ['009 7', 97.3, '000 0'],
1505+
'truncate to 1 decimal' => ['009 7.1', 97.13, '000 0.0'],
1506+
'scientific many decimal positions truncated' => ['000 0.000 00', 1e-7, '000 0.000 00'],
1507+
'scientific very many decimal positions truncated' => ['000 0.000 00', 1e-17, '000 0.000 00'],
14941508
];

0 commit comments

Comments
 (0)