From d9834e055854f61f72969643b2772878996f24c6 Mon Sep 17 00:00:00 2001 From: MarkBaker Date: Tue, 27 Apr 2021 18:18:09 +0200 Subject: [PATCH 1/3] Improve Range handling in the Calculation Engine for Row and Column ranges --- .../Calculation/Calculation.php | 104 ++++++++++++------ .../Calculation/MathTrig/Sum.php | 6 +- .../Calculation/RowColumnReferenceTest.php | 87 +++++++++++++++ 3 files changed, 162 insertions(+), 35 deletions(-) create mode 100644 tests/PhpSpreadsheetTests/Calculation/RowColumnReferenceTest.php diff --git a/src/PhpSpreadsheet/Calculation/Calculation.php b/src/PhpSpreadsheet/Calculation/Calculation.php index 97bd177927..c09ac51e87 100644 --- a/src/PhpSpreadsheet/Calculation/Calculation.php +++ b/src/PhpSpreadsheet/Calculation/Calculation.php @@ -30,6 +30,9 @@ class Calculation const CALCULATION_REGEXP_CELLREF = '((([^\s,!&%^\/\*\+<>=-]*)|(\'[^\']*\')|(\"[^\"]*\"))!)?\$?\b([a-z]{1,3})\$?(\d{1,7})(?![\w.])'; // Cell reference (with or without a sheet reference) ensuring absolute/relative const CALCULATION_REGEXP_CELLREF_RELATIVE = '((([^\s\(,!&%^\/\*\+<>=-]*)|(\'[^\']*\')|(\"[^\"]*\"))!)?(\$?\b[a-z]{1,3})(\$?\d{1,7})(?![\w.])'; + const CALCULATION_REGEXP_COLUMN_RANGE = '(((([^\s\(,!&%^\/\*\+<>=-]*)|(\'[^\']*\')|(\"[^\"]*\"))!)?(\$?[a-z]{1,3})):(?![.*])'; + const CALCULATION_REGEXP_ROW_RANGE = '(((([^\s\(,!&%^\/\*\+<>=-]*)|(\'[^\']*\')|(\"[^\"]*\"))!)?(\$?[1-9][0-9]{0,6})):(?![.*])'; + // Cell reference (with or without a sheet reference) ensuring absolute/relative // Cell ranges ensuring absolute/relative const CALCULATION_REGEXP_COLUMNRANGE_RELATIVE = '(\$?[a-z]{1,3}):(\$?[a-z]{1,3})'; const CALCULATION_REGEXP_ROWRANGE_RELATIVE = '(\$?\d{1,7}):(\$?\d{1,7})'; @@ -3798,6 +3801,8 @@ private function internalParseFormula($formula, ?Cell $pCell = null) $regexpMatchString = '/^(' . self::CALCULATION_REGEXP_FUNCTION . '|' . self::CALCULATION_REGEXP_CELLREF . + '|' . self::CALCULATION_REGEXP_COLUMN_RANGE . + '|' . self::CALCULATION_REGEXP_ROW_RANGE . '|' . self::CALCULATION_REGEXP_NUMBER . '|' . self::CALCULATION_REGEXP_STRING . '|' . self::CALCULATION_REGEXP_OPENBRACE . @@ -3866,7 +3871,8 @@ private function internalParseFormula($formula, ?Cell $pCell = null) $opCharacter .= $formula[++$index]; } // Find out if we're currently at the beginning of a number, variable, cell reference, function, parenthesis or operand - $isOperandOrFunction = preg_match($regexpMatchString, substr($formula, $index), $match); + $isOperandOrFunction = (bool) preg_match($regexpMatchString, substr($formula, $index), $match); + if ($opCharacter == '-' && !$expectingOperator) { // Is it a negation instead of a minus? // Put a negation on the stack $stack->push('Unary Operator', '~', null, $currentCondition, $currentOnlyIf, $currentOnlyIfNot); @@ -4038,6 +4044,7 @@ private function internalParseFormula($formula, ?Cell $pCell = null) $expectingOperand = false; $val = $match[1]; $length = strlen($val); + if (preg_match('/^' . self::CALCULATION_REGEXP_FUNCTION . '$/miu', $val, $matches)) { $val = preg_replace('/\s/u', '', $val); if (isset(self::$phpSpreadsheetFunctions[strtoupper($matches[1])]) || isset(self::$controlFunctions[strtoupper($matches[1])])) { // it's a function @@ -4074,7 +4081,7 @@ private function internalParseFormula($formula, ?Cell $pCell = null) // Should only be applied to the actual cell column, not the worksheet name // If the last entry on the stack was a : operator, then we have a cell range reference $testPrevOp = $stack->last(1); - if ($testPrevOp !== null && $testPrevOp['value'] == ':') { + if ($testPrevOp !== null && $testPrevOp['value'] === ':') { // If we have a worksheet reference, then we're playing with a 3D reference if ($matches[2] == '') { // Otherwise, we 'inherit' the worksheet reference from the start cell reference @@ -4091,62 +4098,57 @@ private function internalParseFormula($formula, ?Cell $pCell = null) return $this->raiseFormulaError('3D Range references are not yet supported'); } } + } elseif (strpos($val, '!') === false && $pCellParent !== null) { + $worksheet = $pCellParent->getTitle(); + $val = "'{$worksheet}'!{$val}"; } $outputItem = $stack->getStackItem('Cell Reference', $val, $val, $currentCondition, $currentOnlyIf, $currentOnlyIfNot); $output[] = $outputItem; } else { // it's a variable, constant, string, number or boolean + $localeConstant = false; + $stackItemType = 'Value'; + $stackItemReference = null; + // If the last entry on the stack was a : operator, then we may have a row or column range reference $testPrevOp = $stack->last(1); if ($testPrevOp !== null && $testPrevOp['value'] === ':') { + $stackItemType = 'Cell Reference'; $startRowColRef = $output[count($output) - 1]['value']; [$rangeWS1, $startRowColRef] = Worksheet::extractSheetTitle($startRowColRef, true); $rangeSheetRef = $rangeWS1; - if ($rangeWS1 != '') { + if ($rangeWS1 !== '') { $rangeWS1 .= '!'; } + $rangeSheetRef = trim($rangeSheetRef, "'"); [$rangeWS2, $val] = Worksheet::extractSheetTitle($val, true); - if ($rangeWS2 != '') { + if ($rangeWS2 !== '') { $rangeWS2 .= '!'; } else { $rangeWS2 = $rangeWS1; } + $refSheet = $pCellParent; - if ($pCellParent !== null && $rangeSheetRef !== $pCellParent->getTitle()) { + if ($pCellParent !== null && $rangeSheetRef !== '' && $rangeSheetRef !== $pCellParent->getTitle()) { $refSheet = $pCellParent->getParent()->getSheetByName($rangeSheetRef); } - if ( - (is_int($startRowColRef)) && (ctype_digit($val)) && - ($startRowColRef <= 1048576) && ($val <= 1048576) - ) { + + if (ctype_digit($val) && $val <= 1048576) { // Row range - $endRowColRef = ($refSheet !== null) ? $refSheet->getHighestColumn() : 'XFD'; // Max 16,384 columns for Excel2007 - $output[count($output) - 1]['value'] = $rangeWS1 . 'A' . $startRowColRef; - $val = $rangeWS2 . $endRowColRef . $val; - } elseif ( - (ctype_alpha($startRowColRef)) && (ctype_alpha($val)) && - (strlen($startRowColRef) <= 3) && (strlen($val) <= 3) - ) { + $stackItemType = 'Row Reference'; + $endRowColRef = ($refSheet !== null) ? $refSheet->getHighestDataColumn($val) : 'XFD'; // Max 16,384 columns for Excel2007 + $val = "{$rangeWS2}{$endRowColRef}{$val}"; + } elseif (ctype_alpha($val) && strlen($val) <= 3) { // Column range - $endRowColRef = ($refSheet !== null) ? $refSheet->getHighestRow() : 1048576; // Max 1,048,576 rows for Excel2007 - $output[count($output) - 1]['value'] = $rangeWS1 . strtoupper($startRowColRef) . '1'; - $val = $rangeWS2 . $val . $endRowColRef; + $stackItemType = 'Column Reference'; + $endRowColRef = ($refSheet !== null) ? $refSheet->getHighestDataRow($val) : 1048576; // Max 1,048,576 rows for Excel2007 + $val = "{$rangeWS2}{$val}{$endRowColRef}"; } - } - - $localeConstant = false; - $stackItemType = 'Value'; - $stackItemReference = null; - if ($opCharacter == self::FORMULA_STRING_QUOTE) { + $stackItemReference = $val; + } elseif ($opCharacter == self::FORMULA_STRING_QUOTE) { // UnEscape any quotes within the string $val = self::wrapResult(str_replace('""', self::FORMULA_STRING_QUOTE, self::unwrapResult($val))); - } elseif (is_numeric($val)) { - if ((strpos($val, '.') !== false) || (stripos($val, 'e') !== false) || ($val > PHP_INT_MAX) || ($val < -PHP_INT_MAX)) { - $val = (float) $val; - } else { - $val = (int) $val; - } } elseif (isset(self::$excelConstants[trim(strtoupper($val))])) { $stackItemType = 'Constant'; $excelConstant = trim(strtoupper($val)); @@ -4154,10 +4156,41 @@ private function internalParseFormula($formula, ?Cell $pCell = null) } elseif (($localeConstant = array_search(trim(strtoupper($val)), self::$localeBoolean)) !== false) { $stackItemType = 'Constant'; $val = self::$excelConstants[$localeConstant]; + } elseif ( + preg_match('/^' . self::CALCULATION_REGEXP_ROW_RANGE . '/miu', substr($formula, $index), $rowRangeReference) + ) { + $val = $rowRangeReference[1]; + $length = strlen($rowRangeReference[1]); + $stackItemType = 'Row Reference'; + $column = 'A'; + if (($testPrevOp !== null && $testPrevOp['value'] === ':') && $pCellParent !== null) { + $column = $pCellParent->getHighestDataColumn((int) $val); + } + $val = "{$rowRangeReference[2]}{$column}{$rowRangeReference[7]}"; + $stackItemReference = $val; + } elseif ( + preg_match('/^' . self::CALCULATION_REGEXP_COLUMN_RANGE . '/miu', substr($formula, $index), $columnRangeReference) + ) { + $val = $columnRangeReference[1]; + $length = strlen($val); + $stackItemType = 'Column Reference'; + $row = '1'; + if (($testPrevOp !== null && $testPrevOp['value'] === ':') && $pCellParent !== null) { + $row = $pCellParent->getHighestDataRow($val); + } + $val = "{$val}{$row}"; + $stackItemReference = $val; } elseif (preg_match('/^' . self::CALCULATION_REGEXP_DEFINEDNAME . '.*/miu', $val, $match)) { $stackItemType = 'Defined Name'; $stackItemReference = $val; + } elseif (is_numeric($val)) { + if ((strpos($val, '.') !== false) || (stripos($val, 'e') !== false) || ($val > PHP_INT_MAX) || ($val < -PHP_INT_MAX)) { + $val = (float) $val; + } else { + $val = (int) $val; + } } + $details = $stack->getStackItem($stackItemType, $val, $stackItemReference, $currentCondition, $currentOnlyIf, $currentOnlyIfNot); if ($localeConstant) { $details['localeValue'] = $localeConstant; @@ -4431,6 +4464,7 @@ private function processTokenStack($tokens, $cellID = null, ?Cell $pCell = null) } else { return $this->raiseFormulaError('Unable to access Cell Reference'); } + $stack->push('Cell Reference', $cellValue, $cellRef); } else { $stack->push('Error', Functions::REF(), null); @@ -4564,6 +4598,7 @@ private function processTokenStack($tokens, $cellID = null, ?Cell $pCell = null) } } elseif (preg_match('/^' . self::CALCULATION_REGEXP_CELLREF . '$/i', $token, $matches)) { $cellRef = null; + if (isset($matches[8])) { if ($pCell === null) { // We can't access the range, so return a REF error @@ -4596,7 +4631,7 @@ private function processTokenStack($tokens, $cellID = null, ?Cell $pCell = null) } } else { if ($pCell === null) { - // We can't access the cell, so return a REF error + // We can't access the cell, so return a REF error $cellValue = Functions::REF(); } else { $cellRef = $matches[6] . $matches[7]; @@ -4613,6 +4648,7 @@ private function processTokenStack($tokens, $cellID = null, ?Cell $pCell = null) $cellValue = $this->extractCellRange($cellRef, $this->spreadsheet->getSheetByName($matches[2]), false); $pCell->attach($pCellParent); } else { + $cellRef = ($cellSheet !== null) ? "{$matches[2]}!{$cellRef}" : $cellRef; $cellValue = null; } } else { @@ -4631,7 +4667,8 @@ private function processTokenStack($tokens, $cellID = null, ?Cell $pCell = null) } } } - $stack->push('Value', $cellValue, $cellRef); + + $stack->push('Cell Value', $cellValue, $cellRef); if (isset($storeKey)) { $branchStore[$storeKey] = $cellValue; } @@ -5116,6 +5153,7 @@ public function extractCellRange(&$pRange = 'A1', ?Worksheet $pSheet = null, $re if ($pSheet !== null) { $pSheetName = $pSheet->getTitle(); + if (strpos($pRange, '!') !== false) { [$pSheetName, $pRange] = Worksheet::extractSheetTitle($pRange, true); $pSheet = $this->spreadsheet->getSheetByName($pSheetName); diff --git a/src/PhpSpreadsheet/Calculation/MathTrig/Sum.php b/src/PhpSpreadsheet/Calculation/MathTrig/Sum.php index cd29248bd6..ab3a9a07c1 100644 --- a/src/PhpSpreadsheet/Calculation/MathTrig/Sum.php +++ b/src/PhpSpreadsheet/Calculation/MathTrig/Sum.php @@ -50,11 +50,13 @@ public static function funcSum(...$args) public static function funcSumNoStrings(...$args) { $returnValue = 0; - // Loop through the arguments foreach (Functions::flattenArray($args) as $arg) { // Is it a numeric value? - if (is_numeric($arg)) { + if (is_numeric($arg) || empty($arg)) { + if (is_string($arg)) { + $arg = (int) $arg; + } $returnValue += $arg; } elseif (Functions::isError($arg)) { return $arg; diff --git a/tests/PhpSpreadsheetTests/Calculation/RowColumnReferenceTest.php b/tests/PhpSpreadsheetTests/Calculation/RowColumnReferenceTest.php new file mode 100644 index 0000000000..2c2f4c119d --- /dev/null +++ b/tests/PhpSpreadsheetTests/Calculation/RowColumnReferenceTest.php @@ -0,0 +1,87 @@ +spreadSheet = new Spreadsheet(); + + $dataSheet = new Worksheet($this->spreadSheet, 'data sheet'); + $this->spreadSheet->addSheet($dataSheet, 0); + $dataSheet->setCellValue('B1', 1.1); + $dataSheet->setCellValue('B2', 2.2); + $dataSheet->setCellValue('B3', 4.4); + $dataSheet->setCellValue('C3', 8.8); + $dataSheet->setCellValue('D3', 16.16); + + $calcSheet = new Worksheet($this->spreadSheet, 'summary sheet'); + $this->spreadSheet->addSheet($calcSheet, 1); + $calcSheet->setCellValue('B1', 2.2); + $calcSheet->setCellValue('B2', 4.4); + $calcSheet->setCellValue('B3', 8.8); + $calcSheet->setCellValue('C3', 16.16); + $calcSheet->setCellValue('D3', 32.32); + + $this->spreadSheet->setActiveSheetIndexByName('summary sheet'); + } + + /** + * @dataProvider providerCurrentWorksheetFormulae + * + * @param mixed $formula + * @param mixed $expectedResult + */ + public function testCurrentWorksheet(string $formula, float $expectedResult): void + { + $worksheet = $this->spreadSheet->getActiveSheet(); + + $worksheet->setCellValue('A1', $formula); + + $result = $worksheet->getCell('A1')->getCalculatedValue(); + self::assertSame($expectedResult, $result); + } + + public function providerCurrentWorksheetFormulae(): array + { + return [ + 'relative range in active worksheet' => ['=SUM(B1:B3)', 15.4], + 'range with absolute columns in active worksheet' => ['=SUM($B1:$B3)', 15.4], + 'range with absolute rows in active worksheet' => ['=SUM(B$1:B$3)', 15.4], + 'range with absolute columns and rows in active worksheet' => ['=SUM($B$1:$B$3)', 15.4], + 'another relative range in active worksheet' => ['=SUM(B3:D3)', 57.28], + 'relative column range in active worksheet' => ['=SUM(B:B)', 15.4], + 'absolute column range in active worksheet' => ['=SUM($B:$B)', 15.4], + 'relative row range in active worksheet' => ['=SUM(3:3)', 57.28], + 'absolute row range in active worksheet' => ['=SUM($3:$3)', 57.28], + 'relative range in specified active worksheet' => ['=SUM(\'summary sheet\'!B1:B3)', 15.4], + 'range with absolute columns in specified active worksheet' => ['=SUM(\'summary sheet\'!$B1:$B3)', 15.4], + 'range with absolute rows in specified active worksheet' => ['=SUM(\'summary sheet\'!B$1:B$3)', 15.4], + 'range with absolute columns and rows in specified active worksheet' => ['=SUM(\'summary sheet\'!$B$1:$B$3)', 15.4], + 'another relative range in specified active worksheet' => ['=SUM(\'summary sheet\'!B3:D3)', 57.28], + 'relative column range in specified active worksheet' => ['=SUM(\'summary sheet\'!B:B)', 15.4], + 'absolute column range in specified active worksheet' => ['=SUM(\'summary sheet\'!$B:$B)', 15.4], + 'relative row range in specified active worksheet' => ['=SUM(\'summary sheet\'!3:3)', 57.28], + 'absolute row range in specified active worksheet' => ['=SUM(\'summary sheet\'!$3:$3)', 57.28], + 'relative range in specified other worksheet' => ['=SUM(\'data sheet\'!B1:B3)', 7.7], + 'range with absolute columns in specified other worksheet' => ['=SUM(\'data sheet\'!$B1:$B3)', 7.7], + 'range with absolute rows in specified other worksheet' => ['=SUM(\'data sheet\'!B$1:B$3)', 7.7], + 'range with absolute columns and rows in specified other worksheet' => ['=SUM(\'data sheet\'!$B$1:$B$3)', 7.7], + 'another relative range in specified other worksheet' => ['=SUM(\'data sheet\'!B3:D3)', 29.36], + 'relative column range in specified other worksheet' => ['=SUM(\'data sheet\'!B:B)', 7.7], + 'absolute column range in specified other worksheet' => ['=SUM(\'data sheet\'!$B:$B)', 7.7], + 'relative row range in specified other worksheet' => ['=SUM(\'data sheet\'!3:3)', 29.36], + 'absolute row range in specified other worksheet' => ['=SUM(\'data sheet\'!$3:$3)', 29.36], + ]; + } +} From d5952f3bb63a17c70b8143466ba90f7598cfc16c Mon Sep 17 00:00:00 2001 From: MarkBaker Date: Tue, 27 Apr 2021 18:26:33 +0200 Subject: [PATCH 2/3] PHPStan appeasement --- src/PhpSpreadsheet/Calculation/Calculation.php | 2 +- .../PhpSpreadsheetTests/Calculation/RowColumnReferenceTest.php | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/PhpSpreadsheet/Calculation/Calculation.php b/src/PhpSpreadsheet/Calculation/Calculation.php index c09ac51e87..abd997a33a 100644 --- a/src/PhpSpreadsheet/Calculation/Calculation.php +++ b/src/PhpSpreadsheet/Calculation/Calculation.php @@ -4164,7 +4164,7 @@ private function internalParseFormula($formula, ?Cell $pCell = null) $stackItemType = 'Row Reference'; $column = 'A'; if (($testPrevOp !== null && $testPrevOp['value'] === ':') && $pCellParent !== null) { - $column = $pCellParent->getHighestDataColumn((int) $val); + $column = $pCellParent->getHighestDataColumn($val); } $val = "{$rowRangeReference[2]}{$column}{$rowRangeReference[7]}"; $stackItemReference = $val; diff --git a/tests/PhpSpreadsheetTests/Calculation/RowColumnReferenceTest.php b/tests/PhpSpreadsheetTests/Calculation/RowColumnReferenceTest.php index 2c2f4c119d..8c9d23f76b 100644 --- a/tests/PhpSpreadsheetTests/Calculation/RowColumnReferenceTest.php +++ b/tests/PhpSpreadsheetTests/Calculation/RowColumnReferenceTest.php @@ -38,9 +38,6 @@ protected function setUp(): void /** * @dataProvider providerCurrentWorksheetFormulae - * - * @param mixed $formula - * @param mixed $expectedResult */ public function testCurrentWorksheet(string $formula, float $expectedResult): void { From 4a0eb62d04b633dbbd1ff76528fb5d39eefd02d2 Mon Sep 17 00:00:00 2001 From: MarkBaker Date: Tue, 27 Apr 2021 18:53:24 +0200 Subject: [PATCH 3/3] Update Change Log --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb09f62885..21762f893b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org). ### Added +- Improved support for Row and Column ranges in formulae [Issue #1755](https://github.com/PHPOffice/PhpSpreadsheet/issues/1755) [PR #2028](https://github.com/PHPOffice/PhpSpreadsheet/pull/2028) - Implemented the CHITEST(), CHISQ.DIST() and CHISQ.INV() and equivalent Statistical functions, for both left- and right-tailed distributions. - Support for ActiveSheet and SelectedCells in the ODS Reader and Writer. [PR #1908](https://github.com/PHPOffice/PhpSpreadsheet/pull/1908)