Skip to content

Commit 1282f3d

Browse files
authored
Performance Improvements for Csv Reader (#3769)
* Performance Improvements for Csv Reader
  Investigating issue #381, a means was suggested to duplicate a problem, but no problem occurred ... except for performance. This involved a spreadsheet with a large number of cells, definitely not PhpSpreadsheet's strong point; even so, the program (entirely available in the issue) took a disastrous two or so hours to complete on my system. Looking at the Csv Reader code, several opportunities to cache results and avoid function calls jumped out, none of which seem to materially add to the maintenance burden of the program. Testing these changes resulted in a run time of about 20 minutes, still hardly a thing of beauty, but a huge improvement over the original and therefore worth proceeding with.
* Redo CsvIssue2232Test
  Test cases included duplicates, and didn't account for some things (e.g. French locale will treat both 'vrai' and 'true' as true).
* Additional Optimization
1 parent bc9ca28 commit 1282f3d

File tree

2 files changed

+43
-23
lines changed

2 files changed

+43
-23
lines changed

src/PhpSpreadsheet/Reader/Csv.php

+37-18
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
1010
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
1111
use PhpOffice\PhpSpreadsheet\Spreadsheet;
12-
use PhpOffice\PhpSpreadsheet\Style\NumberFormat;
1312
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
1413

1514
class Csv extends BaseReader
@@ -102,6 +101,14 @@ class Csv extends BaseReader
102101
/** @var bool */
103102
private $sheetNameIsFileName = false;
104103

104+
private string $getTrue = 'true';
105+
106+
private string $getFalse = 'false';
107+
108+
private string $thousandsSeparator = ',';
109+
110+
private string $decimalSeparator = '.';
111+
105112
/**
106113
* Create a new CSV Reader instance.
107114
*/
@@ -234,13 +241,14 @@ public function listWorksheetInfo(string $filename): array
234241
$worksheetInfo[0]['lastColumnIndex'] = 0;
235242
$worksheetInfo[0]['totalRows'] = 0;
236243
$worksheetInfo[0]['totalColumns'] = 0;
244+
$delimiter = $this->delimiter ?? '';
237245

238246
// Loop through each line of the file in turn
239-
$rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
247+
$rowData = fgetcsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
240248
while (is_array($rowData)) {
241249
++$worksheetInfo[0]['totalRows'];
242250
$worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1);
243-
$rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
251+
$rowData = fgetcsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
244252
}
245253

246254
$worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1);
@@ -386,15 +394,24 @@ private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bo
386394
$outRow = 0;
387395

388396
// Loop through each line of the file in turn
389-
$rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
397+
$delimiter = $this->delimiter ?? '';
398+
$rowData = fgetcsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
390399
$valueBinder = Cell::getValueBinder();
391400
$preserveBooleanString = method_exists($valueBinder, 'getBooleanConversion') && $valueBinder->getBooleanConversion();
401+
$this->getTrue = Calculation::getTRUE();
402+
$this->getFalse = Calculation::getFALSE();
403+
$this->thousandsSeparator = StringHelper::getThousandsSeparator();
404+
$this->decimalSeparator = StringHelper::getDecimalSeparator();
392405
while (is_array($rowData)) {
393406
$noOutputYet = true;
394407
$columnLetter = 'A';
395408
foreach ($rowData as $rowDatum) {
396-
$this->convertBoolean($rowDatum, $preserveBooleanString);
397-
$numberFormatMask = $this->convertFormattedNumber($rowDatum);
409+
if ($preserveBooleanString) {
410+
$rowDatum = $rowDatum ?? '';
411+
} else {
412+
$this->convertBoolean($rowDatum);
413+
}
414+
$numberFormatMask = $this->castFormattedNumberToNumeric ? $this->convertFormattedNumber($rowDatum) : '';
398415
if (($rowDatum !== '' || $this->preserveNullString) && $this->readFilter->readCell($columnLetter, $currentRow)) {
399416
if ($this->contiguous) {
400417
if ($noOutputYet) {
@@ -405,15 +422,17 @@ private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bo
405422
$outRow = $currentRow;
406423
}
407424
// Set basic styling for the value (Note that this could be overloaded by styling in a value binder)
408-
$sheet->getCell($columnLetter . $outRow)->getStyle()
409-
->getNumberFormat()
410-
->setFormatCode($numberFormatMask);
425+
if ($numberFormatMask !== '') {
426+
$sheet->getStyle($columnLetter . $outRow)
427+
->getNumberFormat()
428+
->setFormatCode($numberFormatMask);
429+
}
411430
// Set cell value
412431
$sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
413432
}
414433
++$columnLetter;
415434
}
416-
$rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
435+
$rowData = fgetcsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
417436
++$currentRow;
418437
}
419438

@@ -429,12 +448,12 @@ private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bo
429448
/**
430449
* Convert string true/false to boolean, and null to null-string.
431450
*/
432-
private function convertBoolean(mixed &$rowDatum, bool $preserveBooleanString): void
451+
private function convertBoolean(mixed &$rowDatum): void
433452
{
434-
if (is_string($rowDatum) && !$preserveBooleanString) {
435-
if (strcasecmp(Calculation::getTRUE(), $rowDatum) === 0 || strcasecmp('true', $rowDatum) === 0) {
453+
if (is_string($rowDatum)) {
454+
if (strcasecmp($this->getTrue, $rowDatum) === 0 || strcasecmp('true', $rowDatum) === 0) {
436455
$rowDatum = true;
437-
} elseif (strcasecmp(Calculation::getFALSE(), $rowDatum) === 0 || strcasecmp('false', $rowDatum) === 0) {
456+
} elseif (strcasecmp($this->getFalse, $rowDatum) === 0 || strcasecmp('false', $rowDatum) === 0) {
438457
$rowDatum = false;
439458
}
440459
} else {
@@ -447,18 +466,18 @@ private function convertBoolean(mixed &$rowDatum, bool $preserveBooleanString):
447466
*/
448467
private function convertFormattedNumber(mixed &$rowDatum): string
449468
{
450-
$numberFormatMask = NumberFormat::FORMAT_GENERAL;
469+
$numberFormatMask = '';
451470
if ($this->castFormattedNumberToNumeric === true && is_string($rowDatum)) {
452471
$numeric = str_replace(
453-
[StringHelper::getThousandsSeparator(), StringHelper::getDecimalSeparator()],
472+
[$this->thousandsSeparator, $this->decimalSeparator],
454473
['', '.'],
455474
$rowDatum
456475
);
457476

458477
if (is_numeric($numeric)) {
459-
$decimalPos = strpos($rowDatum, StringHelper::getDecimalSeparator());
478+
$decimalPos = strpos($rowDatum, $this->decimalSeparator);
460479
if ($this->preserveNumericFormatting === true) {
461-
$numberFormatMask = (str_contains($rowDatum, StringHelper::getThousandsSeparator()))
480+
$numberFormatMask = (str_contains($rowDatum, $this->thousandsSeparator))
462481
? '#,##0' : '0';
463482
if ($decimalPos !== false) {
464483
$decimals = strlen($rowDatum) - $decimalPos - 1;

tests/PhpSpreadsheetTests/Reader/Csv/CsvIssue2232Test.php

+6-5
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ public static function providerIssue2232(): array
6565
/**
6666
* @dataProvider providerIssue2232locale
6767
*/
68-
public function testBooleanConversionsLocaleAware(bool $useStringBinder, ?bool $preserveBoolString, mixed $b4Value, mixed $b5Value): void
68+
public function testBooleanConversionsLocaleAware(bool $useStringBinder, ?bool $preserveBoolString, mixed $b2Value, mixed $b3Value, mixed $b4Value, mixed $b5Value): void
6969
{
7070
if ($useStringBinder) {
7171
$binder = new StringValueBinder();
@@ -81,6 +81,8 @@ public function testBooleanConversionsLocaleAware(bool $useStringBinder, ?bool $
8181
$filename = 'tests/data/Reader/CSV/issue.2232.csv';
8282
$spreadsheet = $reader->load($filename);
8383
$sheet = $spreadsheet->getActiveSheet();
84+
self::assertSame($b2Value, $sheet->getCell('B2')->getValue());
85+
self::assertSame($b3Value, $sheet->getCell('B3')->getValue());
8486
self::assertSame($b4Value, $sheet->getCell('B4')->getValue());
8587
self::assertSame($b5Value, $sheet->getCell('B5')->getValue());
8688
$spreadsheet->disconnectWorksheets();
@@ -89,10 +91,9 @@ public function testBooleanConversionsLocaleAware(bool $useStringBinder, ?bool $
8991
public static function providerIssue2232locale(): array
9092
{
9193
return [
92-
[true, true, 'Faux', 'Vrai'],
93-
[true, true, 'Faux', 'Vrai'],
94-
[false, false, false, true],
95-
[false, false, false, true],
94+
'string binder preserve boolean string' => [true, true, 'FaLSe', 'tRUE', 'Faux', 'Vrai'],
95+
'string binder convert boolean string' => [true, false, false, true, false, true],
96+
'default binder' => [false, null, false, true, false, true],
9697
];
9798
}
9899
}

0 commit comments

Comments
 (0)