Skip to content

Commit b570d75

Browse files
committed
Set data types using attributes in flushCell function and add some unit tests
1 parent 6b85c37 commit b570d75

File tree

2 files changed

+47
-25
lines changed

2 files changed

+47
-25
lines changed

src/PhpSpreadsheet/Reader/Html.php

+31-24
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
use DOMNode;
88
use DOMText;
99
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
10+
use PhpOffice\PhpSpreadsheet\Cell\DataType;
1011
use PhpOffice\PhpSpreadsheet\Helper\Dimension as CssDimension;
1112
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
1213
use PhpOffice\PhpSpreadsheet\Spreadsheet;
@@ -283,15 +284,34 @@ protected function releaseTableStartColumn(): string
283284
* @param int|string $row
284285
* @param mixed $cellContent
285286
*/
286-
protected function flushCell(Worksheet $sheet, $column, $row, &$cellContent): void
287+
protected function flushCell(Worksheet $sheet, $column, $row, &$cellContent, $attributeArray): void
287288
{
288289
if (is_string($cellContent)) {
289290
// Simple String content
290291
if (trim($cellContent) > '') {
291292
// Only actually write it if there's content in the string
292293
// Write to worksheet to be done here...
293-
// ... we return the cell so we can mess about with styles more easily
294-
$sheet->setCellValue($column . $row, $cellContent);
294+
// ... we return the cell, so we can mess about with styles more easily
295+
296+
// Set the cell value explicitly if there is a data-type attribute
297+
if (isset($attributeArray['data-type'])) {
298+
$datatype = $attributeArray['data-type'];
299+
if ($datatype == DataType::TYPE_STRING || $datatype == DataType::TYPE_STRING2) {
300+
if (substr($cellContent, 0, 1) === '=' || is_numeric($cellContent)) {
301+
$sheet->getCell($column . $row)
302+
->getStyle()
303+
->setQuotePrefix(true);
304+
}
305+
}
306+
// Catch the exception raised for an invalid data type and fall back to a plain setCellValue()
307+
try {
308+
$sheet->setCellValueExplicit($column . $row, $cellContent, $attributeArray['data-type']);
309+
} catch (\PhpOffice\PhpSpreadsheet\Exception $exception) {
310+
$sheet->setCellValue($column . $row, $cellContent);
311+
}
312+
} else {
313+
$sheet->setCellValue($column . $row, $cellContent);
314+
}
295315
$this->dataArray[$row][$column] = $cellContent;
296316
}
297317
} else {
@@ -355,7 +375,7 @@ private function processDomElementSpanEtc(Worksheet $sheet, int &$row, string &$
355375
private function processDomElementHr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
356376
{
357377
if ($child->nodeName === 'hr') {
358-
$this->flushCell($sheet, $column, $row, $cellContent);
378+
$this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
359379
++$row;
360380
if (isset($this->formats[$child->nodeName])) {
361381
$sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
@@ -375,7 +395,7 @@ private function processDomElementBr(Worksheet $sheet, int &$row, string &$colum
375395
$sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);
376396
} else {
377397
// Otherwise flush our existing content and move the row cursor on
378-
$this->flushCell($sheet, $column, $row, $cellContent);
398+
$this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
379399
++$row;
380400
}
381401
} else {
@@ -421,11 +441,11 @@ private function processDomElementH1Etc(Worksheet $sheet, int &$row, string &$co
421441
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
422442
} else {
423443
if ($cellContent > '') {
424-
$this->flushCell($sheet, $column, $row, $cellContent);
444+
$this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
425445
++$row;
426446
}
427447
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
428-
$this->flushCell($sheet, $column, $row, $cellContent);
448+
$this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
429449

430450
if (isset($this->formats[$child->nodeName])) {
431451
$sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
@@ -448,11 +468,11 @@ private function processDomElementLi(Worksheet $sheet, int &$row, string &$colum
448468
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
449469
} else {
450470
if ($cellContent > '') {
451-
$this->flushCell($sheet, $column, $row, $cellContent);
471+
$this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
452472
}
453473
++$row;
454474
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
455-
$this->flushCell($sheet, $column, $row, $cellContent);
475+
$this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
456476
$column = 'A';
457477
}
458478
} else {
@@ -472,7 +492,7 @@ private function processDomElementImg(Worksheet $sheet, int &$row, string &$colu
472492
private function processDomElementTable(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
473493
{
474494
if ($child->nodeName === 'table') {
475-
$this->flushCell($sheet, $column, $row, $cellContent);
495+
$this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
476496
$column = $this->setTableStartColumn($column);
477497
if ($this->tableLevel > 1 && $row > 1) {
478498
--$row;
@@ -564,18 +584,6 @@ private function processDomElementDataFormat(Worksheet $sheet, int $row, string
564584
}
565585
}
566586

567-
private function processDomElementDataType(Worksheet $sheet, int $row, string $column, array $attributeArray): void
568-
{
569-
if (isset($attributeArray['data-type'])) {
570-
$cell = $sheet->getCell($column . $row);
571-
$cellValue = $cell->getValue();
572-
//cast value to the datatype
573-
$sheet->setCellValueExplicit($column . $row, $cellValue, $attributeArray['data-type']);
574-
//set datatype
575-
$cell->setDataType($attributeArray['data-type']);
576-
}
577-
}
578-
579587
private function processDomElementThTd(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
580588
{
581589
while (isset($this->rowspan[$column . $row])) {
@@ -586,15 +594,14 @@ private function processDomElementThTd(Worksheet $sheet, int &$row, string &$col
586594
// apply inline style
587595
$this->applyInlineStyle($sheet, $row, $column, $attributeArray);
588596

589-
$this->flushCell($sheet, $column, $row, $cellContent);
597+
$this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
590598

591599
$this->processDomElementBgcolor($sheet, $row, $column, $attributeArray);
592600
$this->processDomElementWidth($sheet, $column, $attributeArray);
593601
$this->processDomElementHeight($sheet, $row, $attributeArray);
594602
$this->processDomElementAlign($sheet, $row, $column, $attributeArray);
595603
$this->processDomElementVAlign($sheet, $row, $column, $attributeArray);
596604
$this->processDomElementDataFormat($sheet, $row, $column, $attributeArray);
597-
$this->processDomElementDataType($sheet, $row, $column, $attributeArray);
598605

599606
if (isset($attributeArray['rowspan'], $attributeArray['colspan'])) {
600607
//create merging rowspan and colspan

tests/PhpSpreadsheetTests/Reader/Html/HtmlTest.php

+16-1
Original file line numberDiff line numberDiff line change
@@ -370,10 +370,15 @@ public function testDataType(): void
370370
$html = '<table>
371371
<tr>
372372
<td data-type="b">1</td>
373-
<td data-type="s">12345678987654321</td>
373+
<td data-type="s">1234567</td>
374374
<td data-type="f">=CONCAT("TEXT A ","TEXT B")</td>
375+
<!-- in some cases, you may want to treat the string with beginning equal sign as a string rather than a formula -->
376+
<td data-type="s">=B1</td>
377+
<td data-type="d">2022-02-21 10:20:30</td>
378+
<td data-type="invalid-datatype">text</td>
375379
</tr>
376380
</table>';
381+
377382
$reader = new Html();
378383
$spreadsheet = $reader->loadFromString($html);
379384
$firstSheet = $spreadsheet->getSheet(0);
@@ -388,7 +393,17 @@ public function testDataType(): void
388393

389394
// check formula data type
390395
self::assertEquals(DataType::TYPE_FORMULA, $firstSheet->getCell('C1')->getDataType());
396+
391397
// check formula output
392398
self::assertEquals('TEXT A TEXT B', $firstSheet->getCell('C1')->getFormattedValue());
399+
400+
// check that a string beginning with an equal sign is stored as a string, not a formula
401+
self::assertEquals(DataType::TYPE_STRING, $firstSheet->getCell('D1')->getDataType());
402+
self::assertEquals('=B1', $firstSheet->getCell('D1')->getValue());
403+
self::assertIsString($firstSheet->getCell('D1')->getValue());
404+
self::assertTrue($firstSheet->getCell('D1')->getStyle()->getQuotePrefix());
405+
406+
// check ISO date
407+
self::assertEquals($firstSheet->getCell('E1')->getValue(), 44613.43090277778);
393408
}
394409
}

0 commit comments

Comments
 (0)