Skip to content

Commit 603d093

Browse files
authored
Specify data type in html tags using attributes (#3445)
* Specify data type in html tags using attributes #3444 * Set data types using attributes in flushCell function and add some unit tests * Check TYPE_INLINE in flushCell and some changes in test cases
1 parent 452ec07 commit 603d093

File tree

2 files changed

+71
-11
lines changed

2 files changed

+71
-11
lines changed

src/PhpSpreadsheet/Reader/Html.php

+32-11
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
use DOMNode;
88
use DOMText;
99
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
10+
use PhpOffice\PhpSpreadsheet\Cell\DataType;
1011
use PhpOffice\PhpSpreadsheet\Helper\Dimension as CssDimension;
1112
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
1213
use PhpOffice\PhpSpreadsheet\Spreadsheet;
@@ -283,15 +284,35 @@ protected function releaseTableStartColumn(): string
283284
* @param int|string $row
284285
* @param mixed $cellContent
285286
*/
286-
protected function flushCell(Worksheet $sheet, $column, $row, &$cellContent): void
287+
protected function flushCell(Worksheet $sheet, $column, $row, &$cellContent, array $attributeArray): void
287288
{
288289
if (is_string($cellContent)) {
289290
// Simple String content
290291
if (trim($cellContent) > '') {
291292
// Only actually write it if there's content in the string
292293
// Write to worksheet to be done here...
293-
// ... we return the cell so we can mess about with styles more easily
294-
$sheet->setCellValue($column . $row, $cellContent);
294+
// ... we return the cell, so we can mess about with styles more easily
295+
296+
// Set cell value explicitly if there is data-type attribute
297+
if (isset($attributeArray['data-type'])) {
298+
$datatype = $attributeArray['data-type'];
299+
if (in_array($datatype, [DataType::TYPE_STRING, DataType::TYPE_STRING2, DataType::TYPE_INLINE])) {
300+
// Prevent Excel from treating a string that begins with an equals sign as a formula, or from converting big numbers to scientific notation
301+
if (substr($cellContent, 0, 1) === '=') {
302+
$sheet->getCell($column . $row)
303+
->getStyle()
304+
->setQuotePrefix(true);
305+
}
306+
}
307+
// Catch the exception raised for an invalid data type and fall back to setCellValue()
308+
try {
309+
$sheet->setCellValueExplicit($column . $row, $cellContent, $attributeArray['data-type']);
310+
} catch (\PhpOffice\PhpSpreadsheet\Exception $exception) {
311+
$sheet->setCellValue($column . $row, $cellContent);
312+
}
313+
} else {
314+
$sheet->setCellValue($column . $row, $cellContent);
315+
}
295316
$this->dataArray[$row][$column] = $cellContent;
296317
}
297318
} else {
@@ -355,7 +376,7 @@ private function processDomElementSpanEtc(Worksheet $sheet, int &$row, string &$
355376
private function processDomElementHr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
356377
{
357378
if ($child->nodeName === 'hr') {
358-
$this->flushCell($sheet, $column, $row, $cellContent);
379+
$this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
359380
++$row;
360381
if (isset($this->formats[$child->nodeName])) {
361382
$sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
@@ -375,7 +396,7 @@ private function processDomElementBr(Worksheet $sheet, int &$row, string &$colum
375396
$sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);
376397
} else {
377398
// Otherwise flush our existing content and move the row cursor on
378-
$this->flushCell($sheet, $column, $row, $cellContent);
399+
$this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
379400
++$row;
380401
}
381402
} else {
@@ -421,11 +442,11 @@ private function processDomElementH1Etc(Worksheet $sheet, int &$row, string &$co
421442
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
422443
} else {
423444
if ($cellContent > '') {
424-
$this->flushCell($sheet, $column, $row, $cellContent);
445+
$this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
425446
++$row;
426447
}
427448
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
428-
$this->flushCell($sheet, $column, $row, $cellContent);
449+
$this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
429450

430451
if (isset($this->formats[$child->nodeName])) {
431452
$sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
@@ -448,11 +469,11 @@ private function processDomElementLi(Worksheet $sheet, int &$row, string &$colum
448469
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
449470
} else {
450471
if ($cellContent > '') {
451-
$this->flushCell($sheet, $column, $row, $cellContent);
472+
$this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
452473
}
453474
++$row;
454475
$this->processDomElement($child, $sheet, $row, $column, $cellContent);
455-
$this->flushCell($sheet, $column, $row, $cellContent);
476+
$this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
456477
$column = 'A';
457478
}
458479
} else {
@@ -472,7 +493,7 @@ private function processDomElementImg(Worksheet $sheet, int &$row, string &$colu
472493
private function processDomElementTable(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
473494
{
474495
if ($child->nodeName === 'table') {
475-
$this->flushCell($sheet, $column, $row, $cellContent);
496+
$this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
476497
$column = $this->setTableStartColumn($column);
477498
if ($this->tableLevel > 1 && $row > 1) {
478499
--$row;
@@ -574,7 +595,7 @@ private function processDomElementThTd(Worksheet $sheet, int &$row, string &$col
574595
// apply inline style
575596
$this->applyInlineStyle($sheet, $row, $column, $attributeArray);
576597

577-
$this->flushCell($sheet, $column, $row, $cellContent);
598+
$this->flushCell($sheet, $column, $row, $cellContent, $attributeArray);
578599

579600
$this->processDomElementBgcolor($sheet, $row, $column, $attributeArray);
580601
$this->processDomElementWidth($sheet, $column, $attributeArray);

tests/PhpSpreadsheetTests/Reader/Html/HtmlTest.php

+39
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
namespace PhpOffice\PhpSpreadsheetTests\Reader\Html;
44

5+
use PhpOffice\PhpSpreadsheet\Cell\DataType;
56
use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
67
use PhpOffice\PhpSpreadsheet\Reader\Html;
78
use PhpOffice\PhpSpreadsheet\Style\Alignment;
@@ -380,4 +381,42 @@ public function testBorderWithColspan(): void
380381
}
381382
$spreadsheet->disconnectWorksheets();
382383
}
384+
385+
public function testDataType(): void
386+
{
387+
$html = '<table>
388+
<tr>
389+
<td data-type="b">1</td>
390+
<td data-type="s">12345678987654</td>
391+
<!-- in some cases, you may want to treat the string with beginning equal sign as a string rather than a formula -->
392+
<td data-type="s">=B1</td>
393+
<td data-type="d">2022-02-21 10:20:30</td>
394+
<td data-type="null">null</td>
395+
<td data-type="invalid-datatype">text with invalid datatype</td>
396+
</tr>
397+
</table>';
398+
399+
$reader = new Html();
400+
$spreadsheet = $reader->loadFromString($html);
401+
$firstSheet = $spreadsheet->getSheet(0);
402+
403+
// check boolean data type
404+
self::assertEquals(DataType::TYPE_BOOL, $firstSheet->getCell('A1')->getDataType());
405+
self::assertIsBool($firstSheet->getCell('A1')->getValue());
406+
407+
// check string data type
408+
self::assertEquals(DataType::TYPE_STRING, $firstSheet->getCell('B1')->getDataType());
409+
self::assertIsString($firstSheet->getCell('B1')->getValue());
410+
411+
// check that a string beginning with an equals sign (=B1) and given the string data type is not treated as a formula
412+
self::assertEquals(DataType::TYPE_STRING, $firstSheet->getCell('C1')->getDataType());
413+
self::assertEquals('=B1', $firstSheet->getCell('C1')->getValue());
414+
self::assertTrue($firstSheet->getCell('C1')->getStyle()->getQuotePrefix());
415+
416+
// check that an ISO date is stored as the expected numeric value
417+
self::assertEqualsWithDelta($firstSheet->getCell('D1')->getValue(), 44613.43090277778, 1.0e-12);
418+
419+
// check that the null data type yields a null cell value
420+
self::assertEquals($firstSheet->getCell('E1')->getValue(), null);
421+
}
383422
}

0 commit comments

Comments
 (0)