diff --git a/CHANGELOG.md b/CHANGELOG.md index 3044b11695..1380bd9951 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org). - Formula Parser: Wrong line count for stuff like "MyOtherSheet!A:D" [#1215](https://github.com/PHPOffice/PhpSpreadsheet/issues/1215) - Call garbage collector after removing a column to prevent stale cached values - Trying to remove a column that doesn't exist deletes the latest column +- Deal with VML-Comments containing a new-line written by Excel 2013 - Keep big integer as integer instead of lossely casting to float [#874](https://github.com/PHPOffice/PhpSpreadsheet/pull/874) - Fix branch pruning handling of non boolean conditions [#1167](https://github.com/PHPOffice/PhpSpreadsheet/pull/1167) - Fix ODS Reader when no DC namespace are defined [#1182](https://github.com/PHPOffice/PhpSpreadsheet/pull/1182) diff --git a/src/PhpSpreadsheet/Reader/Xlsx.php b/src/PhpSpreadsheet/Reader/Xlsx.php index 4c10750524..386613194c 100644 --- a/src/PhpSpreadsheet/Reader/Xlsx.php +++ b/src/PhpSpreadsheet/Reader/Xlsx.php @@ -2,6 +2,7 @@ namespace PhpOffice\PhpSpreadsheet\Reader; +use DOMDocument; use PhpOffice\PhpSpreadsheet\Cell\Coordinate; use PhpOffice\PhpSpreadsheet\Cell\Hyperlink; use PhpOffice\PhpSpreadsheet\NamedRange; @@ -900,8 +901,9 @@ public function load($pFilename) $relPath = File::realpath(dirname("$dir/$fileWorksheet") . '/' . 
$relPath);
 
                                 try {
-                                    $vmlCommentsFile = simplexml_load_string(
-                                        $this->securityScanner->scan($this->getFromZipArchive($zip, $relPath)),
+                                    $secureXml = $this->securityScanner->scan($this->getFromZipArchive($zip, $relPath));
+                                    $vmlCommentsFile = $this->simplexml_load_string_of_html_too(
+                                        $secureXml,
                                         'SimpleXMLElement',
                                         Settings::getLibXmlLoaderOptions()
                                     );
@@ -2037,6 +2039,7 @@ private function getWorkbookBaseName(ZipArchive $zip)
                             $workbookBasename = $basename;
                         }
 
+
                         break;
                 }
             }
@@ -2044,4 +2047,84 @@ private function getWorkbookBaseName(ZipArchive $zip)
 
         return $workbookBasename;
     }
+
+    /**
+     * Loads a string as XML, falling back to the HTML parser when XML parsing fails.
+     *
+     * The fallback parses the string with DOMDocument::loadHTML() and imports the
+     * resulting document into SimpleXML.
+     *
+     * @param string $string Markup to parse
+     * @param string $class_name Class of the returned object
+     * @param int $options Bitmask of LIBXML_* options, handed to both parsers
+     * @param string $ns Namespace prefix or URI, used by the XML parser only
+     * @param bool $is_prefix Whether $ns is a prefix (true) or a URI (false)
+     *
+     * @return SimpleXMLElement
+     *
+     * @throws \Exception The XML parse failure, when the HTML fallback fails as well
+     */
+    protected function simplexml_load_string_of_html_too($string, $class_name = "SimpleXMLElement", $options = 0, $ns = "", $is_prefix = false)
+    {
+        try {
+            return $this->simplexml_load_string($string, $class_name, $options, $ns, $is_prefix);
+        } catch (\Exception $e) {
+            // DOMDocument::loadHTML() rejects empty input, so there is nothing to retry.
+            if ('' === trim((string) $string)) {
+                throw $e;
+            }
+
+            // Keep the HTML parser's complaints out of PHP's warning channel.
+            $previous = libxml_use_internal_errors(true);
+            $dom = new DOMDocument();
+            $loaded = $dom->loadHTML($string, $options);
+            libxml_clear_errors();
+            libxml_use_internal_errors($previous);
+
+            $xml = false === $loaded ? false : simplexml_import_dom($dom, $class_name);
+            if (!$xml instanceof \SimpleXMLElement) {
+                throw $e;
+            }
+
+            return $xml;
+        }
+    }
+
+    /**
+     * Parses an XML string and reports libxml parse errors as an exception.
+     *
+     * @param string $string XML to parse
+     * @param string $class_name Class of the returned object
+     * @param int $options Bitmask of LIBXML_* options
+     * @param string $ns Namespace prefix or URI
+     * @param bool $is_prefix Whether $ns is a prefix (true) or a URI (false)
+     *
+     * @return SimpleXMLElement
+     *
+     * @throws \Exception When parsing fails; the message lists each libxml error with its line and column
+     */
+    protected function simplexml_load_string($string, $class_name, $options, $ns, $is_prefix): SimpleXMLElement
+    {
+        libxml_clear_errors();
+
+        $previous = libxml_use_internal_errors(true);
+        $xml = simplexml_load_string($string, $class_name, $options, $ns, $is_prefix);
+        // Fetch the errors before restoring the previous mode: switching internal
+        // error handling off discards the collected error list.
+        $errors = libxml_get_errors();
+        libxml_clear_errors();
+        libxml_use_internal_errors($previous);
+
+        if (false === $xml) {
+            $message = '';
+            foreach ($errors as $error) {
+                $trimmedMsg = trim($error->message);
+                $message .= "$trimmedMsg on line: $error->line, column: $error->column.\n";
+            }
+
+            throw new \Exception($message);
+        }
+
+        return $xml;
+    }
 }