Skip to content

Commit 37a8a56

Browse files
authored
Read Code Page for Xls ListWorksheetInfo/Names for BIFF5 (#3672)
* Read Code Page for Xls ListWorksheetInfo/Names for BIFF5 Fix #3671. Xls reader was not processing Code Page as part of functions ListWorksheetInfo/Names, which was causing them to fail for BIFF5 (and BIFF7); this was not a problem for BIFF8. There were no unit tests for these functions for either BIFF5 or BIFF8. There are now. * Add getVersion and getCodePage Methods These came about because the test file for a non-standard codepage was supposed to be BIFF5, but turned out to be BIFF8 using UTF-16 with some string data otherwise encoded. Add a BIFF5 equivalent (some hex editing was required), and the means to distinguish one from the other. * Found MACCENTRALEUROPE Text in BIFF8 It was used for the 'Last Modified By' property, even though the bulk of the spreadsheet uses UTF-16LE. Add a test.
1 parent dd97b8f commit 37a8a56

File tree

4 files changed

+162
-37
lines changed

4 files changed

+162
-37
lines changed

src/PhpSpreadsheet/Reader/Xls.php

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,15 +213,15 @@ class Xls extends BaseReader
213213
*
214214
* @var int
215215
*/
216-
private $version;
216+
private $version = 0;
217217

218218
/**
219219
* Codepage set in the Excel file being read. Only important for BIFF5 (Excel 5.0 - Excel 95)
220220
* For BIFF8 (Excel 97 - Excel 2003) this will always have the value 'UTF-16LE'.
221221
*
222222
* @var string
223223
*/
224-
private $codepage;
224+
private $codepage = '';
225225

226226
/**
227227
* Shared formats.
@@ -459,6 +459,11 @@ public function setCodepage(string $codepage): void
459459
$this->codepage = $codepage;
460460
}
461461

462+
public function getCodepage(): string
463+
{
464+
return $this->codepage;
465+
}
466+
462467
/**
463468
* Reads names of the worksheets from a file, without parsing the whole file to a PhpSpreadsheet object.
464469
*
@@ -498,6 +503,10 @@ public function listWorksheetNames($filename)
498503
$this->readDefault();
499504

500505
break 2;
506+
case self::XLS_TYPE_CODEPAGE:
507+
$this->readCodepage();
508+
509+
break;
501510
default:
502511
$this->readDefault();
503512

@@ -557,6 +566,10 @@ public function listWorksheetInfo($filename)
557566
$this->readDefault();
558567

559568
break 2;
569+
case self::XLS_TYPE_CODEPAGE:
570+
$this->readCodepage();
571+
572+
break;
560573
default:
561574
$this->readDefault();
562575

@@ -8088,4 +8101,9 @@ private function setCFRules(array $cellRanges, string $type, string $operator, $
80888101
$this->phpSheet->getStyle($cellRange)->setConditionalStyles($conditionalStyles);
80898102
}
80908103
}
8104+
8105+
public function getVersion(): int
8106+
{
8107+
return $this->version;
8108+
}
80918109
}
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
<?php
2+
3+
namespace PhpOffice\PhpSpreadsheetTests\Reader\Xls;
4+
5+
use PhpOffice\PhpSpreadsheet\Reader\Xls;
6+
use PhpOffice\PhpSpreadsheet\Shared\CodePage;
7+
use PHPUnit\Framework\TestCase;
8+
9+
class InfoNamesTest extends TestCase
10+
{
11+
public function testWorksheetNamesBiff5(): void
12+
{
13+
$filename = 'samples/templates/30templatebiff5.xls';
14+
$reader = new Xls();
15+
$names = $reader->listWorksheetNames($filename);
16+
$expected = ['Invoice', 'Terms and conditions'];
17+
self::assertSame($expected, $names);
18+
}
19+
20+
public function testWorksheetInfoBiff5(): void
21+
{
22+
$filename = 'samples/templates/30templatebiff5.xls';
23+
$reader = new Xls();
24+
$info = $reader->listWorksheetInfo($filename);
25+
$expected = [
26+
[
27+
'worksheetName' => 'Invoice',
28+
'lastColumnLetter' => 'E',
29+
'lastColumnIndex' => 4,
30+
'totalRows' => 19,
31+
'totalColumns' => 5,
32+
],
33+
[
34+
'worksheetName' => 'Terms and conditions',
35+
'lastColumnLetter' => 'B',
36+
'lastColumnIndex' => 1,
37+
'totalRows' => 3,
38+
'totalColumns' => 2,
39+
],
40+
];
41+
self::assertSame($expected, $info);
42+
self::assertSame(Xls::XLS_BIFF7, $reader->getVersion());
43+
self::assertSame('CP1252', $reader->getCodepage());
44+
}
45+
46+
public function testWorksheetNamesBiff8(): void
47+
{
48+
$filename = 'samples/templates/31docproperties.xls';
49+
$reader = new Xls();
50+
$names = $reader->listWorksheetNames($filename);
51+
$expected = ['Worksheet'];
52+
self::assertSame($expected, $names);
53+
}
54+
55+
public function testWorksheetInfoBiff8(): void
56+
{
57+
$filename = 'samples/templates/31docproperties.xls';
58+
$reader = new Xls();
59+
$info = $reader->listWorksheetInfo($filename);
60+
$expected = [
61+
[
62+
'worksheetName' => 'Worksheet',
63+
'lastColumnLetter' => 'B',
64+
'lastColumnIndex' => 1,
65+
'totalRows' => 1,
66+
'totalColumns' => 2,
67+
],
68+
];
69+
self::assertSame($expected, $info);
70+
self::assertSame(Xls::XLS_BIFF8, $reader->getVersion());
71+
self::assertSame('UTF-16LE', $reader->getCodepage());
72+
}
73+
74+
/**
75+
* Test load Xls file with MACCENTRALEUROPE encoding, which is implemented
76+
* as MAC-CENTRALEUROPE on some systems. Issue #549.
77+
*/
78+
private const MAC_CE = ['MACCENTRALEUROPE', 'MAC-CENTRALEUROPE'];
79+
80+
private const MAC_FILE5 = 'tests/data/Reader/XLS/maccentraleurope.biff5.xls';
81+
private const MAC_FILE8 = 'tests/data/Reader/XLS/maccentraleurope.xls';
82+
83+
public function testWorksheetNamesBiff5Mac(): void
84+
{
85+
$codePages = CodePage::getEncodings();
86+
self::assertSame(self::MAC_CE, $codePages[10029]);
87+
$reader = new Xls();
88+
$names = $reader->listWorksheetNames(self::MAC_FILE5);
89+
$expected = ['Ärkusz1'];
90+
self::assertSame($expected, $names);
91+
}
92+
93+
public function testWorksheetInfoBiff5Mac(): void
94+
{
95+
$codePages = CodePage::getEncodings();
96+
// prior test has replaced array with single string
97+
self::assertContains($codePages[10029], self::MAC_CE);
98+
$reader = new Xls();
99+
$info = $reader->listWorksheetInfo(self::MAC_FILE5);
100+
$expected = [
101+
[
102+
'worksheetName' => 'Ärkusz1',
103+
'lastColumnLetter' => 'P',
104+
'lastColumnIndex' => 15,
105+
'totalRows' => 3,
106+
'totalColumns' => 16,
107+
],
108+
];
109+
self::assertSame($expected, $info);
110+
self::assertSame(Xls::XLS_BIFF7, $reader->getVersion());
111+
self::assertContains($reader->getCodepage(), self::MAC_CE);
112+
}
113+
114+
public function testLoadMacCentralEuropeBiff5(): void
115+
{
116+
$reader = new Xls();
117+
$spreadsheet = $reader->load(self::MAC_FILE5);
118+
$sheet = $spreadsheet->getActiveSheet();
119+
self::assertSame('Ärkusz1', $sheet->getTitle());
120+
self::assertSame('Ładowność', $sheet->getCell('I1')->getValue());
121+
self::assertSame(Xls::XLS_BIFF7, $reader->getVersion());
122+
self::assertContains($reader->getCodepage(), self::MAC_CE);
123+
$spreadsheet->disconnectWorksheets();
124+
}
125+
126+
public function testLoadMacCentralEuropeBiff8(): void
127+
{
128+
// Document is UTF-16LE as a whole,
129+
// but some strings are stored as MACCENTRALEUROPE
130+
$reader = new Xls();
131+
$spreadsheet = $reader->load(self::MAC_FILE8);
132+
$sheet = $spreadsheet->getActiveSheet();
133+
self::assertSame('Arkusz1', $sheet->getTitle());
134+
self::assertSame('Ładowność', $sheet->getCell('I1')->getValue());
135+
self::assertSame(Xls::XLS_BIFF8, $reader->getVersion());
136+
self::assertSame('UTF-16LE', $reader->getCodepage());
137+
$properties = $spreadsheet->getProperties();
138+
// the following is stored as MACCENTRALEUROPE, not UTF-16LE
139+
self::assertSame('Użytkownik Microsoft Office', $properties->getLastModifiedBy());
140+
$spreadsheet->disconnectWorksheets();
141+
}
142+
}

tests/PhpSpreadsheetTests/Reader/Xls/XlsTest.php

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
use PhpOffice\PhpSpreadsheet\Cell\Cell;
66
use PhpOffice\PhpSpreadsheet\Reader\Xls;
7-
use PhpOffice\PhpSpreadsheet\Shared\CodePage;
87
use PhpOffice\PhpSpreadsheetTests\Functional\AbstractFunctional;
98

109
class XlsTest extends AbstractFunctional
@@ -88,40 +87,6 @@ public function testLoadXlsBug1592(): void
8887
$newspreadsheet->disconnectWorksheets();
8988
}
9089

91-
/**
92-
* Test load Xls file with MACCENTRALEUROPE encoding, which is implemented
93-
* as MAC-CENTRALEUROPE on some systems. Issue #549.
94-
*/
95-
public function testLoadMacCentralEurope(): void
96-
{
97-
$codePages = CodePage::getEncodings();
98-
self::assertIsArray($codePages[10029]);
99-
$filename = 'tests/data/Reader/XLS/maccentraleurope.xls';
100-
$reader = new Xls();
101-
// When no fix applied, spreadsheet fails to load on some systems
102-
$spreadsheet = $reader->load($filename);
103-
$sheet = $spreadsheet->getActiveSheet();
104-
self::assertSame('Ładowność', $sheet->getCell('I1')->getValue());
105-
$spreadsheet->disconnectWorksheets();
106-
}
107-
108-
/**
109-
* First test changes array entry in CodePage.
110-
* This test confirms that the new entry is okay.
111-
*/
112-
public function testLoadMacCentralEurope2(): void
113-
{
114-
$codePages = CodePage::getEncodings();
115-
self::assertIsString($codePages[10029]);
116-
$filename = 'tests/data/Reader/XLS/maccentraleurope.xls';
117-
$reader = new Xls();
118-
// When no fix applied, spreadsheet fails to load on some systems
119-
$spreadsheet = $reader->load($filename);
120-
$sheet = $spreadsheet->getActiveSheet();
121-
self::assertSame('Ładowność', $sheet->getCell('I1')->getValue());
122-
$spreadsheet->disconnectWorksheets();
123-
}
124-
12590
public function testLoadXlsBug1114(): void
12691
{
12792
$filename = 'tests/data/Reader/XLS/bug1114.xls';
18.5 KB
Binary file not shown.

0 commit comments

Comments
 (0)