Skip to content

Commit 070e285

Browse files
committed
add ability to set codepage explicitly for BIFF5
If a BIFF5 (Excel 95) file doesn't have a codepage record, the default codepage CP1252 is used and can't be changed. That causes problems with decoding Cyrillic text.
1 parent ac7fb4a commit 070e285

File tree

3 files changed

+35
-1
lines changed

3 files changed

+35
-1
lines changed

CHANGELOG.md

+6
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com)
66
and this project adheres to [Semantic Versioning](https://semver.org).
77

8+
## [Unreleased]
9+
10+
### Added
11+
12+
- Add ability to set codepage explicitly for BIFF5 [#1018](https://github.com/PHPOffice/PhpSpreadsheet/issues/1018)
13+
814
## [1.13.0] - 2020-05-31
915

1016
### Added

src/PhpSpreadsheet/Reader/Xls.php

+15-1
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,20 @@ public function canRead($pFilename)
439439
}
440440
}
441441

442+
/**
443+
* @param string $codepage
444+
*
445+
* @throws PhpSpreadsheetException
446+
*/
447+
public function setCodepage(string $codepage): void
448+
{
449+
if (!CodePage::validate($codepage)) {
450+
throw new PhpSpreadsheetException('Unknown codepage: ' . $codepage);
451+
}
452+
453+
$this->codepage = $codepage;
454+
}
455+
442456
/**
443457
* Reads names of the worksheets from a file, without parsing the whole file to a PhpSpreadsheet object.
444458
*
@@ -640,7 +654,7 @@ public function load($pFilename)
640654

641655
// initialize
642656
$this->pos = 0;
643-
$this->codepage = 'CP1252';
657+
$this->codepage = $this->codepage ?: CodePage::DEFAULT_CODE_PAGE;
644658
$this->formats = [];
645659
$this->objFonts = [];
646660
$this->palette = [];

src/PhpSpreadsheet/Shared/CodePage.php

+14
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
class CodePage
88
{
9+
public const DEFAULT_CODE_PAGE = 'CP1252';
10+
911
private static $pageArray = [
1012
0 => 'CP1252', // CodePage is not always correctly set when the xls file was saved by Apple's Numbers program
1113
367 => 'ASCII', // ASCII
@@ -65,13 +67,25 @@ class CodePage
6567
65001 => 'UTF-8', // Unicode (UTF-8)
6668
];
6769

70+
/**
71+
* @param string $codePage
72+
*
73+
* @return bool
74+
*/
75+
public static function validate(string $codePage): bool
76+
{
77+
return (in_array($codePage, self::$pageArray, true));
78+
}
79+
6880
/**
6981
* Convert Microsoft Code Page Identifier to Code Page Name which iconv
7082
* and mbstring understands.
7183
*
7284
* @param int $codePage Microsoft Code Page Identifier
7385
*
7486
* @return string Code Page Name
87+
*
88+
* @throws PhpSpreadsheetException
7589
*/
7690
public static function numberToName(int $codePage): string
7791
{

0 commit comments

Comments
 (0)