@@ -113,15 +113,12 @@ private static function forceString($arg): string
113
113
*/
114
114
private function toUtf8 ($ xml )
115
115
{
116
- $ pattern = '/encoding="(.*?)"/ ' ;
117
- $ result = preg_match ($ pattern , $ xml , $ matches );
118
- $ charset = strtoupper ($ result ? $ matches [1 ] : 'UTF-8 ' );
116
+ $ charset = $ this ->findCharSet ($ xml );
119
117
120
118
if ($ charset !== 'UTF-8 ' ) {
121
119
$ xml = self ::forceString (mb_convert_encoding ($ xml , 'UTF-8 ' , $ charset ));
122
120
123
- $ result = preg_match ($ pattern , $ xml , $ matches );
124
- $ charset = strtoupper ($ result ? $ matches [1 ] : 'UTF-8 ' );
121
+ $ charset = $ this ->findCharSet ($ xml );
125
122
if ($ charset !== 'UTF-8 ' ) {
126
123
throw new Reader \Exception ('Suspicious Double-encoded XML, spreadsheet file load() aborted to prevent XXE/XEE attacks ' );
127
124
}
@@ -169,4 +166,20 @@ public function scanFile($filestream)
169
166
{
170
167
return $ this ->scan (file_get_contents ($ filestream ));
171
168
}
169
+
170
/**
 * Find the character set named by the first encoding declaration in the XML text.
 *
 * XML permits optional whitespace on either side of the "=" in an encoding
 * declaration and allows the value to be wrapped in either double or single
 * quotes. All of those spellings are recognised, and the earliest occurrence
 * in the text wins regardless of which quote style it uses, so that a later
 * look-alike string in the document body cannot mask the real declaration.
 *
 * @param string $xml XML text to inspect
 *
 * @return string The declared encoding name in upper case, or 'UTF-8' when
 *                no encoding declaration is found
 */
private function findCharSet(string $xml): string
{
    // (?|...) is a branch-reset group: whichever quote style matches,
    // the encoding name is always captured as group 1.
    $pattern = '/encoding\s*=\s*(?|"([^"]*)"|\'([^\']*)\')/';

    if (preg_match($pattern, $xml, $matches)) {
        return strtoupper($matches[1]);
    }

    return 'UTF-8';
}
172
185
}
0 commit comments