Skip to content

Commit 1a27422

Browse files
committed
HTML API: Include doctype in full parser serialize.
Output DOCTYPE when calling `WP_HTML_Processor::serialize` on a full document that includes a DOCTYPE. The DOCTYPE should be included in the serialized/normalized HTML output as it has an impact on how the document is handled, in particular whether the document should be handled in quirks or no-quirks mode. This only affects the serialization of full parsers at this time because DOCTYPE tokens are currently ignored in all possible fragments. The omission of the DOCTYPE is subtle but can change the serialized document's quirks/no-quirks mode. Props jonsurrell. Fixes #62396. git-svn-id: https://develop.svn.wordpress.org/trunk@59399 602fd350-edb4-49c9-b593-d223f7449a82
1 parent db2f6fe commit 1a27422

File tree

2 files changed

+57
-4
lines changed

2 files changed

+57
-4
lines changed

src/wp-includes/html-api/class-wp-html-processor.php

+24-4
Original file line numberDiff line numberDiff line change
@@ -1178,6 +1178,30 @@ protected function serialize_token(): string {
11781178
$token_type = $this->get_token_type();
11791179

11801180
switch ( $token_type ) {
1181+
case '#doctype':
1182+
$doctype = $this->get_doctype_info();
1183+
if ( null === $doctype ) {
1184+
break;
1185+
}
1186+
1187+
$html .= '<!DOCTYPE';
1188+
1189+
if ( $doctype->name ) {
1190+
$html .= " {$doctype->name}";
1191+
}
1192+
1193+
if ( null !== $doctype->public_identifier ) {
1194+
$html .= " PUBLIC \"{$doctype->public_identifier}\"";
1195+
}
1196+
if ( null !== $doctype->system_identifier ) {
1197+
if ( null === $doctype->public_identifier ) {
1198+
$html .= ' SYSTEM';
1199+
}
1200+
$html .= " \"{$doctype->system_identifier}\"";
1201+
}
1202+
$html .= '>';
1203+
break;
1204+
11811205
case '#text':
11821206
$html .= htmlspecialchars( $this->get_modifiable_text(), ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' );
11831207
break;
@@ -1194,10 +1218,6 @@ protected function serialize_token(): string {
11941218
case '#cdata-section':
11951219
$html .= "<![CDATA[{$this->get_modifiable_text()}]]>";
11961220
break;
1197-
1198-
case 'html':
1199-
$html .= '<!DOCTYPE html>';
1200-
break;
12011221
}
12021222

12031223
if ( '#tag' !== $token_type ) {

tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php

+33
Original file line numberDiff line numberDiff line change
@@ -284,4 +284,37 @@ public static function data_tokens_with_null_bytes() {
284284
'Comment text' => array( "<!-- \x00 -->", "<!-- \u{FFFD} -->" ),
285285
);
286286
}
287+
288+
/**
289+
* @ticket 62396
290+
*
291+
* @dataProvider data_provider_serialize_doctype
292+
*/
293+
public function test_full_document_serialize_includes_doctype( string $doctype_input, string $doctype_output ) {
294+
$processor = WP_HTML_Processor::create_full_parser(
295+
"{$doctype_input}👌"
296+
);
297+
$this->assertSame(
298+
"{$doctype_output}<html><head></head><body>👌</body></html>",
299+
$processor->serialize()
300+
);
301+
}
302+
303+
/**
304+
* Data provider.
305+
*
306+
* @return array[]
307+
*/
308+
public static function data_provider_serialize_doctype() {
309+
return array(
310+
'None' => array( '', '' ),
311+
'Empty' => array( '<!DOCTYPE>', '<!DOCTYPE>' ),
312+
'HTML5' => array( '<!DOCTYPE html>', '<!DOCTYPE html>' ),
313+
'Strange name' => array( '<!DOCTYPE WordPress>', '<!DOCTYPE wordpress>' ),
314+
'With public' => array( '<!DOCTYPE html PUBLIC "x">', '<!DOCTYPE html PUBLIC "x">' ),
315+
'With system' => array( '<!DOCTYPE html SYSTEM "y">', '<!DOCTYPE html SYSTEM "y">' ),
316+
'With public and system' => array( '<!DOCTYPE html PUBLIC "x" "y">', '<!DOCTYPE html PUBLIC "x" "y">' ),
317+
'Weird casing' => array( '<!docType HtmL pubLIc\'xxx\'"yyy" all this is ignored>', '<!DOCTYPE html PUBLIC "xxx" "yyy">' ),
318+
);
319+
}
287320
}

0 commit comments

Comments
 (0)