Skip to content

Commit e1fcd82

Browse files
rolandshoemaker authored and gopherbot committed
html: properly handle trailing solidus in unquoted attribute value in foreign content
The parser properly treats tags like <p a=/> as <p a="/">, but the tokenizer emits the SelfClosingTagToken token incorrectly. When the parser is used to parse foreign content, this results in an incorrect DOM.

Thanks to Sean Ng (https://ensy.zip) for reporting this issue.

Fixes golang/go#73070
Fixes CVE-2025-22872

Change-Id: I65c18df6d6244bf943b61e6c7a87895929e78f4f
Reviewed-on: https://go-review.googlesource.com/c/net/+/661256
Reviewed-by: Neal Patel <[email protected]>
Reviewed-by: Roland Shoemaker <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
Auto-Submit: Gopher Robot <[email protected]>
1 parent ebed060 commit e1fcd82

File tree

2 files changed

+34
-2
lines changed

2 files changed

+34
-2
lines changed

html/token.go

+16-2
Original file line numberDiff line numberDiff line change
@@ -839,8 +839,22 @@ func (z *Tokenizer) readStartTag() TokenType {
839839
if raw {
840840
z.rawTag = strings.ToLower(string(z.buf[z.data.start:z.data.end]))
841841
}
842-
// Look for a self-closing token like "<br/>".
843-
if z.err == nil && z.buf[z.raw.end-2] == '/' {
842+
// Look for a self-closing token (e.g. <br/>).
843+
//
844+
// Originally, we did this by just checking that the last character of the
845+
// tag (ignoring the closing bracket) was a solidus (/) character, but this
846+
// is not always accurate.
847+
//
848+
// We need to be careful that we don't misinterpret a non-self-closing tag
849+
// as self-closing, as can happen if the tag contains unquoted attribute
850+
// values (i.e. <p a=/>).
851+
//
852+
// To avoid this, we check that the last non-bracket character of the tag
853+
// (z.raw.end-2) isn't the same character as the last non-quote character of
854+
// the last attribute of the tag (z.attr[nAttrs-1][1].end-1), if the tag has
855+
// attributes.
856+
nAttrs := len(z.attr)
857+
if z.err == nil && z.buf[z.raw.end-2] == '/' && (nAttrs == 0 || z.raw.end-2 != z.attr[nAttrs-1][1].end-1) {
844858
return SelfClosingTagToken
845859
}
846860
return StartTagToken

html/token_test.go

+18
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,16 @@ var tokenTests = []tokenTest{
616616
`<p a/ ="">`,
617617
`<p a="" =""="">`,
618618
},
619+
{
620+
"slash at end of unquoted attribute value",
621+
`<p a="\">`,
622+
`<p a="\">`,
623+
},
624+
{
625+
"self-closing tag with attribute",
626+
`<p a=/>`,
627+
`<p a="/">`,
628+
},
619629
}
620630

621631
func TestTokenizer(t *testing.T) {
@@ -815,6 +825,14 @@ func TestReaderEdgeCases(t *testing.T) {
815825
}
816826
}
817827

828+
func TestSelfClosingTagValueConfusion(t *testing.T) {
829+
z := NewTokenizer(strings.NewReader(`<p a=/>`))
830+
tok := z.Next()
831+
if tok != StartTagToken {
832+
t.Fatalf("unexpected token type: got %s, want %s", tok, StartTagToken)
833+
}
834+
}
835+
818836
// zeroOneByteReader is like a strings.Reader that alternates between
819837
// returning 0 bytes and 1 byte at a time.
820838
type zeroOneByteReader struct {

0 commit comments

Comments
 (0)