Skip to content

Commit e2f4134

Browse files
dsnetgopherbot
authored andcommitted
encoding/json: unify encodeState.string and encodeState.stringBytes
This is part of the effort to reduce direct reliance on bytes.Buffer so that we can use a buffer with better pooling characteristics. Unify these two methods as a single version that uses generics to reduce duplicated logic. Unfortunately, we lack a generic version of utf8.DecodeRune (see #56948), so we cast []byte to string. The []byte variant is slightly slower for multi-byte unicode since casting results in a stack-allocated copy operation. Fortunately, this code path is used only for TextMarshalers. We can also delete TestStringBytes, which exists to ensure that the two duplicate implementations remain in sync. Performance: name old time/op new time/op delta CodeEncoder 399µs ± 2% 409µs ± 2% +2.59% (p=0.000 n=9+9) CodeEncoderError 450µs ± 1% 451µs ± 2% ~ (p=0.684 n=10+10) CodeMarshal 553µs ± 2% 562µs ± 3% ~ (p=0.075 n=10+10) CodeMarshalError 733µs ± 3% 737µs ± 2% ~ (p=0.400 n=9+10) EncodeMarshaler 24.9ns ±12% 24.1ns ±13% ~ (p=0.190 n=10+10) EncoderEncode 12.3ns ± 3% 14.7ns ±20% ~ (p=0.315 n=8+10) name old speed new speed delta CodeEncoder 4.87GB/s ± 2% 4.74GB/s ± 2% -2.53% (p=0.000 n=9+9) CodeEncoderError 4.31GB/s ± 1% 4.30GB/s ± 2% ~ (p=0.684 n=10+10) CodeMarshal 3.51GB/s ± 2% 3.46GB/s ± 3% ~ (p=0.075 n=10+10) CodeMarshalError 2.65GB/s ± 3% 2.63GB/s ± 2% ~ (p=0.400 n=9+10) name old alloc/op new alloc/op delta CodeEncoder 327B ±347% 447B ±232% +36.93% (p=0.034 n=9+10) CodeEncoderError 142B ± 1% 143B ± 0% ~ (p=1.000 n=8+7) CodeMarshal 1.96MB ± 2% 1.96MB ± 2% ~ (p=0.468 n=10+10) CodeMarshalError 2.04MB ± 3% 2.03MB ± 1% ~ (p=0.971 n=10+10) EncodeMarshaler 4.00B ± 0% 4.00B ± 0% ~ (all equal) EncoderEncode 0.00B 0.00B ~ (all equal) name old allocs/op new allocs/op delta CodeEncoder 0.00 0.00 ~ (all equal) CodeEncoderError 4.00 ± 0% 4.00 ± 0% ~ (all equal) CodeMarshal 1.00 ± 0% 1.00 ± 0% ~ (all equal) CodeMarshalError 6.00 ± 0% 6.00 ± 0% ~ (all equal) EncodeMarshaler 1.00 ± 0% 1.00 ± 0% ~ (all equal) EncoderEncode 0.00 0.00 ~ (all equal) There is a very slight performance degradation for CodeEncoder due to an increase in allocation sizes. However, the number of allocations did not change. This is likely due to remote effects of the growth rate differences between bytes.Buffer and the builtin append function. We shouldn't overly rely on the growth rate of bytes.Buffer anyways since that is subject to possibly change in #51462. As the benchtime increases, the alloc/op goes down indicating that the amortized memory cost is fixed. Updates #27735 Change-Id: Ie35e480e292fe082d7986e0a4d81212c1d4202b3 Reviewed-on: https://go-review.googlesource.com/c/go/+/469556 Run-TryBot: Joseph Tsai <[email protected]> Reviewed-by: Bryan Mills <[email protected]> Reviewed-by: Ian Lance Taylor <[email protected]> TryBot-Result: Gopher Robot <[email protected]> Reviewed-by: Daniel Martí <[email protected]> Auto-Submit: Joseph Tsai <[email protected]>
1 parent 21ff670 commit e2f4134

File tree

2 files changed

+35
-160
lines changed

2 files changed

+35
-160
lines changed

src/encoding/json/encode.go

+35-111
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,10 @@ type encodeState struct {
295295
ptrSeen map[any]struct{}
296296
}
297297

298+
func (e *encodeState) AvailableBuffer() []byte {
299+
return availableBuffer(&e.Buffer)
300+
}
301+
298302
const startDetectingCyclesAfter = 1000
299303

300304
var encodeStatePool sync.Pool
@@ -519,7 +523,7 @@ func textMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) {
519523
if err != nil {
520524
e.error(&MarshalerError{v.Type(), err, "MarshalText"})
521525
}
522-
e.stringBytes(b, opts.escapeHTML)
526+
e.Write(appendString(e.AvailableBuffer(), b, opts.escapeHTML))
523527
}
524528

525529
func addrTextMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) {
@@ -533,7 +537,7 @@ func addrTextMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) {
533537
if err != nil {
534538
e.error(&MarshalerError{v.Type(), err, "MarshalText"})
535539
}
536-
e.stringBytes(b, opts.escapeHTML)
540+
e.Write(appendString(e.AvailableBuffer(), b, opts.escapeHTML))
537541
}
538542

539543
func boolEncoder(e *encodeState, v reflect.Value, opts encOpts) {
@@ -639,14 +643,10 @@ func stringEncoder(e *encodeState, v reflect.Value, opts encOpts) {
639643
return
640644
}
641645
if opts.quoted {
642-
e2 := newEncodeState()
643-
// Since we encode the string twice, we only need to escape HTML
644-
// the first time.
645-
e2.string(v.String(), opts.escapeHTML)
646-
e.stringBytes(e2.Bytes(), false)
647-
encodeStatePool.Put(e2)
646+
b := appendString(nil, v.String(), opts.escapeHTML)
647+
e.Write(appendString(e.AvailableBuffer(), b, false)) // no need to escape again since it is already escaped
648648
} else {
649-
e.string(v.String(), opts.escapeHTML)
649+
e.Write(appendString(e.AvailableBuffer(), v.String(), opts.escapeHTML))
650650
}
651651
}
652652

@@ -811,7 +811,7 @@ func (me mapEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) {
811811
if i > 0 {
812812
e.WriteByte(',')
813813
}
814-
e.string(kv.ks, opts.escapeHTML)
814+
e.Write(appendString(e.AvailableBuffer(), kv.ks, opts.escapeHTML))
815815
e.WriteByte(':')
816816
me.elemEnc(e, kv.v, opts)
817817
}
@@ -1029,121 +1029,49 @@ func (w *reflectWithString) resolve() error {
10291029
panic("unexpected map key type")
10301030
}
10311031

1032-
// NOTE: keep in sync with stringBytes below.
1033-
func (e *encodeState) string(s string, escapeHTML bool) {
1034-
e.WriteByte('"')
1032+
func appendString[Bytes []byte | string](dst []byte, src Bytes, escapeHTML bool) []byte {
1033+
dst = append(dst, '"')
10351034
start := 0
1036-
for i := 0; i < len(s); {
1037-
if b := s[i]; b < utf8.RuneSelf {
1035+
for i := 0; i < len(src); {
1036+
if b := src[i]; b < utf8.RuneSelf {
10381037
if htmlSafeSet[b] || (!escapeHTML && safeSet[b]) {
10391038
i++
10401039
continue
10411040
}
1042-
if start < i {
1043-
e.WriteString(s[start:i])
1044-
}
1045-
e.WriteByte('\\')
1041+
dst = append(dst, src[start:i]...)
10461042
switch b {
10471043
case '\\', '"':
1048-
e.WriteByte(b)
1044+
dst = append(dst, '\\', b)
10491045
case '\n':
1050-
e.WriteByte('n')
1046+
dst = append(dst, '\\', 'n')
10511047
case '\r':
1052-
e.WriteByte('r')
1048+
dst = append(dst, '\\', 'r')
10531049
case '\t':
1054-
e.WriteByte('t')
1050+
dst = append(dst, '\\', 't')
10551051
default:
10561052
// This encodes bytes < 0x20 except for \t, \n and \r.
10571053
// If escapeHTML is set, it also escapes <, >, and &
10581054
// because they can lead to security holes when
10591055
// user-controlled strings are rendered into JSON
10601056
// and served to some browsers.
1061-
e.WriteString(`u00`)
1062-
e.WriteByte(hex[b>>4])
1063-
e.WriteByte(hex[b&0xF])
1057+
dst = append(dst, '\\', 'u', '0', '0', hex[b>>4], hex[b&0xF])
10641058
}
10651059
i++
10661060
start = i
10671061
continue
10681062
}
1069-
c, size := utf8.DecodeRuneInString(s[i:])
1070-
if c == utf8.RuneError && size == 1 {
1071-
if start < i {
1072-
e.WriteString(s[start:i])
1073-
}
1074-
e.WriteString(`\ufffd`)
1075-
i += size
1076-
start = i
1077-
continue
1063+
// TODO(https://go.dev/issue/56948): Use generic utf8 functionality.
1064+
// For now, cast only a small portion of byte slices to a string
1065+
// so that it can be stack allocated. This slows down []byte slightly
1066+
// due to the extra copy, but keeps string performance roughly the same.
1067+
n := len(src) - i
1068+
if n > utf8.UTFMax {
1069+
n = utf8.UTFMax
10781070
}
1079-
// U+2028 is LINE SEPARATOR.
1080-
// U+2029 is PARAGRAPH SEPARATOR.
1081-
// They are both technically valid characters in JSON strings,
1082-
// but don't work in JSONP, which has to be evaluated as JavaScript,
1083-
// and can lead to security holes there. It is valid JSON to
1084-
// escape them, so we do so unconditionally.
1085-
// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
1086-
if c == '\u2028' || c == '\u2029' {
1087-
if start < i {
1088-
e.WriteString(s[start:i])
1089-
}
1090-
e.WriteString(`\u202`)
1091-
e.WriteByte(hex[c&0xF])
1092-
i += size
1093-
start = i
1094-
continue
1095-
}
1096-
i += size
1097-
}
1098-
if start < len(s) {
1099-
e.WriteString(s[start:])
1100-
}
1101-
e.WriteByte('"')
1102-
}
1103-
1104-
// NOTE: keep in sync with string above.
1105-
func (e *encodeState) stringBytes(s []byte, escapeHTML bool) {
1106-
e.WriteByte('"')
1107-
start := 0
1108-
for i := 0; i < len(s); {
1109-
if b := s[i]; b < utf8.RuneSelf {
1110-
if htmlSafeSet[b] || (!escapeHTML && safeSet[b]) {
1111-
i++
1112-
continue
1113-
}
1114-
if start < i {
1115-
e.Write(s[start:i])
1116-
}
1117-
e.WriteByte('\\')
1118-
switch b {
1119-
case '\\', '"':
1120-
e.WriteByte(b)
1121-
case '\n':
1122-
e.WriteByte('n')
1123-
case '\r':
1124-
e.WriteByte('r')
1125-
case '\t':
1126-
e.WriteByte('t')
1127-
default:
1128-
// This encodes bytes < 0x20 except for \t, \n and \r.
1129-
// If escapeHTML is set, it also escapes <, >, and &
1130-
// because they can lead to security holes when
1131-
// user-controlled strings are rendered into JSON
1132-
// and served to some browsers.
1133-
e.WriteString(`u00`)
1134-
e.WriteByte(hex[b>>4])
1135-
e.WriteByte(hex[b&0xF])
1136-
}
1137-
i++
1138-
start = i
1139-
continue
1140-
}
1141-
c, size := utf8.DecodeRune(s[i:])
1071+
c, size := utf8.DecodeRuneInString(string(src[i : i+n]))
11421072
if c == utf8.RuneError && size == 1 {
1143-
if start < i {
1144-
e.Write(s[start:i])
1145-
}
1146-
e.WriteString(`\ufffd`)
1073+
dst = append(dst, src[start:i]...)
1074+
dst = append(dst, `\ufffd`...)
11471075
i += size
11481076
start = i
11491077
continue
@@ -1156,21 +1084,17 @@ func (e *encodeState) stringBytes(s []byte, escapeHTML bool) {
11561084
// escape them, so we do so unconditionally.
11571085
// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
11581086
if c == '\u2028' || c == '\u2029' {
1159-
if start < i {
1160-
e.Write(s[start:i])
1161-
}
1162-
e.WriteString(`\u202`)
1163-
e.WriteByte(hex[c&0xF])
1087+
dst = append(dst, src[start:i]...)
1088+
dst = append(dst, '\\', 'u', '2', '0', '2', hex[c&0xF])
11641089
i += size
11651090
start = i
11661091
continue
11671092
}
11681093
i += size
11691094
}
1170-
if start < len(s) {
1171-
e.Write(s[start:])
1172-
}
1173-
e.WriteByte('"')
1095+
dst = append(dst, src[start:]...)
1096+
dst = append(dst, '"')
1097+
return dst
11741098
}
11751099

11761100
// A field represents a single field found in a struct.

src/encoding/json/encode_test.go

-49
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ import (
1515
"runtime/debug"
1616
"strconv"
1717
"testing"
18-
"unicode"
1918
)
2019

2120
type Optionals struct {
@@ -701,54 +700,6 @@ func TestDuplicatedFieldDisappears(t *testing.T) {
701700
}
702701
}
703702

704-
func TestStringBytes(t *testing.T) {
705-
t.Parallel()
706-
// Test that encodeState.stringBytes and encodeState.string use the same encoding.
707-
var r []rune
708-
for i := '\u0000'; i <= unicode.MaxRune; i++ {
709-
if testing.Short() && i > 1000 {
710-
i = unicode.MaxRune
711-
}
712-
r = append(r, i)
713-
}
714-
s := string(r) + "\xff\xff\xffhello" // some invalid UTF-8 too
715-
716-
for _, escapeHTML := range []bool{true, false} {
717-
es := &encodeState{}
718-
es.string(s, escapeHTML)
719-
720-
esBytes := &encodeState{}
721-
esBytes.stringBytes([]byte(s), escapeHTML)
722-
723-
enc := es.Buffer.String()
724-
encBytes := esBytes.Buffer.String()
725-
if enc != encBytes {
726-
i := 0
727-
for i < len(enc) && i < len(encBytes) && enc[i] == encBytes[i] {
728-
i++
729-
}
730-
enc = enc[i:]
731-
encBytes = encBytes[i:]
732-
i = 0
733-
for i < len(enc) && i < len(encBytes) && enc[len(enc)-i-1] == encBytes[len(encBytes)-i-1] {
734-
i++
735-
}
736-
enc = enc[:len(enc)-i]
737-
encBytes = encBytes[:len(encBytes)-i]
738-
739-
if len(enc) > 20 {
740-
enc = enc[:20] + "..."
741-
}
742-
if len(encBytes) > 20 {
743-
encBytes = encBytes[:20] + "..."
744-
}
745-
746-
t.Errorf("with escapeHTML=%t, encodings differ at %#q vs %#q",
747-
escapeHTML, enc, encBytes)
748-
}
749-
}
750-
}
751-
752703
func TestIssue10281(t *testing.T) {
753704
type Foo struct {
754705
N Number

0 commit comments

Comments
 (0)