Skip to content

Commit 5a27bab

Browse files
cdvr1993 and abhinav authored
perf: Faster string encoding (#1350)
Recently we found an application where using zap.Reflect was faster than using zapcore.ObjectMarshaler. After profiling we found that string encoding is really expensive. After replicating what encoding/json does, we're able to get much better performance out of string encoding. The optimization is roughly this: instead of appending a rune at a time to the buffer, scan and append contiguous chunks of runes that don't need special handling (valid runes that don't need to be escaped). ### Benchmark results ``` goos: linux goarch: amd64 pkg: go.uber.org/zap/zapcore cpu: AMD EPYC 7B13 │ /tmp/old.txt │ /tmp/new.txt │ │ sec/op │ sec/op vs base │ ZapJSON-8 89.10µ ± 1% 33.38µ ± 3% -62.54% (p=0.000 n=10) StandardJSON-8 40.74µ ± 1% 42.46µ ± 1% +4.22% (p=0.000 n=10) geomean 60.25µ 37.65µ -37.52% ``` --------- Co-authored-by: Abhinav Gupta <[email protected]>
1 parent 82c728b commit 5a27bab

File tree

4 files changed

+190
-63
lines changed

4 files changed

+190
-63
lines changed

buffer/buffer.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ func (b *Buffer) AppendByte(v byte) {
4242
b.bs = append(b.bs, v)
4343
}
4444

45+
// AppendBytes writes the given slice of bytes to the Buffer.
46+
func (b *Buffer) AppendBytes(v []byte) {
47+
b.bs = append(b.bs, v...)
48+
}
49+
4550
// AppendString writes a string to the Buffer.
4651
func (b *Buffer) AppendString(s string) {
4752
b.bs = append(b.bs, s...)

zapcore/json_encoder.go

Lines changed: 84 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -486,73 +486,98 @@ func (enc *jsonEncoder) appendFloat(val float64, bitSize int) {
486486
// Unlike the standard library's encoder, it doesn't attempt to protect the
487487
// user from browser vulnerabilities or JSONP-related problems.
488488
func (enc *jsonEncoder) safeAddString(s string) {
489-
for i := 0; i < len(s); {
490-
if enc.tryAddRuneSelf(s[i]) {
491-
i++
492-
continue
493-
}
494-
r, size := utf8.DecodeRuneInString(s[i:])
495-
if enc.tryAddRuneError(r, size) {
496-
i++
497-
continue
498-
}
499-
enc.buf.AppendString(s[i : i+size])
500-
i += size
501-
}
489+
safeAppendStringLike(
490+
(*buffer.Buffer).AppendString,
491+
utf8.DecodeRuneInString,
492+
enc.buf,
493+
s,
494+
)
502495
}
503496

504497
// safeAddByteString is a no-alloc equivalent of safeAddString(string(s)) for s []byte.
505498
func (enc *jsonEncoder) safeAddByteString(s []byte) {
499+
safeAppendStringLike(
500+
(*buffer.Buffer).AppendBytes,
501+
utf8.DecodeRune,
502+
enc.buf,
503+
s,
504+
)
505+
}
506+
507+
// safeAppendStringLike is a generic implementation of safeAddString and safeAddByteString.
508+
// It appends a string or byte slice to the buffer, escaping all special characters.
509+
func safeAppendStringLike[S []byte | string](
510+
// appendTo appends this string-like object to the buffer.
511+
appendTo func(*buffer.Buffer, S),
512+
// decodeRune decodes the next rune from the string-like object
513+
// and returns its value and width in bytes.
514+
decodeRune func(S) (rune, int),
515+
buf *buffer.Buffer,
516+
s S,
517+
) {
518+
// The encoding logic below works by skipping over characters
519+
// that can be safely copied as-is,
520+
// until a character is found that needs special handling.
521+
// At that point, we copy everything we've seen so far,
522+
// and then handle that special character.
523+
//
524+
// last is the index of the first byte that has not yet been copied to the buffer.
525+
last := 0
506526
for i := 0; i < len(s); {
507-
if enc.tryAddRuneSelf(s[i]) {
527+
if s[i] >= utf8.RuneSelf {
528+
// Characters >= RuneSelf may be part of a multi-byte rune.
529+
// They need to be decoded before we can decide how to handle them.
530+
r, size := decodeRune(s[i:])
531+
if r != utf8.RuneError || size != 1 {
532+
// No special handling required.
533+
// Skip over this rune and continue.
534+
i += size
535+
continue
536+
}
537+
538+
// Invalid UTF-8 sequence.
539+
// Replace it with the Unicode replacement character.
540+
appendTo(buf, s[last:i])
541+
buf.AppendString(`\ufffd`)
542+
508543
i++
509-
continue
510-
}
511-
r, size := utf8.DecodeRune(s[i:])
512-
if enc.tryAddRuneError(r, size) {
544+
last = i
545+
} else {
546+
// Character < RuneSelf is a single-byte UTF-8 rune.
547+
if s[i] >= 0x20 && s[i] != '\\' && s[i] != '"' {
548+
// No escaping necessary.
549+
// Skip over this character and continue.
550+
i++
551+
continue
552+
}
553+
554+
// This character needs to be escaped.
555+
appendTo(buf, s[last:i])
556+
switch s[i] {
557+
case '\\', '"':
558+
buf.AppendByte('\\')
559+
buf.AppendByte(s[i])
560+
case '\n':
561+
buf.AppendByte('\\')
562+
buf.AppendByte('n')
563+
case '\r':
564+
buf.AppendByte('\\')
565+
buf.AppendByte('r')
566+
case '\t':
567+
buf.AppendByte('\\')
568+
buf.AppendByte('t')
569+
default:
570+
// Encode bytes < 0x20, except for the escape sequences above.
571+
buf.AppendString(`\u00`)
572+
buf.AppendByte(_hex[s[i]>>4])
573+
buf.AppendByte(_hex[s[i]&0xF])
574+
}
575+
513576
i++
514-
continue
577+
last = i
515578
}
516-
enc.buf.Write(s[i : i+size])
517-
i += size
518579
}
519-
}
520580

521-
// tryAddRuneSelf appends b if it is valid UTF-8 character represented in a single byte.
522-
func (enc *jsonEncoder) tryAddRuneSelf(b byte) bool {
523-
if b >= utf8.RuneSelf {
524-
return false
525-
}
526-
if b >= 0x20 && b != '\\' && b != '"' {
527-
enc.buf.AppendByte(b)
528-
return true
529-
}
530-
switch b {
531-
case '\\', '"':
532-
enc.buf.AppendByte('\\')
533-
enc.buf.AppendByte(b)
534-
case '\n':
535-
enc.buf.AppendByte('\\')
536-
enc.buf.AppendByte('n')
537-
case '\r':
538-
enc.buf.AppendByte('\\')
539-
enc.buf.AppendByte('r')
540-
case '\t':
541-
enc.buf.AppendByte('\\')
542-
enc.buf.AppendByte('t')
543-
default:
544-
// Encode bytes < 0x20, except for the escape sequences above.
545-
enc.buf.AppendString(`\u00`)
546-
enc.buf.AppendByte(_hex[b>>4])
547-
enc.buf.AppendByte(_hex[b&0xF])
548-
}
549-
return true
550-
}
551-
552-
func (enc *jsonEncoder) tryAddRuneError(r rune, size int) bool {
553-
if r == utf8.RuneError && size == 1 {
554-
enc.buf.AppendString(`\ufffd`)
555-
return true
556-
}
557-
return false
581+
// add remaining
582+
appendTo(buf, s[last:])
558583
}

zapcore/json_encoder_bench_test.go

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ package zapcore_test
2222

2323
import (
2424
"encoding/json"
25+
"fmt"
2526
"testing"
2627
"time"
2728

@@ -51,7 +52,28 @@ func BenchmarkZapJSONFloat32AndComplex64(b *testing.B) {
5152
})
5253
}
5354

55+
const _sliceSize = 5000
56+
57+
type StringSlice []string
58+
59+
func (s StringSlice) MarshalLogArray(encoder ArrayEncoder) error {
60+
for _, str := range s {
61+
encoder.AppendString(str)
62+
}
63+
return nil
64+
}
65+
66+
func generateStringSlice(n int) StringSlice {
67+
output := make(StringSlice, 0, n)
68+
for i := 0; i < n; i++ {
69+
output = append(output, fmt.Sprint("00000000-0000-0000-0000-0000000000", i))
70+
}
71+
return output
72+
}
73+
5474
func BenchmarkZapJSON(b *testing.B) {
75+
additional := generateStringSlice(_sliceSize)
76+
b.ResetTimer()
5577
b.RunParallel(func(pb *testing.PB) {
5678
for pb.Next() {
5779
enc := NewJSONEncoder(testEncoderConfig())
@@ -64,6 +86,7 @@ func BenchmarkZapJSON(b *testing.B) {
6486
enc.AddString("string3", "🤔")
6587
enc.AddString("string4", "🙊")
6688
enc.AddBool("bool", true)
89+
_ = enc.AddArray("test", additional)
6790
buf, _ := enc.EncodeEntry(Entry{
6891
Message: "fake",
6992
Level: DebugLevel,
@@ -75,10 +98,11 @@ func BenchmarkZapJSON(b *testing.B) {
7598

7699
func BenchmarkStandardJSON(b *testing.B) {
77100
record := struct {
78-
Level string `json:"level"`
79-
Message string `json:"msg"`
80-
Time time.Time `json:"ts"`
81-
Fields map[string]interface{} `json:"fields"`
101+
Level string `json:"level"`
102+
Message string `json:"msg"`
103+
Time time.Time `json:"ts"`
104+
Fields map[string]interface{} `json:"fields"`
105+
Additional StringSlice
82106
}{
83107
Level: "debug",
84108
Message: "fake",
@@ -94,6 +118,7 @@ func BenchmarkStandardJSON(b *testing.B) {
94118
"string4": "🙊",
95119
"bool": true,
96120
},
121+
Additional: generateStringSlice(_sliceSize),
97122
}
98123
b.ResetTimer()
99124
b.RunParallel(func(pb *testing.PB) {

zapcore/json_encoder_impl_test.go

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,13 @@ import (
2929
"testing"
3030
"testing/quick"
3131
"time"
32+
"unicode/utf8"
3233

34+
"go.uber.org/zap/buffer"
3335
"go.uber.org/zap/internal/bufferpool"
3436

3537
"github.com/stretchr/testify/assert"
38+
"github.com/stretchr/testify/require"
3639
"go.uber.org/multierr"
3740
)
3841

@@ -662,3 +665,72 @@ func TestJSONQuick(t *testing.T) {
662665
check(asciiRoundTripsCorrectlyString)
663666
check(asciiRoundTripsCorrectlyByteString)
664667
}
668+
669+
var _stringLikeCorpus = []string{
670+
"",
671+
"foo",
672+
"bar",
673+
"a\nb",
674+
"a\tb",
675+
"a\\b",
676+
`a"b`,
677+
}
678+
679+
func FuzzSafeAppendStringLike_bytes(f *testing.F) {
680+
for _, s := range _stringLikeCorpus {
681+
f.Add([]byte(s))
682+
}
683+
f.Fuzz(func(t *testing.T, b []byte) {
684+
if !utf8.Valid(b) {
685+
t.Skip()
686+
}
687+
688+
fuzzSafeAppendStringLike(t, string(b), func(buf *buffer.Buffer) {
689+
safeAppendStringLike(
690+
(*buffer.Buffer).AppendBytes,
691+
utf8.DecodeRune,
692+
buf,
693+
b,
694+
)
695+
})
696+
})
697+
}
698+
699+
func FuzzSafeAppendStringLike_string(f *testing.F) {
700+
for _, s := range _stringLikeCorpus {
701+
f.Add(s)
702+
}
703+
f.Fuzz(func(t *testing.T, s string) {
704+
if !utf8.ValidString(s) {
705+
t.Skip()
706+
}
707+
708+
fuzzSafeAppendStringLike(t, s, func(buf *buffer.Buffer) {
709+
safeAppendStringLike(
710+
(*buffer.Buffer).AppendString,
711+
utf8.DecodeRuneInString,
712+
buf,
713+
s,
714+
)
715+
})
716+
})
717+
}
718+
719+
func fuzzSafeAppendStringLike(
720+
t *testing.T,
721+
want string,
722+
writeString func(*buffer.Buffer),
723+
) {
724+
t.Helper()
725+
726+
buf := bufferpool.Get()
727+
defer buf.Free()
728+
729+
buf.AppendByte('"')
730+
writeString(buf)
731+
buf.AppendByte('"')
732+
733+
var got string
734+
require.NoError(t, json.Unmarshal(buf.Bytes(), &got))
735+
assert.Equal(t, want, got)
736+
}

0 commit comments

Comments
 (0)