Skip to content

Commit 9ce721d

Browse files
committed
util: avoid allocations when escaping multibyte characters
EncodeEscapedChar (which is called in EncodeSQLStringWithFlags) is pretty optimized, but for escaping a multibyte character it was using fmt.Fprintf, which means every multibyte character ended up on the heap due to golang/go#8618. This had a noticeable impact in changefeed benchmarking. This commit just hand-compiles the two format strings that were being used into reasonably efficient Go, eliminating the allocs. A benchmark encoding the first 10000 runes shows a 4x speedup. Before: BenchmarkEncodeNonASCIISQLString-16 944 1216130 ns/op. After: BenchmarkEncodeNonASCIISQLString-16 3468 300777 ns/op. Release note: None
1 parent ae38d90 commit 9ce721d

File tree

2 files changed

+30
-5
lines changed

2 files changed

+30
-5
lines changed

pkg/sql/lexbase/encode_test.go

+11
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,14 @@ func BenchmarkEncodeSQLString(b *testing.B) {
123123
lexbase.EncodeSQLStringWithFlags(bytes.NewBuffer(nil), str, lexbase.EncBareStrings)
124124
}
125125
}
126+
127+
func BenchmarkEncodeNonASCIISQLString(b *testing.B) {
128+
builder := strings.Builder{}
129+
for r := rune(0); r < 10000; r++ {
130+
builder.WriteRune(r)
131+
}
132+
str := builder.String()
133+
for i := 0; i < b.N; i++ {
134+
lexbase.EncodeSQLStringWithFlags(bytes.NewBuffer(nil), str, lexbase.EncBareStrings)
135+
}
136+
}

pkg/util/stringencoding/string_encoding.go

+19-5
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ package stringencoding
2121

2222
import (
2323
"bytes"
24-
"fmt"
2524
"unicode/utf8"
2625
)
2726

@@ -109,14 +108,29 @@ func EncodeEscapedChar(
109108
// Escape non-printable characters.
110109
buf.Write(HexMap[currentByte])
111110
}
112-
} else if ln == 2 {
113-
// For multi-byte runes, print them based on their width.
114-
fmt.Fprintf(buf, `\u%04X`, currentRune)
115111
} else {
116-
fmt.Fprintf(buf, `\U%08X`, currentRune)
112+
writeMultibyteRuneAsHex(buf, currentRune, ln)
117113
}
118114
}
119115

116+
// uppercaseHex maps a nibble value (0-15) to its uppercase hex digit.
const uppercaseHex = `0123456789ABCDEF`

// writeMultibyteRuneAsHex appends r to buf as an escaped, uppercase hex code
// point. It is equivalent to fmt.Fprintf(buf, `\u%04X`, r) when ln == 2 and
// to fmt.Fprintf(buf, `\U%08X`, r) otherwise, for non-negative r.
// We can't quite just use strconv since we need uppercase hex.
//
// ln is the width of the rune as supplied by the caller; it only selects
// between the two escape forms. As with the fmt verbs, the digit count is a
// minimum: a rune needing more digits than the selected form provides is
// written in full rather than truncated. A non-positive r yields all zeros.
func writeMultibyteRuneAsHex(buf *bytes.Buffer, r rune, ln int) {
	marker, minDigits := byte('U'), 8
	if ln == 2 {
		marker, minDigits = 'u', 4
	}

	// A backslash, the marker and at most 8 hex digits (a rune is 32 bits).
	// The escape is assembled right to left in a fixed-size array so that no
	// bytes already in buf are ever touched and nothing is heap allocated.
	var scratch [10]byte
	pos := len(scratch)
	for ; r > 0; r >>= 4 {
		pos--
		scratch[pos] = uppercaseHex[r&0x0f]
	}
	// Left-pad with zeros up to the minimum width of the selected form.
	for len(scratch)-pos < minDigits {
		pos--
		scratch[pos] = '0'
	}
	pos--
	scratch[pos] = marker
	pos--
	scratch[pos] = '\\'

	buf.Write(scratch[pos:])
}
133+
120134
func writeHexDigit(buf *bytes.Buffer, v int) {
121135
if v < 10 {
122136
buf.WriteByte('0' + byte(v))

0 commit comments

Comments
 (0)