Skip to content

Commit 2b58861

Browse files
craig[bot]DrewKimballHonoreDB
committed
Merge #88425 #88671
88425: colexec: use tree.DNull when projection is called on null input r=DrewKimball a=DrewKimball Most projections skip rows for which one or more arguments are null, and just output a null for those rows. However, some projections can actually accept null arguments. Previously, we were using the values from the vec even when the `Nulls` bitmap was set for that row, which invalidates the data in the vec for that row. This could cause a non-null value to be unexpectedly concatenated to an array when an argument was null (nothing should be added to the array in this case). This commit modifies the projection operators that operate on datum-backed vectors to explicitly set the argument to `tree.DNull` in the case when the `Nulls` bitmap is set. This ensures that the projection is not performed with the invalid (and arbitrary) value in the datum vec at that index. Fixes #87919 Release note (bug fix): Fixed a bug in `Concat` projection operators for arrays that could cause non-null values to be added to the array when one of the arguments was null. 88671: util: avoid allocations when escaping multibyte characters r=[miretskiy,yuzefovich] a=HonoreDB EncodeEscapedChar (which is called in EncodeSQLStringWithFlags) is pretty optimized, but for escaping a multibyte character it was using fmt.FPrintf, which means every multibyte character ended up on the heap due to golang/go#8618. This had a noticeable impact in changefeed benchmarking. This commit just hand-compiles the two formatting strings that were being used into reasonably efficient go, eliminating the allocs. Benchmark encoding the first 10000 runes shows a 4x speedup: Before: BenchmarkEncodeNonASCIISQLString-16 944 1216130 ns/op After: BenchmarkEncodeNonASCIISQLString-16 3468 300777 ns/op Release note: None Co-authored-by: DrewKimball <[email protected]> Co-authored-by: Aaron Zinger <[email protected]>
3 parents 801bfc6 + 770898a + 9ce721d commit 2b58861

File tree

9 files changed

+3597
-5388
lines changed

9 files changed

+3597
-5388
lines changed

pkg/sql/colexec/colexecproj/proj_non_const_ops.eg.go

+1,407-2,092
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/sql/colexec/colexecproj/proj_non_const_ops_tmpl.go

+29-21
Original file line numberDiff line numberDiff line change
@@ -123,25 +123,7 @@ func (p _OP_NAME) Next() coldata.Batch {
123123
// of a projection is Null.
124124
// */}}
125125
_outNulls := projVec.Nulls()
126-
127-
// {{/*
128-
// If calledOnNullInput is true, the function’s definition can handle
129-
// null arguments. We would still want to perform the projection, and
130-
// there is no need to call projVec.SetNulls(). The behaviour will just
131-
// be the same as _HAS_NULLS is false. Since currently only
132-
// ConcatDatumDatum needs this calledOnNullInput == true behaviour,
133-
// logic for calledOnNullInput is only added to the if statement for
134-
// function with Datum, Datum. If we later introduce another projection
135-
// operation that has calledOnNullInput == true, we should update this
136-
// code accordingly.
137-
// */}}
138-
// {{if and (eq .Left.VecMethod "Datum") (eq .Right.VecMethod "Datum")}}
139-
hasNullsAndNotCalledOnNullInput :=
140-
(vec1.Nulls().MaybeHasNulls() || vec2.Nulls().MaybeHasNulls()) && !p.calledOnNullInput
141-
// {{else}}
142-
hasNullsAndNotCalledOnNullInput := (vec1.Nulls().MaybeHasNulls() || vec2.Nulls().MaybeHasNulls())
143-
// {{end}}
144-
if hasNullsAndNotCalledOnNullInput {
126+
if vec1.Nulls().MaybeHasNulls() || vec2.Nulls().MaybeHasNulls() {
145127
_SET_PROJECTION(true)
146128
} else {
147129
_SET_PROJECTION(false)
@@ -158,6 +140,7 @@ func _SET_PROJECTION(_HAS_NULLS bool) {
158140
// {{define "setProjection" -}}
159141
// {{$hasNulls := $.HasNulls}}
160142
// {{with $.Overload}}
143+
// {{$isDatum := (and (eq .Left.VecMethod "Datum") (eq .Right.VecMethod "Datum"))}}
161144
// {{if _HAS_NULLS}}
162145
col1Nulls := vec1.Nulls()
163146
col2Nulls := vec2.Nulls()
@@ -183,7 +166,13 @@ func _SET_PROJECTION(_HAS_NULLS bool) {
183166
// projVec.Nulls() so there is no need to call projVec.SetNulls().
184167
// */}}
185168
// {{if _HAS_NULLS}}
186-
projVec.SetNulls(_outNulls.Or(*col1Nulls).Or(*col2Nulls))
169+
// {{if $isDatum}}
170+
if !p.calledOnNullInput {
171+
// {{end}}
172+
projVec.SetNulls(_outNulls.Or(*col1Nulls).Or(*col2Nulls))
173+
// {{if $isDatum}}
174+
}
175+
// {{end}}
187176
// {{end}}
188177
// {{end}}
189178
// {{end}}
@@ -198,19 +187,38 @@ func _SET_SINGLE_TUPLE_PROJECTION(_HAS_NULLS bool, _HAS_SEL bool) { // */}}
198187
// {{$hasNulls := $.HasNulls}}
199188
// {{$hasSel := $.HasSel}}
200189
// {{with $.Overload}}
190+
// {{$isDatum := (and (eq .Left.VecMethod "Datum") (eq .Right.VecMethod "Datum"))}}
201191
// {{if _HAS_NULLS}}
202-
if !col1Nulls.NullAt(i) && !col2Nulls.NullAt(i) {
192+
if p.calledOnNullInput || (!col1Nulls.NullAt(i) && !col2Nulls.NullAt(i)) {
203193
// We only want to perform the projection operation if both values are not
204194
// null.
205195
// {{end}}
206196
// {{if and (.Left.Sliceable) (not _HAS_SEL)}}
207197
//gcassert:bce
208198
// {{end}}
209199
arg1 := col1.Get(i)
200+
// {{if (and _HAS_NULLS $isDatum)}}
201+
if col1Nulls.NullAt(i) {
202+
// {{/*
203+
// If we entered this branch for a null value, calledOnNullInput must be
204+
// true. This means the projection should be calculated on null arguments.
205+
// When a value is null, the underlying data in the slice is invalid and
206+
// can be anything, so we need to overwrite it here. calledOnNullInput is
207+
// currently only true for ConcatDatumDatum, so only the datum case needs
208+
// to be handled.
209+
// */}}
210+
arg1 = tree.DNull
211+
}
212+
// {{end}}
210213
// {{if and (.Right.Sliceable) (not _HAS_SEL)}}
211214
//gcassert:bce
212215
// {{end}}
213216
arg2 := col2.Get(i)
217+
// {{if (and _HAS_NULLS $isDatum)}}
218+
if col2Nulls.NullAt(i) {
219+
arg2 = tree.DNull
220+
}
221+
// {{end}}
214222
_ASSIGN(projCol[i], arg1, arg2, projCol, col1, col2)
215223
// {{if _HAS_NULLS}}
216224
}

0 commit comments

Comments
 (0)