Skip to content

Commit 81cba79

Browse files
authored
Add microbenchmark for LongKeyedBucketOrds (#58608) (#59459)
I've always been confused by the strange behavior that I saw when working on #57304. Specifically, I saw that switching from a bimorphic invocation to a monomorphic invocation gave us a 7%-15% performance bump. This felt *bonkers* to me. And, it also made me wonder whether it'd be worth looking into doing it everywhere. It turns out that, no, it isn't needed everywhere. This benchmark shows that a bimorphic invocation like: ``` LongKeyedBucketOrds ords = new LongKeyedBucketOrds.FromSingle(); ords.add(0, 0); <------ this line ``` is 19% slower than a monomorphic invocation like: ``` LongKeyedBucketOrds.FromSingle ords = new LongKeyedBucketOrds.FromSingle(); ords.add(0, 0); <------ this line ``` But *only* when the reference is mutable. In the example above, if `ords` is never changed then both perform the same. But if the `ords` reference is assigned twice then we start to see the difference: ``` immutable bimorphic avgt 10 6.468 ± 0.045 ns/op immutable monomorphic avgt 10 6.756 ± 0.026 ns/op mutable bimorphic avgt 10 9.741 ± 0.073 ns/op mutable monomorphic avgt 10 8.190 ± 0.016 ns/op ``` So the conclusion from all this is that we've done the right thing: `auto_date_histogram` is the only aggregation in which `ords` isn't final and it is the only aggregation that forces monomorphic invocations. All other aggregations use an immutable bimorphic invocation. Which is fine. Relates to #56487
1 parent db89764 commit 81cba79

File tree

1 file changed

+172
-0
lines changed

1 file changed

+172
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.benchmark.search.aggregations.bucket.terms;
21+
22+
import org.elasticsearch.common.settings.Settings;
23+
import org.elasticsearch.common.util.BigArrays;
24+
import org.elasticsearch.common.util.PageCacheRecycler;
25+
import org.elasticsearch.search.aggregations.CardinalityUpperBound;
26+
import org.elasticsearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder;
27+
import org.elasticsearch.search.aggregations.bucket.terms.LongKeyedBucketOrds;
28+
import org.openjdk.jmh.annotations.Benchmark;
29+
import org.openjdk.jmh.annotations.BenchmarkMode;
30+
import org.openjdk.jmh.annotations.Fork;
31+
import org.openjdk.jmh.annotations.Measurement;
32+
import org.openjdk.jmh.annotations.Mode;
33+
import org.openjdk.jmh.annotations.OperationsPerInvocation;
34+
import org.openjdk.jmh.annotations.OutputTimeUnit;
35+
import org.openjdk.jmh.annotations.Scope;
36+
import org.openjdk.jmh.annotations.Setup;
37+
import org.openjdk.jmh.annotations.State;
38+
import org.openjdk.jmh.annotations.Warmup;
39+
import org.openjdk.jmh.infra.Blackhole;
40+
41+
import java.util.concurrent.TimeUnit;
42+
43+
@Fork(2)
44+
@Warmup(iterations = 10)
45+
@Measurement(iterations = 5)
46+
@BenchmarkMode(Mode.AverageTime)
47+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
48+
@OperationsPerInvocation(1_000_000)
49+
@State(Scope.Benchmark)
50+
public class LongKeyedBucketOrdsBenchmark {
51+
private static final long LIMIT = 1_000_000;
52+
/**
53+
* The number of distinct values to add to the buckets.
54+
*/
55+
private static final long DISTINCT_VALUES = 10;
56+
/**
57+
* The number of buckets to create in the {@link #multiBucket} case.
58+
* <p>
59+
* If this is not relatively prime to {@link #DISTINCT_VALUES} then the
60+
* values won't be scattered evenly across the buckets.
61+
*/
62+
private static final long DISTINCT_BUCKETS = 21;
63+
64+
private final PageCacheRecycler recycler = new PageCacheRecycler(Settings.EMPTY);
65+
private final BigArrays bigArrays = new BigArrays(recycler, null, "REQUEST");
66+
67+
/**
68+
* Force loading all of the implementations just for extra paranoia's sake.
69+
* We really don't want the JVM to be able to eliminate one of them just
70+
* because we don't use it in the particular benchmark. That is totally a
71+
* thing it'd do. It is sneaky.
72+
*/
73+
@Setup
74+
public void forceLoadClasses(Blackhole bh) {
75+
bh.consume(LongKeyedBucketOrds.FromSingle.class);
76+
bh.consume(LongKeyedBucketOrds.FromMany.class);
77+
}
78+
79+
/**
80+
* Emulates a way that we do <strong>not</strong> use {@link LongKeyedBucketOrds}
81+
* because it is not needed.
82+
*/
83+
@Benchmark
84+
public void singleBucketIntoSingleImmutableMonmorphicInvocation(Blackhole bh) {
85+
try (LongKeyedBucketOrds.FromSingle ords = new LongKeyedBucketOrds.FromSingle(bigArrays)) {
86+
for (long i = 0; i < LIMIT; i++) {
87+
ords.add(0, i % DISTINCT_VALUES);
88+
}
89+
bh.consume(ords);
90+
}
91+
}
92+
93+
/**
94+
* Emulates the way that most aggregations use {@link LongKeyedBucketOrds}.
95+
*/
96+
@Benchmark
97+
public void singleBucketIntoSingleImmutableBimorphicInvocation(Blackhole bh) {
98+
try (LongKeyedBucketOrds ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.ONE)) {
99+
for (long i = 0; i < LIMIT; i++) {
100+
ords.add(0, i % DISTINCT_VALUES);
101+
}
102+
bh.consume(ords);
103+
}
104+
}
105+
106+
/**
107+
* Emulates the way that {@link AutoDateHistogramAggregationBuilder} uses {@link LongKeyedBucketOrds}.
108+
*/
109+
@Benchmark
110+
public void singleBucketIntoSingleMutableMonmorphicInvocation(Blackhole bh) {
111+
LongKeyedBucketOrds.FromSingle ords = new LongKeyedBucketOrds.FromSingle(bigArrays);
112+
for (long i = 0; i < LIMIT; i++) {
113+
if (i % 100_000 == 0) {
114+
ords.close();
115+
bh.consume(ords);
116+
ords = new LongKeyedBucketOrds.FromSingle(bigArrays);
117+
}
118+
ords.add(0, i % DISTINCT_VALUES);
119+
}
120+
bh.consume(ords);
121+
ords.close();
122+
}
123+
124+
/**
125+
* Emulates a way that we do <strong>not</strong> use {@link LongKeyedBucketOrds}
126+
* because it is significantly slower than the
127+
* {@link #singleBucketIntoSingleMutableMonmorphicInvocation monomorphic invocation}.
128+
*/
129+
@Benchmark
130+
public void singleBucketIntoSingleMutableBimorphicInvocation(Blackhole bh) {
131+
LongKeyedBucketOrds ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.ONE);
132+
for (long i = 0; i < LIMIT; i++) {
133+
if (i % 100_000 == 0) {
134+
ords.close();
135+
bh.consume(ords);
136+
ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.ONE);
137+
}
138+
ords.add(0, i % DISTINCT_VALUES);
139+
140+
}
141+
bh.consume(ords);
142+
ords.close();
143+
}
144+
145+
/**
146+
* Emulates an aggregation that collects from a single bucket "by accident".
147+
* This can happen if an aggregation is under, say, a {@code terms}
148+
* aggregation and there is only a single value for that term in the index.
149+
*/
150+
@Benchmark
151+
public void singleBucketIntoMulti(Blackhole bh) {
152+
try (LongKeyedBucketOrds ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.MANY)) {
153+
for (long i = 0; i < LIMIT; i++) {
154+
ords.add(0, i % DISTINCT_VALUES);
155+
}
156+
bh.consume(ords);
157+
}
158+
}
159+
160+
/**
161+
* Emulates an aggregation that collects from many buckets.
162+
*/
163+
@Benchmark
164+
public void multiBucket(Blackhole bh) {
165+
try (LongKeyedBucketOrds ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.MANY)) {
166+
for (long i = 0; i < LIMIT; i++) {
167+
ords.add(i % DISTINCT_BUCKETS, i % DISTINCT_VALUES);
168+
}
169+
bh.consume(ords);
170+
}
171+
}
172+
}

0 commit comments

Comments
 (0)