Skip to content

Commit ba98f67

Browse files
authored
Only create final MatrixStatsResults on final reduction (#38130)
MatrixStatsResults is the "final" result object, and runs an additional computation in it's ctor to calculate covariance, etc. This means it should only run on the final reduction instead of on every reduce.
1 parent bdd3a82 commit ba98f67

File tree

3 files changed

+73
-6
lines changed

3 files changed

+73
-6
lines changed

modules/aggs-matrix-stats/src/main/java/org/elasticsearch/search/aggregations/matrix/stats/InternalMatrixStats.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -244,12 +244,15 @@ public InternalAggregation doReduce(List<InternalAggregation> aggregations, Redu
244244
}
245245

246246
RunningStats runningStats = new RunningStats();
247-
for (int i=0; i < aggs.size(); ++i) {
248-
runningStats.merge(((InternalMatrixStats) aggs.get(i)).stats);
247+
for (InternalAggregation agg : aggs) {
248+
runningStats.merge(((InternalMatrixStats) agg).stats);
249249
}
250-
MatrixStatsResults results = new MatrixStatsResults(runningStats);
251250

252-
return new InternalMatrixStats(name, results.getDocCount(), runningStats, results, pipelineAggregators(), getMetaData());
251+
if (reduceContext.isFinalReduce()) {
252+
MatrixStatsResults results = new MatrixStatsResults(runningStats);
253+
return new InternalMatrixStats(name, results.getDocCount(), runningStats, results, pipelineAggregators(), getMetaData());
254+
}
255+
return new InternalMatrixStats(name, runningStats.docCount, runningStats, null, pipelineAggregators(), getMetaData());
253256
}
254257

255258
@Override

modules/aggs-matrix-stats/src/test/java/org/elasticsearch/search/aggregations/matrix/stats/MatrixStatsAggregatorTests.java

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ public void testNoData() throws Exception {
5858
}
5959
}
6060

61-
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/37587")
6261
public void testTwoFields() throws Exception {
6362
String fieldA = "a";
6463
MappedFieldType ftA = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
@@ -89,8 +88,49 @@ public void testTwoFields() throws Exception {
8988
IndexSearcher searcher = new IndexSearcher(reader);
9089
MatrixStatsAggregationBuilder aggBuilder = new MatrixStatsAggregationBuilder("my_agg")
9190
.fields(Arrays.asList(fieldA, fieldB));
92-
InternalMatrixStats stats = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, ftA, ftB);
91+
InternalMatrixStats stats = search(searcher, new MatchAllDocsQuery(), aggBuilder, ftA, ftB);
92+
// Since `search` doesn't do any reduction, and the InternalMatrixStats object will have a null `MatrixStatsResults`
93+
// object. That is created during the final reduction, which also does a final round of computations
94+
// So we have to create a MatrixStatsResults object here manually so that the final `compute()` is called
9395
multiPassStats.assertNearlyEqual(new MatrixStatsResults(stats.getStats()));
96+
}
97+
}
98+
}
99+
100+
public void testTwoFieldsReduce() throws Exception {
101+
String fieldA = "a";
102+
MappedFieldType ftA = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
103+
ftA.setName(fieldA);
104+
String fieldB = "b";
105+
MappedFieldType ftB = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
106+
ftB.setName(fieldB);
107+
108+
try (Directory directory = newDirectory();
109+
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
110+
111+
int numDocs = scaledRandomIntBetween(8192, 16384);
112+
Double[] fieldAValues = new Double[numDocs];
113+
Double[] fieldBValues = new Double[numDocs];
114+
for (int docId = 0; docId < numDocs; docId++) {
115+
Document document = new Document();
116+
fieldAValues[docId] = randomDouble();
117+
document.add(new SortedNumericDocValuesField(fieldA, NumericUtils.doubleToSortableLong(fieldAValues[docId])));
118+
119+
fieldBValues[docId] = randomDouble();
120+
document.add(new SortedNumericDocValuesField(fieldB, NumericUtils.doubleToSortableLong(fieldBValues[docId])));
121+
indexWriter.addDocument(document);
122+
}
123+
124+
MultiPassStats multiPassStats = new MultiPassStats(fieldA, fieldB);
125+
multiPassStats.computeStats(Arrays.asList(fieldAValues), Arrays.asList(fieldBValues));
126+
try (IndexReader reader = indexWriter.getReader()) {
127+
IndexSearcher searcher = new IndexSearcher(reader);
128+
MatrixStatsAggregationBuilder aggBuilder = new MatrixStatsAggregationBuilder("my_agg")
129+
.fields(Arrays.asList(fieldA, fieldB));
130+
InternalMatrixStats stats = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, ftA, ftB);
131+
// Unlike testTwoFields, `searchAndReduce` will execute reductions so the `MatrixStatsResults` object
132+
// will be populated and fully computed. We should use that value directly to test against
133+
multiPassStats.assertNearlyEqual(stats);
94134
assertTrue(MatrixAggregationInspectionHelper.hasValue(stats));
95135
}
96136
}

modules/aggs-matrix-stats/src/test/java/org/elasticsearch/search/aggregations/matrix/stats/MultiPassStats.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,30 @@ void assertNearlyEqual(MatrixStatsResults stats) {
136136
assertTrue(nearlyEqual(correlations.get(fieldBKey).get(fieldAKey), stats.getCorrelation(fieldBKey, fieldAKey), 1e-7));
137137
}
138138

139+
void assertNearlyEqual(InternalMatrixStats stats) {
140+
assertEquals(count, stats.getDocCount());
141+
assertEquals(count, stats.getFieldCount(fieldAKey));
142+
assertEquals(count, stats.getFieldCount(fieldBKey));
143+
// means
144+
assertTrue(nearlyEqual(means.get(fieldAKey), stats.getMean(fieldAKey), 1e-7));
145+
assertTrue(nearlyEqual(means.get(fieldBKey), stats.getMean(fieldBKey), 1e-7));
146+
// variances
147+
assertTrue(nearlyEqual(variances.get(fieldAKey), stats.getVariance(fieldAKey), 1e-7));
148+
assertTrue(nearlyEqual(variances.get(fieldBKey), stats.getVariance(fieldBKey), 1e-7));
149+
// skewness (multi-pass is more susceptible to round-off error so we need to slightly relax the tolerance)
150+
assertTrue(nearlyEqual(skewness.get(fieldAKey), stats.getSkewness(fieldAKey), 1e-4));
151+
assertTrue(nearlyEqual(skewness.get(fieldBKey), stats.getSkewness(fieldBKey), 1e-4));
152+
// kurtosis (multi-pass is more susceptible to round-off error so we need to slightly relax the tolerance)
153+
assertTrue(nearlyEqual(kurtosis.get(fieldAKey), stats.getKurtosis(fieldAKey), 1e-4));
154+
assertTrue(nearlyEqual(kurtosis.get(fieldBKey), stats.getKurtosis(fieldBKey), 1e-4));
155+
// covariances
156+
assertTrue(nearlyEqual(covariances.get(fieldAKey).get(fieldBKey),stats.getCovariance(fieldAKey, fieldBKey), 1e-7));
157+
assertTrue(nearlyEqual(covariances.get(fieldBKey).get(fieldAKey),stats.getCovariance(fieldBKey, fieldAKey), 1e-7));
158+
// correlation
159+
assertTrue(nearlyEqual(correlations.get(fieldAKey).get(fieldBKey), stats.getCorrelation(fieldAKey, fieldBKey), 1e-7));
160+
assertTrue(nearlyEqual(correlations.get(fieldBKey).get(fieldAKey), stats.getCorrelation(fieldBKey, fieldAKey), 1e-7));
161+
}
162+
139163
private static boolean nearlyEqual(double a, double b, double epsilon) {
140164
final double absA = Math.abs(a);
141165
final double absB = Math.abs(b);

0 commit comments

Comments
 (0)