Skip to content

Commit 2e15ea1

Browse files
authored
Implement weighted geo_shape centroid support (#50297)
This PR implements proper centroid calculations of geometries according to the definition given in #49887. To compute things correctly, an additional variable-length-encoded long, representing the total weight for the centroid of the geometry, is stored in the tree. This weight is always positive. Some tests are fixed, as they did not have valid geometries. Closes #49887.
1 parent f686a0b commit 2e15ea1

File tree

13 files changed

+322
-98
lines changed

13 files changed

+322
-98
lines changed

server/src/main/java/org/elasticsearch/common/geo/CentroidCalculator.java

+79-28
Original file line numberDiff line numberDiff line change
@@ -38,20 +38,19 @@
3838
* as the centroid of a shape.
3939
*/
4040
public class CentroidCalculator {
41-
4241
private double compX;
4342
private double compY;
4443
private double sumX;
4544
private double sumY;
46-
private int count;
45+
private double sumWeight;
4746
private DimensionalShapeType dimensionalShapeType;
4847

4948
public CentroidCalculator(Geometry geometry) {
5049
this.sumX = 0.0;
5150
this.compX = 0.0;
5251
this.sumY = 0.0;
5352
this.compY = 0.0;
54-
this.count = 0;
53+
this.sumWeight = 0.0;
5554
CentroidCalculatorVisitor visitor = new CentroidCalculatorVisitor(this);
5655
geometry.visit(visitor);
5756
this.dimensionalShapeType = DimensionalShapeType.forGeometry(geometry);
@@ -60,22 +59,22 @@ public CentroidCalculator(Geometry geometry) {
6059
/**
6160
* adds a single weighted coordinate to the running sums and total weight
6261
* for centroid calculation
63-
*
64-
* @param x the x-coordinate of the point
62+
* @param x the x-coordinate of the point
6563
* @param y the y-coordinate of the point
64+
* @param weight the associated weight of the coordinate
6665
*/
67-
private void addCoordinate(double x, double y) {
68-
double correctedX = x - compX;
66+
private void addCoordinate(double x, double y, double weight) {
67+
double correctedX = weight * x - compX;
6968
double newSumX = sumX + correctedX;
7069
compX = (newSumX - sumX) - correctedX;
7170
sumX = newSumX;
7271

73-
double correctedY = y - compY;
72+
double correctedY = weight * y - compY;
7473
double newSumY = sumY + correctedY;
7574
compY = (newSumY - sumY) - correctedY;
7675
sumY = newSumY;
7776

78-
count += 1;
77+
sumWeight += weight;
7978
}
8079

8180
/**
@@ -87,26 +86,45 @@ private void addCoordinate(double x, double y) {
8786
* @param otherCalculator the other centroid calculator to add from
8887
*/
8988
public void addFrom(CentroidCalculator otherCalculator) {
90-
addCoordinate(otherCalculator.sumX, otherCalculator.sumY);
91-
// adjust count
92-
count += otherCalculator.count - 1;
93-
dimensionalShapeType = DimensionalShapeType.max(dimensionalShapeType, otherCalculator.dimensionalShapeType);
89+
int compared = DimensionalShapeType.COMPARATOR.compare(dimensionalShapeType, otherCalculator.dimensionalShapeType);
90+
if (compared < 0) {
91+
sumWeight = otherCalculator.sumWeight;
92+
dimensionalShapeType = otherCalculator.dimensionalShapeType;
93+
sumX = otherCalculator.sumX;
94+
sumY = otherCalculator.sumY;
95+
compX = otherCalculator.compX;
96+
compY = otherCalculator.compY;
97+
} else if (compared == 0) {
98+
addCoordinate(otherCalculator.sumX, otherCalculator.sumY, otherCalculator.sumWeight);
99+
} // else (compared > 0) do not modify centroid calculation since otherCalculator is of lower dimension than this calculator
94100
}
95101

96102
/**
97103
* @return the x-coordinate centroid
98104
*/
99105
public double getX() {
100-
return sumX / count;
106+
// normalization required due to floating point precision errors
107+
return GeoUtils.normalizeLon(sumX / sumWeight);
101108
}
102109

103110
/**
104111
* @return the y-coordinate centroid
105112
*/
106113
public double getY() {
107-
return sumY / count;
114+
// normalization required due to floating point precision errors
115+
return GeoUtils.normalizeLat(sumY / sumWeight);
116+
}
117+
118+
/**
119+
* @return the sum of the weights of all the coordinates added to the calculator
120+
*/
121+
public double sumWeight() {
122+
return sumWeight;
108123
}
109124

125+
/**
126+
* @return the highest dimensional shape type summed in the calculator
127+
*/
110128
public DimensionalShapeType getDimensionalShapeType() {
111129
return dimensionalShapeType;
112130
}
@@ -121,8 +139,7 @@ private CentroidCalculatorVisitor(CentroidCalculator calculator) {
121139

122140
@Override
123141
public Void visit(Circle circle) {
124-
calculator.addCoordinate(circle.getX(), circle.getY());
125-
return null;
142+
throw new IllegalArgumentException("invalid shape type found [Circle] while calculating centroid");
126143
}
127144

128145
@Override
@@ -135,17 +152,47 @@ public Void visit(GeometryCollection<?> collection) {
135152

136153
@Override
137154
public Void visit(Line line) {
138-
for (int i = 0; i < line.length(); i++) {
139-
calculator.addCoordinate(line.getX(i), line.getY(i));
155+
// a line's centroid is calculated by summing the center of each
156+
// line segment weighted by the line segment's length in degrees
157+
for (int i = 0; i < line.length() - 1; i++) {
158+
double diffX = line.getX(i) - line.getX(i + 1);
159+
double diffY = line.getY(i) - line.getY(i + 1);
160+
double x = (line.getX(i) + line.getX(i + 1)) / 2;
161+
double y = (line.getY(i) + line.getY(i + 1)) / 2;
162+
calculator.addCoordinate(x, y, Math.sqrt(diffX * diffX + diffY * diffY));
140163
}
141164
return null;
142165
}
143-
144166
@Override
145167
public Void visit(LinearRing ring) {
168+
throw new IllegalArgumentException("invalid shape type found [LinearRing] while calculating centroid");
169+
}
170+
171+
private Void visit(LinearRing ring, boolean isHole) {
172+
// implementation of calculation defined in
173+
// https://www.seas.upenn.edu/~sys502/extra_materials/Polygon%20Area%20and%20Centroid.pdf
174+
//
175+
// centroid of a ring is a weighted coordinate based on the ring's area.
176+
// the sign of the area is positive for the outer-shell of a polygon and negative for the holes
177+
178+
int sign = isHole ? -1 : 1;
179+
double totalRingArea = 0.0;
146180
for (int i = 0; i < ring.length() - 1; i++) {
147-
calculator.addCoordinate(ring.getX(i), ring.getY(i));
181+
totalRingArea += (ring.getX(i) * ring.getY(i + 1)) - (ring.getX(i + 1) * ring.getY(i));
148182
}
183+
totalRingArea = totalRingArea / 2;
184+
185+
double sumX = 0.0;
186+
double sumY = 0.0;
187+
for (int i = 0; i < ring.length() - 1; i++) {
188+
double twiceArea = (ring.getX(i) * ring.getY(i + 1)) - (ring.getX(i + 1) * ring.getY(i));
189+
sumX += twiceArea * (ring.getX(i) + ring.getX(i + 1));
190+
sumY += twiceArea * (ring.getY(i) + ring.getY(i + 1));
191+
}
192+
double cX = sumX / (6 * totalRingArea);
193+
double cY = sumY / (6 * totalRingArea);
194+
calculator.addCoordinate(cX, cY, sign * Math.abs(totalRingArea));
195+
149196
return null;
150197
}
151198

@@ -175,22 +222,26 @@ public Void visit(MultiPolygon multiPolygon) {
175222

176223
@Override
177224
public Void visit(Point point) {
178-
calculator.addCoordinate(point.getX(), point.getY());
225+
calculator.addCoordinate(point.getX(), point.getY(), 1.0);
179226
return null;
180227
}
181228

182229
@Override
183230
public Void visit(Polygon polygon) {
184-
// TODO: incorporate holes into centroid calculation
185-
return visit(polygon.getPolygon());
231+
visit(polygon.getPolygon(), false);
232+
for (int i = 0; i < polygon.getNumberOfHoles(); i++) {
233+
visit(polygon.getHole(i), true);
234+
}
235+
return null;
186236
}
187237

188238
@Override
189239
public Void visit(Rectangle rectangle) {
190-
calculator.addCoordinate(rectangle.getMinX(), rectangle.getMinY());
191-
calculator.addCoordinate(rectangle.getMinX(), rectangle.getMaxY());
192-
calculator.addCoordinate(rectangle.getMaxX(), rectangle.getMinY());
193-
calculator.addCoordinate(rectangle.getMaxX(), rectangle.getMaxY());
240+
double sumX = rectangle.getMaxX() + rectangle.getMinX();
241+
double sumY = rectangle.getMaxY() + rectangle.getMinY();
242+
double diffX = rectangle.getMaxX() - rectangle.getMinX();
243+
double diffY = rectangle.getMaxY() - rectangle.getMinY();
244+
calculator.addCoordinate(sumX / 2, sumY / 2, Math.abs(diffX * diffY));
194245
return null;
195246
}
196247
}

server/src/main/java/org/elasticsearch/common/geo/DimensionalShapeType.java

+8-5
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@ public enum DimensionalShapeType {
5353
GEOMETRYCOLLECTION_LINES, // highest-dimensional shapes are Lines
5454
GEOMETRYCOLLECTION_POLYGONS; // highest-dimensional shapes are Polygons
5555

56-
private static DimensionalShapeType[] values = values();
56+
public static Comparator<DimensionalShapeType> COMPARATOR = Comparator.comparingInt(DimensionalShapeType::centroidDimension);
5757

58-
private static Comparator<DimensionalShapeType> COMPARATOR = Comparator.comparingInt(DimensionalShapeType::centroidDimension);
58+
private static DimensionalShapeType[] values = values();
5959

6060
public static DimensionalShapeType max(DimensionalShapeType s1, DimensionalShapeType s2) {
6161
if (s1 == null) {
@@ -66,12 +66,16 @@ public static DimensionalShapeType max(DimensionalShapeType s1, DimensionalShape
6666
return COMPARATOR.compare(s1, s2) >= 0 ? s1 : s2;
6767
}
6868

69+
public static DimensionalShapeType fromOrdinalByte(byte ordinal) {
70+
return values[Byte.toUnsignedInt(ordinal)];
71+
}
72+
6973
public void writeTo(ByteBuffersDataOutput out) {
7074
out.writeByte((byte) ordinal());
7175
}
7276

7377
public static DimensionalShapeType readFrom(ByteArrayDataInput in) {
74-
return values[Byte.toUnsignedInt(in.readByte())];
78+
return fromOrdinalByte(in.readByte());
7579
}
7680

7781
public static DimensionalShapeType forGeometry(Geometry geometry) {
@@ -80,8 +84,7 @@ public static DimensionalShapeType forGeometry(Geometry geometry) {
8084

8185
@Override
8286
public DimensionalShapeType visit(Circle circle) {
83-
st = DimensionalShapeType.max(st, DimensionalShapeType.POLYGON);
84-
return st;
87+
throw new IllegalArgumentException("invalid shape type found [Circle] while computing dimensional shape type");
8588
}
8689

8790
@Override

server/src/main/java/org/elasticsearch/common/geo/TriangleTreeReader.java

+24-4
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,26 @@
3131
*
3232
* This class supports checking bounding box
3333
* relations against the serialized triangle tree.
34+
*
35+
* -----------------------------------------
36+
* | The binary format of the tree |
37+
* -----------------------------------------
38+
* ----------------------------------------- --
39+
* | centroid-x-coord (4 bytes) | |
40+
* ----------------------------------------- |
41+
* | centroid-y-coord (4 bytes) | |
42+
* ----------------------------------------- |
43+
* | DimensionalShapeType (1 byte) | | Centroid-related header
44+
* ----------------------------------------- |
45+
* | Sum of weights (VLong 1-9 bytes) | |
46+
* ----------------------------------------- --
47+
* | Extent (var-encoding) |
48+
* -----------------------------------------
49+
* | Triangle Tree |
50+
* -----------------------------------------
51+
* -----------------------------------------
3452
*/
3553
public class TriangleTreeReader {
36-
private static final int CENTROID_HEADER_SIZE_IN_BYTES = 9;
37-
3854
private final ByteArrayDataInput input;
3955
private final CoordinateEncoder coordinateEncoder;
4056
private final Rectangle2D rectangle2D;
@@ -58,8 +74,7 @@ public void reset(BytesRef bytesRef) throws IOException {
5874
*/
5975
public Extent getExtent() {
6076
if (treeOffset == 0) {
61-
// TODO: Compress serialization of extent
62-
input.setPosition(CENTROID_HEADER_SIZE_IN_BYTES);
77+
getSumCentroidWeight(); // skip CENTROID_HEADER + var-long sum-weight
6378
Extent.readFromCompressed(input, extent);
6479
treeOffset = input.getPosition();
6580
} else {
@@ -89,6 +104,11 @@ public DimensionalShapeType getDimensionalShapeType() {
89104
return DimensionalShapeType.readFrom(input);
90105
}
91106

107+
public double getSumCentroidWeight() {
108+
input.setPosition(9);
109+
return Double.longBitsToDouble(input.readVLong());
110+
}
111+
92112
/**
93113
* Compute the relation with the provided bounding box. If the result is CELL_INSIDE_QUERY
94114
* then the bounding box is within the shape.

server/src/main/java/org/elasticsearch/common/geo/TriangleTreeWriter.java

+1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ public void writeTo(ByteBuffersDataOutput out) throws IOException {
5050
out.writeInt(coordinateEncoder.encodeX(centroidCalculator.getX()));
5151
out.writeInt(coordinateEncoder.encodeY(centroidCalculator.getY()));
5252
centroidCalculator.getDimensionalShapeType().writeTo(out);
53+
out.writeVLong(Double.doubleToLongBits(centroidCalculator.sumWeight()));
5354
extent.writeCompressed(out);
5455
node.writeTo(out);
5556
}

server/src/main/java/org/elasticsearch/index/fielddata/MultiGeoValues.java

+11
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,11 @@ public DimensionalShapeType dimensionalShapeType() {
123123
return DimensionalShapeType.POINT;
124124
}
125125

126+
@Override
127+
public double weight() {
128+
return 1.0;
129+
}
130+
126131
@Override
127132
public double lat() {
128133
return geoPoint.lat();
@@ -173,6 +178,11 @@ public DimensionalShapeType dimensionalShapeType() {
173178
return reader.getDimensionalShapeType();
174179
}
175180

181+
@Override
182+
public double weight() {
183+
return reader.getSumCentroidWeight();
184+
}
185+
176186
@Override
177187
public double lat() {
178188
return reader.getCentroidY();
@@ -229,6 +239,7 @@ public interface GeoValue {
229239
BoundingBox boundingBox();
230240
GeoRelation relate(Rectangle rectangle);
231241
DimensionalShapeType dimensionalShapeType();
242+
double weight();
232243
}
233244

234245
public static class BoundingBox {

0 commit comments

Comments
 (0)