Skip to content

Commit 045f1ed

Browse files
authored
MB-54131: Geoshape query decode optimization (#14)
* MB-54131: Geoshape query decode optimization - Added buffer pool pointer to polygon - Added parent polygon pointer to loop - Added decoder function to select, populate and return a pointer to the buffer from a pool of buffers - Updated loop decode function to handle chunks of data being read - Updated polygon decode function to pass on the buffer pool to the loops - Updated extractShapesFromBytes and FilterGeoShapesOnRelation to include the buffer pool in their parameters * MB-54131: Geoshape query decode optimization - Changed buffer pool from pointer to slice - Removed randomly formatted comments * MB-54131: Geoshape query decode + add .gitignore * MB-54131: Geoshape query decode optimization - Added a buffer pool struct - Increased size of the maximum buffer to 24KB - Added better comments * MB-54131: Geoshape query decode optimization - Minor formatting fixes - Reusing buffers instead of creating them every time * MB-54131: Geoshape query decode optimization - Better comments - Removed some magic numbers * MB-54131: Geoshape query decode optimization - Changed buffer pool implementation to give the smallest buffer that fits the data completely - Changed decoder implementation to allow buffer sizes bigger than the number of bytes needed * MB-54131: Geoshape query decode optimization - Renamed some constants * MB-54131: Geoshape query decode optimization - Added benchmark for loop decode
1 parent 7135870 commit 045f1ed

File tree

7 files changed

+182
-10
lines changed

7 files changed

+182
-10
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.DS_Store

geojson/geojson_shapes_util.go

+5-5
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ var jsoniter = jsoniterator.ConfigCompatibleWithStandardLibrary
3131
// the `relation` filter and confirms whether the shape in the document
3232
// satisfies the given relation.
3333
func FilterGeoShapesOnRelation(shape index.GeoJSON, targetShapeBytes []byte,
34-
relation string, reader **bytes.Reader) (bool, error) {
34+
relation string, reader **bytes.Reader, bufPool *s2.GeoBufferPool) (bool, error) {
3535

36-
shapeInDoc, err := extractShapesFromBytes(targetShapeBytes, reader)
36+
shapeInDoc, err := extractShapesFromBytes(targetShapeBytes, reader, bufPool)
3737
if err != nil {
3838
return false, err
3939
}
@@ -43,7 +43,7 @@ func FilterGeoShapesOnRelation(shape index.GeoJSON, targetShapeBytes []byte,
4343

4444
// extractShapesFromBytes unmarshal the bytes to retrieve the
4545
// embedded geojson shape.
46-
func extractShapesFromBytes(targetShapeBytes []byte, r **bytes.Reader) (
46+
func extractShapesFromBytes(targetShapeBytes []byte, r **bytes.Reader, bufPool *s2.GeoBufferPool) (
4747
index.GeoJSON, error) {
4848
if (*r) == nil {
4949
*r = bytes.NewReader(targetShapeBytes[1:])
@@ -109,7 +109,7 @@ func extractShapesFromBytes(targetShapeBytes []byte, r **bytes.Reader) (
109109
return mls, nil
110110

111111
case PolygonTypePrefix:
112-
pgn := &Polygon{s2pgn: &s2.Polygon{}}
112+
pgn := &Polygon{s2pgn: &s2.Polygon{BufPool: bufPool}}
113113
err := pgn.s2pgn.Decode(*r)
114114
if err != nil {
115115
return nil, err
@@ -156,7 +156,7 @@ func extractShapesFromBytes(targetShapeBytes []byte, r **bytes.Reader) (
156156
gc := &GeometryCollection{Shapes: make([]index.GeoJSON, numShapes)}
157157

158158
for i := int32(0); i < numShapes; i++ {
159-
shape, err := extractShapesFromBytes(inputBytes[:lengths[i]], r)
159+
shape, err := extractShapesFromBytes(inputBytes[:lengths[i]], r, nil)
160160
if err != nil {
161161
return nil, err
162162
}

s2/buffer_pool.go

+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// Copyright (c) 2023 Couchbase, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package s2
16+
17+
// GeoBufferPool is a set of reusable byte buffers whose sizes start at
// minSize and double (minSize, 2*minSize, 4*minSize, ...) up to the largest
// such size that does not exceed maxSize. Buffers are allocated lazily, the
// first time a given size class is requested.
//
// Get hands out the same backing slice every time a size class is requested
// and the pool does no locking, so a returned buffer's contents are only
// meaningful until the next call to Get.
type GeoBufferPool struct {
	// buffers holds one slot per size class, smallest first. A nil slot
	// has not been allocated yet.
	buffers [][]byte
	// maxSize is the size of the largest buffer in the pool (0 if the pool
	// has no size classes).
	maxSize int
	// minSize is the size of the smallest buffer in the pool.
	minSize int
}

// NewGeoBufferPool returns a pool with buffer sizes minSize, 2*minSize,
// 4*minSize and so on, for as long as the size does not exceed maxSize. If
// maxSize is not itself one of those sizes, the largest one below it becomes
// the pool's effective maximum.
//
// If minSize is not positive or is greater than maxSize, no size class fits
// and the returned pool is empty; Get on an empty pool allocates a fresh
// buffer on every call instead of reusing one.
func NewGeoBufferPool(maxSize int, minSize int) *GeoBufferPool {
	count := 0
	largest := 0

	// A non-positive minSize can never reach maxSize by doubling (0 stays 0),
	// so it must not enter the loop at all.
	if minSize > 0 {
		for size := minSize; size <= maxSize; size *= 2 {
			count++
			largest = size
			// Stop before doubling once the next size would exceed maxSize;
			// this also keeps size*2 from overflowing for very large maxSize.
			if size > maxSize/2 {
				break
			}
		}
	}

	return &GeoBufferPool{
		buffers: make([][]byte, count),
		maxSize: largest,
		minSize: minSize,
	}
}

// Get returns the smallest pooled buffer whose length is at least size. If
// size is larger than every buffer in the pool, the largest buffer is
// returned and the caller is expected to consume its data in several chunks.
//
// When the receiver is nil or the pool has no size classes, Get falls back to
// allocating a new buffer of exactly size bytes (a non-positive size yields
// an empty buffer), so callers that were not given a pool still work.
func (b *GeoBufferPool) Get(size int) []byte {
	if b == nil || len(b.buffers) == 0 {
		if size < 0 {
			size = 0
		}
		return make([]byte, size)
	}

	// Walk up the size classes until one fits or the largest is reached.
	bufSize := b.minSize
	idx := 0
	for last := len(b.buffers) - 1; idx < last && size > bufSize; idx++ {
		bufSize *= 2
	}

	if b.buffers[idx] == nil {
		b.buffers[idx] = make([]byte, bufSize)
	}
	return b.buffers[idx]
}

s2/encode.go

+14
Original file line numberDiff line numberDiff line change
@@ -222,3 +222,17 @@ func (d *decoder) readUvarint() (x uint64) {
222222
x, d.err = binary.ReadUvarint(d.r)
223223
return
224224
}
225+
226+
func (d *decoder) readFloat64Array(size int, buf []byte) int {
227+
if d.err != nil || buf == nil {
228+
return 0
229+
}
230+
231+
if size >= len(buf) {
232+
_, d.err = io.ReadFull(d.r, buf)
233+
return len(buf)
234+
} else {
235+
_, d.err = io.ReadFull(d.r, buf[0:size])
236+
return size
237+
}
238+
}

s2/loop.go

+34-4
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
package s2
1616

1717
import (
18+
"encoding/binary"
1819
"fmt"
1920
"io"
2021
"math"
@@ -24,6 +25,9 @@ import (
2425
"github.com/golang/geo/s1"
2526
)
2627

28+
// SizeOfFloat is the number of bytes one encoded float64 coordinate occupies.
const SizeOfFloat = 8
29+
// SizeOfVertex is the number of bytes one encoded vertex occupies: three
// float64 coordinates (X, Y and Z).
const SizeOfVertex = 3 * SizeOfFloat
30+
2731
// Loop represents a simple spherical polygon. It consists of a sequence
2832
// of vertices where the first vertex is implicitly connected to the
2933
// last. All loops are defined to have a CCW orientation, i.e. the interior of
@@ -66,6 +70,9 @@ type Loop struct {
6670

6771
// index is the spatial index for this Loop.
6872
index *ShapeIndex
73+
74+
// A buffer pool to be used while decoding the polygon
75+
BufPool *GeoBufferPool
6976
}
7077

7178
// LoopFromPoints constructs a loop from the given points.
@@ -1287,11 +1294,34 @@ func (l *Loop) decode(d *decoder) {
12871294
return
12881295
}
12891296
l.vertices = make([]Point, nvertices)
1290-
for i := range l.vertices {
1291-
l.vertices[i].X = d.readFloat64()
1292-
l.vertices[i].Y = d.readFloat64()
1293-
l.vertices[i].Z = d.readFloat64()
1297+
1298+
// Each vertex requires 24 bytes of storage
1299+
numBytesNeeded := int(nvertices) * SizeOfVertex
1300+
1301+
i := 0
1302+
1303+
for numBytesNeeded > 0 {
1304+
arr := l.BufPool.Get(numBytesNeeded)
1305+
numBytesRead := d.readFloat64Array(numBytesNeeded, arr)
1306+
1307+
if numBytesRead == 0 {
1308+
break
1309+
}
1310+
1311+
numBytesNeeded = numBytesNeeded - numBytesRead
1312+
1313+
// Parsing one vertex at a time into the vertex array of the loop
1314+
// by going through the buffer in steps of SizeOfVertex and converting
1315+
// SizeOfFloat bytes at a time into the float values
1316+
for j := 0; j < int(numBytesRead/SizeOfVertex); j++ {
1317+
l.vertices[i+j].X = math.Float64frombits(binary.LittleEndian.Uint64(arr[SizeOfFloat*(j*3) : SizeOfFloat*(j*3+1)]))
1318+
l.vertices[i+j].Y = math.Float64frombits(binary.LittleEndian.Uint64(arr[SizeOfFloat*(j*3+1) : SizeOfFloat*(j*3+2)]))
1319+
l.vertices[i+j].Z = math.Float64frombits(binary.LittleEndian.Uint64(arr[SizeOfFloat*(j*3+2) : SizeOfFloat*(j*3+3)]))
1320+
}
1321+
1322+
i = i + int(numBytesRead/SizeOfVertex)
12941323
}
1324+
12951325
l.index = NewShapeIndex()
12961326
l.originInside = d.readBool()
12971327
l.depth = int(d.readUint32())

s2/loop_test.go

+59
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ package s2
1717
import (
1818
"fmt"
1919
"math"
20+
"os"
2021
"testing"
2122

2223
"github.com/golang/geo/r1"
@@ -1817,3 +1818,61 @@ func BenchmarkLoopContainsPoint(b *testing.B) {
18171818
vertices *= 2
18181819
}
18191820
}
1821+
1822+
func BenchmarkLoopDecode(b *testing.B) {
1823+
1824+
points := make([][]float64, 0)
1825+
1826+
points = append(points, []float64{10, 10})
1827+
for i := 1; i < 2000; i++ {
1828+
points = append(points, []float64{10 - 0.01*float64(i), 10})
1829+
}
1830+
points = append(points, []float64{-10, 10})
1831+
for i := 1; i < 2000; i++ {
1832+
points = append(points, []float64{-10, 10 - 0.01*float64(i)})
1833+
}
1834+
points = append(points, []float64{-10, -10})
1835+
for i := 1; i < 2000; i++ {
1836+
points = append(points, []float64{-10 + 0.01*float64(i), -10})
1837+
}
1838+
points = append(points, []float64{10, -10})
1839+
for i := 1; i < 2000; i++ {
1840+
points = append(points, []float64{10, -10 + 0.01*float64(i)})
1841+
}
1842+
points = append(points, []float64{10, 10})
1843+
1844+
pointString := ""
1845+
1846+
for i := 0; i < len(points); i++ {
1847+
1848+
if i == 0 {
1849+
pointString = pointString + fmt.Sprintf("%f:%f", points[i][0], points[i][1])
1850+
} else {
1851+
pointString = pointString + fmt.Sprintf(", %f:%f", points[i][0], points[i][1])
1852+
}
1853+
}
1854+
1855+
loop := LoopFromPoints(parsePoints(pointString))
1856+
1857+
f, err := os.OpenFile("testLoop.txt", os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0600)
1858+
if err != nil {
1859+
b.Fatalf("%v", err)
1860+
}
1861+
loop.Encode(f)
1862+
f.Close()
1863+
1864+
bufPool := NewGeoBufferPool(24 * 1024, 24)
1865+
b.ResetTimer()
1866+
for i := 0; i < b.N; i++ {
1867+
f, err := os.Open("testLoop.txt")
1868+
if err != nil {
1869+
b.Fatalf("%v", err)
1870+
}
1871+
l := &Loop{
1872+
BufPool: bufPool,
1873+
}
1874+
l.decode(&decoder{r: asByteReader(f)})
1875+
f.Close()
1876+
}
1877+
os.Remove("testLoop.txt")
1878+
}

s2/polygon.go

+5-1
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ type Polygon struct {
7777
// preceding loops in the polygon. This field is used for polygons that
7878
// have a large number of loops, and may be empty for polygons with few loops.
7979
cumulativeEdges []int
80+
81+
// A buffer pool to be used while decoding the polygon
82+
BufPool *GeoBufferPool
8083
}
8184

8285
// PolygonFromLoops constructs a polygon from the given set of loops. The polygon
@@ -1133,7 +1136,7 @@ func (p *Polygon) Decode(r io.Reader) error {
11331136
const maxEncodedLoops = 10000000
11341137

11351138
func (p *Polygon) decode(d *decoder) {
1136-
*p = Polygon{}
1139+
*p = Polygon{BufPool: p.BufPool}
11371140
d.readUint8() // Ignore irrelevant serialized owns_loops_ value.
11381141

11391142
p.hasHoles = d.readBool()
@@ -1151,6 +1154,7 @@ func (p *Polygon) decode(d *decoder) {
11511154
p.loops = make([]*Loop, nloops)
11521155
for i := range p.loops {
11531156
p.loops[i] = new(Loop)
1157+
p.loops[i].BufPool = p.BufPool
11541158
p.loops[i].decode(d)
11551159
p.numVertices += len(p.loops[i].vertices)
11561160
}

0 commit comments

Comments
 (0)