Skip to content

Commit 99e4718

Browse files
Merge pull request #2 from sensiblecodeio/duncan/improve-build-memory-usage
Improve memory usage during map build
2 parents a4c6370 + a9b6916 commit 99e4718

File tree

1 file changed

+64
-14
lines changed

1 file changed

+64
-14
lines changed

uint32_store.go

+64-14
Original file line number | Diff line number | Diff line change
@@ -29,25 +29,49 @@ type (
2929
// Get should return the value for the supplied key
3030
Get(string) uint32
3131
}
32+
33+
// uint32Builder is used only during construction
34+
uint32Builder struct {
35+
all [][]byteValue
36+
src Uint32Source
37+
len int
38+
}
3239
)
3340

34-
// NewUint32Store creates from the data supplied in srcMap
35-
func NewUint32Store(srcMap Uint32Source) Uint32Store {
36-
m := Uint32Store{store: make([]byteValue, 1)}
37-
if keys := srcMap.AppendKeys([]string(nil)); len(keys) > 0 {
41+
// NewUint32Store creates from the data supplied in src
42+
func NewUint32Store(src Uint32Source) Uint32Store {
43+
if keys := src.AppendKeys([]string(nil)); len(keys) > 0 {
3844
sort.Strings(keys)
39-
m.makeByteValue(&m.store[0], keys, 0, srcMap)
45+
return Uint32Store{store: uint32Build(keys, src)}
46+
}
47+
return Uint32Store{store: []byteValue{{}}}
48+
}
49+
50+
// uint32Build constructs the map by allocating memory in blocks
51+
// and then copying into the eventual slice at the end. This is
52+
// more efficient than continually using append.
53+
func uint32Build(keys []string, src Uint32Source) []byteValue {
54+
b := uint32Builder{
55+
all: [][]byteValue{make([]byteValue, 1, firstBufSize(len(keys)))},
56+
src: src,
57+
len: 1,
58+
}
59+
b.makeByteValue(&b.all[0][0], keys, 0)
60+
// copy all blocks to one slice
61+
s := make([]byteValue, 0, b.len)
62+
for _, a := range b.all {
63+
s = append(s, a...)
4064
}
41-
return m
65+
return s
4266
}
4367

4468
// makeByteValue will initialise the supplied byteValue for
4569
// the sorted strings in slice a considering bytes at byteIndex in the strings
46-
func (m *Uint32Store) makeByteValue(bv *byteValue, a []string, byteIndex int, srcMap Uint32Source) {
70+
func (b *uint32Builder) makeByteValue(bv *byteValue, a []string, byteIndex int) {
4771
// if there is a string with no more bytes then it is always first because they are sorted
4872
if len(a[0]) == byteIndex {
4973
bv.valid = true
50-
bv.value = srcMap.Get(a[0])
74+
bv.value = b.src.Get(a[0])
5175
a = a[1:]
5276
}
5377
if len(a) == 0 {
@@ -56,23 +80,49 @@ func (m *Uint32Store) makeByteValue(bv *byteValue, a []string, byteIndex int, sr
5680
bv.nextOffset = a[0][byteIndex] // lowest value for next byte
5781
bv.nextLen = a[len(a)-1][byteIndex] - // highest value for next byte
5882
bv.nextOffset + 1 // minus lowest value +1 = number of possible next bytes
59-
bv.nextLo = uint32(len(m.store)) // first byteValue struct to use
60-
61-
// allocate enough byteValue structs - they default to "not valid"
62-
m.store = append(m.store, make([]byteValue, bv.nextLen)...)
83+
bv.nextLo = uint32(b.len) // first byteValue struct in eventual built slice
84+
next := b.alloc(bv.nextLen) // new byteValues default to "not valid"
6385

6486
for i, n := 0, len(a); i < n; {
6587
// find range of strings starting with the same byte
6688
iSameByteHi := i + 1
6789
for iSameByteHi < n && a[iSameByteHi][byteIndex] == a[i][byteIndex] {
6890
iSameByteHi++
6991
}
70-
nextStoreIndex := bv.nextLo + uint32(a[i][byteIndex]-bv.nextOffset)
71-
m.makeByteValue(&m.store[nextStoreIndex], a[i:iSameByteHi], byteIndex+1, srcMap)
92+
b.makeByteValue(&next[(a[i][byteIndex]-bv.nextOffset)], a[i:iSameByteHi], byteIndex+1)
7293
i = iSameByteHi
7394
}
7495
}
7596

97+
const maxBuildBufSize = 1 << 20
98+
99+
func firstBufSize(mapSize int) int {
100+
size := 1 << 4
101+
for size < mapSize && size < maxBuildBufSize {
102+
size <<= 1
103+
}
104+
return size
105+
}
106+
107+
// alloc will grab space in the current block if available or allocate a new one if not
108+
func (b *uint32Builder) alloc(nByteValues byte) []byteValue {
109+
n := int(nByteValues)
110+
b.len += n
111+
cur := &b.all[len(b.all)-1] // current
112+
curCap, curLen := cap(*cur), len(*cur)
113+
if curCap-curLen >= n { // enough space in current
114+
*cur = (*cur)[: curLen+n : curCap]
115+
return (*cur)[curLen:]
116+
}
117+
newCap := curCap
118+
if newCap < maxBuildBufSize {
119+
newCap *= 2
120+
}
121+
a := make([]byteValue, n, newCap)
122+
b.all = append(b.all, a)
123+
return a
124+
}
125+
76126
// LookupString looks up the supplied string in the map
77127
func (m *Uint32Store) LookupString(s string) (uint32, bool) {
78128
bv := &m.store[0]

0 commit comments

Comments
 (0)