Skip to content

Commit 562881d

Browse files
committed
Improve memory usage during map build
Speed up build for large maps by allocating in chunks and then copying over to one slice at the end. This generates less garbage and involves less copying than the previous strategy of using append.
1 parent a4c6370 commit 562881d

File tree

1 file changed

+54
-14
lines changed

1 file changed

+54
-14
lines changed

uint32_store.go

+54-14
Original file line numberDiff line numberDiff line change
@@ -29,25 +29,49 @@ type (
2929
// Get should return the value for the supplied key
3030
Get(string) uint32
3131
}
32+
33+
// uint32Builder holds the temporary state that is used only while the byteValue slice of a Uint32Store is being built
34+
uint32Builder struct {
35+
all [][]byteValue
36+
src Uint32Source
37+
len int
38+
}
3239
)
3340

34-
// NewUint32Store creates from the data supplied in srcMap
35-
func NewUint32Store(srcMap Uint32Source) Uint32Store {
36-
m := Uint32Store{store: make([]byteValue, 1)}
37-
if keys := srcMap.AppendKeys([]string(nil)); len(keys) > 0 {
41+
// NewUint32Store creates from the data supplied in src
42+
func NewUint32Store(src Uint32Source) Uint32Store {
43+
if keys := src.AppendKeys([]string(nil)); len(keys) > 0 {
3844
sort.Strings(keys)
39-
m.makeByteValue(&m.store[0], keys, 0, srcMap)
45+
return Uint32Store{store: uint32Build(keys, src)}
46+
}
47+
return Uint32Store{store: []byteValue{{}}}
48+
}
49+
50+
// uint32Build constructs the map by allocating memory in blocks
51+
// and then copying into the eventual slice at the end. This is
52+
// more efficient than continually using append.
53+
func uint32Build(keys []string, src Uint32Source) []byteValue {
54+
b := uint32Builder{
55+
all: [][]byteValue{make([]byteValue, 1, 256)},
56+
src: src,
57+
len: 1,
58+
}
59+
b.makeByteValue(&b.all[0][0], keys, 0)
60+
// copy all blocks to one slice
61+
s := make([]byteValue, 0, b.len)
62+
for _, a := range b.all {
63+
s = append(s, a...)
4064
}
41-
return m
65+
return s
4266
}
4367

4468
// makeByteValue will initialise the supplied byteValue for
4569
// the sorted strings in slice a considering bytes at byteIndex in the strings
46-
func (m *Uint32Store) makeByteValue(bv *byteValue, a []string, byteIndex int, srcMap Uint32Source) {
70+
func (b *uint32Builder) makeByteValue(bv *byteValue, a []string, byteIndex int) {
4771
// if there is a string with no more bytes then it is always first because they are sorted
4872
if len(a[0]) == byteIndex {
4973
bv.valid = true
50-
bv.value = srcMap.Get(a[0])
74+
bv.value = b.src.Get(a[0])
5175
a = a[1:]
5276
}
5377
if len(a) == 0 {
@@ -56,23 +80,39 @@ func (m *Uint32Store) makeByteValue(bv *byteValue, a []string, byteIndex int, sr
5680
bv.nextOffset = a[0][byteIndex] // lowest value for next byte
5781
bv.nextLen = a[len(a)-1][byteIndex] - // highest value for next byte
5882
bv.nextOffset + 1 // minus lowest value +1 = number of possible next bytes
59-
bv.nextLo = uint32(len(m.store)) // first byteValue struct to use
60-
61-
// allocate enough byteValue structs - they default to "not valid"
62-
m.store = append(m.store, make([]byteValue, bv.nextLen)...)
83+
bv.nextLo = uint32(b.len) // first byteValue struct in eventual built slice
84+
next := b.alloc(bv.nextLen) // new byteValues default to "not valid"
6385

6486
for i, n := 0, len(a); i < n; {
6587
// find range of strings starting with the same byte
6688
iSameByteHi := i + 1
6789
for iSameByteHi < n && a[iSameByteHi][byteIndex] == a[i][byteIndex] {
6890
iSameByteHi++
6991
}
70-
nextStoreIndex := bv.nextLo + uint32(a[i][byteIndex]-bv.nextOffset)
71-
m.makeByteValue(&m.store[nextStoreIndex], a[i:iSameByteHi], byteIndex+1, srcMap)
92+
b.makeByteValue(&next[(a[i][byteIndex]-bv.nextOffset)], a[i:iSameByteHi], byteIndex+1)
7293
i = iSameByteHi
7394
}
7495
}
7596

97+
// alloc will grab space in the current block if available or allocate a new one if not
98+
func (b *uint32Builder) alloc(nByteValues byte) []byteValue {
99+
n := int(nByteValues)
100+
b.len += n
101+
cur := &b.all[len(b.all)-1] // current
102+
curCap, curLen := cap(*cur), len(*cur)
103+
if curCap-curLen >= n { // enough space in current
104+
*cur = (*cur)[: curLen+n : curCap]
105+
return (*cur)[curLen:]
106+
}
107+
newCap := curCap
108+
if newCap < 1<<20 {
109+
newCap *= 2
110+
}
111+
a := make([]byteValue, n, newCap)
112+
b.all = append(b.all, a)
113+
return a
114+
}
115+
76116
// LookupString looks up the supplied string in the map
77117
func (m *Uint32Store) LookupString(s string) (uint32, bool) {
78118
bv := &m.store[0]

0 commit comments

Comments
 (0)