@@ -81,7 +81,7 @@ func main() {
81
81
o .maxSkip = 100
82
82
o .genEncodeBetterBlockAsm ("encodeSnappyBetterBlockAsm" , 17 , 14 , 7 , 7 , limit14B )
83
83
o .maxSkip = 0
84
- o .genEncodeBetterBlockAsm ("encodeSnappyBetterBlockAsm64K" , 16 , 14 , 7 , 7 , 64 << 10 - 1 )
84
+ o .genEncodeBetterBlockAsm ("encodeSnappyBetterBlockAsm64K" , 16 , 13 , 7 , 7 , 64 << 10 - 1 )
85
85
o .genEncodeBetterBlockAsm ("encodeSnappyBetterBlockAsm12B" , 14 , 12 , 6 , 6 , limit12B )
86
86
o .genEncodeBetterBlockAsm ("encodeSnappyBetterBlockAsm10B" , 12 , 10 , 5 , 6 , limit10B )
87
87
o .genEncodeBetterBlockAsm ("encodeSnappyBetterBlockAsm8B" , 10 , 8 , 4 , 6 , limit8B )
@@ -146,6 +146,15 @@ func assert(fn func(ok LabelRef)) {
146
146
}
147
147
}
148
148
149
+ type regTable struct {
150
+ r reg.Register
151
+ disp int
152
+ }
153
+
154
+ func (r regTable ) Idx (idx reg.GPVirtual , scale uint8 ) Mem {
155
+ return Mem {Base : r .r , Index : idx , Scale : scale , Disp : r .disp }
156
+ }
157
+
149
158
type options struct {
150
159
snappy bool
151
160
bmi1 bool
@@ -163,7 +172,15 @@ func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, m
163
172
if o .skipOutput {
164
173
dstTxt = ""
165
174
}
166
- TEXT (name , 0 , "func(" + dstTxt + "src []byte) int" )
175
+
176
+ var tableSize = 4 * (1 << tableBits )
177
+ // Memzero needs at least 128 bytes.
178
+ if tableSize < 128 {
179
+ panic ("tableSize must be at least 128 bytes" )
180
+ }
181
+
182
+ arrPtr := fmt .Sprintf (",tmp *[%d]byte" , tableSize )
183
+ TEXT (name , 0 , "func(" + dstTxt + "src []byte" + arrPtr + ") int" )
167
184
Doc (name + " encodes a non-empty src to a guaranteed-large-enough dst." ,
168
185
fmt .Sprintf ("Maximum input %d bytes." , maxLen ),
169
186
"It assumes that the varint-encoded length of the decompressed bytes has already been written." , "" )
@@ -173,7 +190,6 @@ func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, m
173
190
o .maxOffset = maxLen - 1
174
191
var literalMaxOverhead = maxLitOverheadFor (maxLen )
175
192
176
- var tableSize = 4 * (1 << tableBits )
177
193
// Memzero needs at least 128 bytes.
178
194
if tableSize < 128 {
179
195
panic ("tableSize must be at least 128 bytes" )
@@ -209,8 +225,8 @@ func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, m
209
225
// nextSTempL keeps nextS while other functions are being called.
210
226
nextSTempL := AllocLocal (4 )
211
227
212
- // Alloc table last
213
- table := AllocLocal ( tableSize )
228
+ // Load pointer to temp table
229
+ table := regTable { r : Load ( Param ( "tmp" ), GP64 ())}
214
230
215
231
dst := GP64 ()
216
232
if ! o .skipOutput {
@@ -236,7 +252,7 @@ func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, m
236
252
iReg := GP64 ()
237
253
MOVQ (U32 (tableSize / 8 / 16 ), iReg )
238
254
tablePtr := GP64 ()
239
- LEAQ (table , tablePtr )
255
+ MOVQ (table . r , tablePtr )
240
256
zeroXmm := XMM ()
241
257
PXOR (zeroXmm , zeroXmm )
242
258
@@ -855,7 +871,17 @@ func maxLitOverheadFor(n int) int {
855
871
}
856
872
857
873
func (o options ) genEncodeBetterBlockAsm (name string , lTableBits , sTableBits , skipLog , lHashBytes , maxLen int ) {
858
- TEXT (name , 0 , "func(dst, src []byte) int" )
874
+ var lTableSize = 4 * (1 << lTableBits )
875
+ var sTableSize = 4 * (1 << sTableBits )
876
+ tableSize := lTableSize + sTableSize
877
+
878
+ // Memzero needs at least 128 bytes.
879
+ if tableSize < 128 {
880
+ panic ("tableSize must be at least 128 bytes" )
881
+ }
882
+ arrPtr := fmt .Sprintf (", tmp *[%d]byte" , tableSize )
883
+
884
+ TEXT (name , 0 , "func(dst, src []byte" + arrPtr + ") int" )
859
885
Doc (name + " encodes a non-empty src to a guaranteed-large-enough dst." ,
860
886
fmt .Sprintf ("Maximum input %d bytes." , maxLen ),
861
887
"It assumes that the varint-encoded length of the decompressed bytes has already been written." , "" )
@@ -870,9 +896,6 @@ func (o options) genEncodeBetterBlockAsm(name string, lTableBits, sTableBits, sk
870
896
o .maxLen = maxLen
871
897
o .maxOffset = maxLen - 1
872
898
873
- var lTableSize = 4 * (1 << lTableBits )
874
- var sTableSize = 4 * (1 << sTableBits )
875
-
876
899
// Memzero needs at least 128 bytes.
877
900
if (lTableSize + sTableSize ) < 128 {
878
901
panic ("tableSize must be at least 128 bytes" )
@@ -905,9 +928,9 @@ func (o options) genEncodeBetterBlockAsm(name string, lTableBits, sTableBits, sk
905
928
// nextSTempL keeps nextS while other functions are being called.
906
929
nextSTempL := AllocLocal (4 )
907
930
908
- // Alloc table last, lTab must be before sTab.
909
- lTab := AllocLocal ( lTableSize )
910
- sTab := AllocLocal ( sTableSize )
931
+ table := Load ( Param ( "tmp" ), GP64 ())
932
+ lTab := regTable { r : table }
933
+ sTab := regTable { r : table , disp : lTableSize }
911
934
912
935
dst := GP64 ()
913
936
{
@@ -930,7 +953,7 @@ func (o options) genEncodeBetterBlockAsm(name string, lTableBits, sTableBits, sk
930
953
iReg := GP64 ()
931
954
MOVQ (U32 ((sTableSize + lTableSize )/ 8 / 16 ), iReg )
932
955
tablePtr := GP64 ()
933
- LEAQ ( lTab , tablePtr )
956
+ MOVQ ( table , tablePtr )
934
957
zeroXmm := XMM ()
935
958
PXOR (zeroXmm , zeroXmm )
936
959
@@ -2916,7 +2939,7 @@ func (o options) cvtLZ4BlockAsm(lz4s bool) {
2916
2939
TEXT ("cvt" + srcAlgo + "Block" + snap , NOSPLIT , "func(dst, src []byte) (uncompressed int, dstUsed int)" )
2917
2940
Doc ("cvt" + srcAlgo + "Block converts an " + srcAlgo + " block to " + dstAlgo , "" )
2918
2941
Pragma ("noescape" )
2919
- o .outputMargin = 10
2942
+ o .outputMargin = 8
2920
2943
o .maxOffset = math .MaxUint16
2921
2944
2922
2945
const (
0 commit comments