Skip to content

Commit fd22542

Browse files
TocarIP and bradfitz
authored and committed
strings: speed-up replace for byteStringReplacer case
Use Count instead of a loop to determine the number of replacements.
Also increment index instead of advancing slices, to avoid some extra stores.
Shows very significant speed-up on html benchmarks:

Escape-6             34.2µs ± 2%  20.8µs ± 2%  -39.06%  (p=0.000 n=10+10)
EscapeNone-6         7.04µs ± 1%  1.05µs ± 0%  -85.03%  (p=0.000 n=10+10)

On benchmarks in package strings results are still significant:

ByteStringMatch-6    1.59µs ± 2%  1.17µs ± 2%  -26.35%  (p=0.000 n=10+10)
HTMLEscapeNew-6       390ns ± 2%   337ns ± 2%  -13.62%  (p=0.000 n=10+10)
HTMLEscapeOld-6       621ns ± 2%   603ns ± 2%   -2.95%  (p=0.000 n=10+9)

Change-Id: Ibea3235b6e451ba72cd5db57716d17b917e72944
Reviewed-on: https://go-review.googlesource.com/97255
Run-TryBot: Ilya Tocar <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Brad Fitzpatrick <[email protected]>
1 parent c0841ec commit fd22542

File tree

1 file changed

+53
-19
lines changed

1 file changed

+53
-19
lines changed

src/strings/replace.go

Lines changed: 53 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,21 @@ func NewReplacer(oldnew ...string) *Replacer {
5454
return &Replacer{r: &r}
5555
}
5656

57-
r := byteStringReplacer{}
57+
r := byteStringReplacer{toReplace: make([]string, 0, len(oldnew)/2)}
5858
// The first occurrence of old->new map takes precedence
5959
// over the others with the same old string.
6060
for i := len(oldnew) - 2; i >= 0; i -= 2 {
6161
o := oldnew[i][0]
6262
n := oldnew[i+1]
63-
r[o] = []byte(n)
63+
// To avoid counting repetitions multiple times.
64+
if r.replacements[o] == nil {
65+
// We need to use string([]byte{o}) instead of string(o),
66+
// to avoid utf8 encoding of o.
67+
// E.g. string(byte(150)) produces a string of length 2.
68+
r.toReplace = append(r.toReplace, string([]byte{o}))
69+
}
70+
r.replacements[o] = []byte(n)
71+
6472
}
6573
return &Replacer{r: &r}
6674
}
@@ -454,34 +462,60 @@ func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
454462

455463
// byteStringReplacer is the implementation that's used when all the
456464
// "old" values are single ASCII bytes but the "new" values vary in size.
457-
// The array contains replacement byte slices indexed by old byte.
458-
// A nil []byte means that the old byte should not be replaced.
459-
type byteStringReplacer [256][]byte
465+
type byteStringReplacer struct {
466+
// replacements contains replacement byte slices indexed by old byte.
467+
// A nil []byte means that the old byte should not be replaced.
468+
replacements [256][]byte
469+
// toReplace keeps a list of bytes to replace. Depending on length of toReplace
470+
// and length of target string it may be faster to use Count, or a plain loop.
471+
// We store single byte as a string, because Count takes a string.
472+
toReplace []string
473+
}
474+
475+
// countCutOff controls the ratio of a string length to a number of replacements
476+
// at which (*byteStringReplacer).Replace switches algorithms.
477+
// For strings with higher ratio of length to replacements than that value,
478+
// we call Count, for each replacement from toReplace.
479+
// For strings with a lower ratio we use a simple loop, because of Count overhead.
480+
// countCutOff is an empirically determined overhead multiplier.
481+
// TODO(tocarip) revisit once we have register-based abi/mid-stack inlining.
482+
const countCutOff = 8
460483

461484
func (r *byteStringReplacer) Replace(s string) string {
462485
newSize := len(s)
463486
anyChanges := false
464-
for i := 0; i < len(s); i++ {
465-
b := s[i]
466-
if r[b] != nil {
467-
anyChanges = true
468-
// The -1 is because we are replacing 1 byte with len(r[b]) bytes.
469-
newSize += len(r[b]) - 1
487+
// Is it faster to use Count?
488+
if len(r.toReplace)*countCutOff <= len(s) {
489+
for _, x := range r.toReplace {
490+
if c := Count(s, x); c != 0 {
491+
// The -1 is because we are replacing 1 byte with len(r.replacements[x[0]]) bytes.
492+
newSize += c * (len(r.replacements[x[0]]) - 1)
493+
anyChanges = true
494+
}
495+
496+
}
497+
} else {
498+
for i := 0; i < len(s); i++ {
499+
b := s[i]
500+
if r.replacements[b] != nil {
501+
// See above for explanation of -1
502+
newSize += len(r.replacements[b]) - 1
503+
anyChanges = true
504+
}
470505
}
471506
}
472507
if !anyChanges {
473508
return s
474509
}
475510
buf := make([]byte, newSize)
476-
bi := buf
511+
j := 0
477512
for i := 0; i < len(s); i++ {
478513
b := s[i]
479-
if r[b] != nil {
480-
n := copy(bi, r[b])
481-
bi = bi[n:]
514+
if r.replacements[b] != nil {
515+
j += copy(buf[j:], r.replacements[b])
482516
} else {
483-
bi[0] = b
484-
bi = bi[1:]
517+
buf[j] = b
518+
j++
485519
}
486520
}
487521
return string(buf)
@@ -492,7 +526,7 @@ func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err erro
492526
last := 0
493527
for i := 0; i < len(s); i++ {
494528
b := s[i]
495-
if r[b] == nil {
529+
if r.replacements[b] == nil {
496530
continue
497531
}
498532
if last != i {
@@ -503,7 +537,7 @@ func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err erro
503537
}
504538
}
505539
last = i + 1
506-
nw, err := w.Write(r[b])
540+
nw, err := w.Write(r.replacements[b])
507541
n += nw
508542
if err != nil {
509543
return n, err

0 commit comments

Comments
 (0)