@@ -54,13 +54,21 @@ func NewReplacer(oldnew ...string) *Replacer {
54
54
return & Replacer {r : & r }
55
55
}
56
56
57
- r := byteStringReplacer {}
57
+ r := byteStringReplacer {toReplace : make ([] string , 0 , len ( oldnew ) / 2 ) }
58
58
// The first occurrence of old->new map takes precedence
59
59
// over the others with the same old string.
60
60
for i := len (oldnew ) - 2 ; i >= 0 ; i -= 2 {
61
61
o := oldnew [i ][0 ]
62
62
n := oldnew [i + 1 ]
63
- r [o ] = []byte (n )
63
+ // To avoid counting repetitions multiple times.
64
+ if r .replacements [o ] == nil {
65
+ // We need to use string([]byte{o}) instead of string(o),
66
+ // to avoid utf8 encoding of o.
67
+ // E.g. string(byte(150)) produces a string of length 2.
68
+ r .toReplace = append (r .toReplace , string ([]byte {o }))
69
+ }
70
+ r .replacements [o ] = []byte (n )
71
+
64
72
}
65
73
return & Replacer {r : & r }
66
74
}
@@ -454,34 +462,60 @@ func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
454
462
455
463
// byteStringReplacer is the implementation that's used when all the
456
464
// "old" values are single ASCII bytes but the "new" values vary in size.
457
- // The array contains replacement byte slices indexed by old byte.
458
- // A nil []byte means that the old byte should not be replaced.
459
- type byteStringReplacer [256 ][]byte
465
+ type byteStringReplacer struct {
466
+ // replacements contains replacement byte slices indexed by old byte.
467
+ // A nil []byte means that the old byte should not be replaced.
468
+ replacements [256 ][]byte
469
+ // toReplace keeps a list of bytes to replace. Depending on length of toReplace
470
+ // and length of target string it may be faster to use Count, or a plain loop.
471
+ // We store single byte as a string, because Count takes a string.
472
+ toReplace []string
473
+ }
474
+
475
+ // countCutOff controls the ratio of a string length to a number of replacements
476
+ // at which (*byteStringReplacer).Replace switches algorithms.
477
+ // For strings with a higher ratio of length to replacements than that value,
478
+ // we call Count for each replacement from toReplace.
479
+ // For strings with a lower ratio, we use a simple loop because of Count's overhead.
480
+ // countCutOff is an empirically determined overhead multiplier.
481
+ // TODO(tocarip) revisit once we have register-based abi/mid-stack inlining.
482
+ const countCutOff = 8
460
483
461
484
func (r * byteStringReplacer ) Replace (s string ) string {
462
485
newSize := len (s )
463
486
anyChanges := false
464
- for i := 0 ; i < len (s ); i ++ {
465
- b := s [i ]
466
- if r [b ] != nil {
467
- anyChanges = true
468
- // The -1 is because we are replacing 1 byte with len(r[b]) bytes.
469
- newSize += len (r [b ]) - 1
487
+ // Is it faster to use Count?
488
+ if len (r .toReplace )* countCutOff <= len (s ) {
489
+ for _ , x := range r .toReplace {
490
+ if c := Count (s , x ); c != 0 {
491
+ // The -1 is because we are replacing 1 byte with len(r.replacements[x[0]]) bytes.
492
+ newSize += c * (len (r .replacements [x [0 ]]) - 1 )
493
+ anyChanges = true
494
+ }
495
+
496
+ }
497
+ } else {
498
+ for i := 0 ; i < len (s ); i ++ {
499
+ b := s [i ]
500
+ if r .replacements [b ] != nil {
501
+ // See above for explanation of -1
502
+ newSize += len (r .replacements [b ]) - 1
503
+ anyChanges = true
504
+ }
470
505
}
471
506
}
472
507
if ! anyChanges {
473
508
return s
474
509
}
475
510
buf := make ([]byte , newSize )
476
- bi := buf
511
+ j := 0
477
512
for i := 0 ; i < len (s ); i ++ {
478
513
b := s [i ]
479
- if r [b ] != nil {
480
- n := copy (bi , r [b ])
481
- bi = bi [n :]
514
+ if r .replacements [b ] != nil {
515
+ j += copy (buf [j :], r .replacements [b ])
482
516
} else {
483
- bi [ 0 ] = b
484
- bi = bi [ 1 :]
517
+ buf [ j ] = b
518
+ j ++
485
519
}
486
520
}
487
521
return string (buf )
@@ -492,7 +526,7 @@ func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err erro
492
526
last := 0
493
527
for i := 0 ; i < len (s ); i ++ {
494
528
b := s [i ]
495
- if r [b ] == nil {
529
+ if r . replacements [b ] == nil {
496
530
continue
497
531
}
498
532
if last != i {
@@ -503,7 +537,7 @@ func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err erro
503
537
}
504
538
}
505
539
last = i + 1
506
- nw , err := w .Write (r [b ])
540
+ nw , err := w .Write (r . replacements [b ])
507
541
n += nw
508
542
if err != nil {
509
543
return n , err
0 commit comments