Skip to content

Commit 58b4a27

Browse files
unicode: Preserve modifiers in AppendReverse() and ReverseString()
Current implementation does not check for modifier runes, which causes modifiers to be applied to wrong characters after string is reversed. Unicode Character Categories M and Sk are considered as modifiers. Fixes golang/go#50633
1 parent 2df65d7 commit 58b4a27

File tree

2 files changed

+56
-15
lines changed

2 files changed

+56
-15
lines changed

Diff for: unicode/bidi/bidi.go

+37-9
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package bidi // import "golang.org/x/text/unicode/bidi"
1818

1919
import (
2020
"bytes"
21+
"unicode"
2122
)
2223

2324
// This API tries to avoid dealing with embedding levels for now. Under the hood
@@ -321,23 +322,36 @@ func (r *Run) Pos() (start, end int) {
321322
// and returns the result. Modifiers will still follow the runes they modify.
322323
// Brackets are replaced with their counterparts.
323324
func AppendReverse(out, in []byte) []byte {
324-
ret := make([]byte, len(in)+len(out))
325-
copy(ret, out)
326325
inRunes := bytes.Runes(in)
327-
326+
li := len(inRunes)
327+
ret := make([]rune, li)
328+
modifiers := make([]rune, 0, li)
328329
for i, r := range inRunes {
330+
if unicode.In(r, unicode.M, unicode.Sk) {
331+
modifiers = append(modifiers, r)
332+
continue
333+
}
334+
if len(modifiers) > 0 {
335+
ret[li-i] = ret[li-i+len(modifiers)]
336+
copy(ret[li-i+1:li-i+1+len(modifiers)], modifiers)
337+
modifiers = nil
338+
}
329339
prop, _ := LookupRune(r)
330340
if prop.IsBracket() {
331-
inRunes[i] = prop.reverseBracket(r)
341+
ret[li-i-1] = prop.reverseBracket(r)
342+
} else {
343+
ret[li-i-1] = r
332344
}
333345
}
334-
335-
for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 {
336-
inRunes[i], inRunes[j] = inRunes[j], inRunes[i]
346+
if len(modifiers) > 0 {
347+
ret[0] = ret[len(modifiers)]
348+
copy(ret[1:1+len(modifiers)], modifiers)
337349
}
338-
copy(ret[len(out):], string(inRunes))
339350

340-
return ret
351+
res := make([]byte, len(in)+len(out))
352+
copy(res, out)
353+
copy(res[len(out):], string(ret))
354+
return res
341355
}
342356

343357
// ReverseString reverses the order of characters in s and returns a new string.
@@ -347,13 +361,27 @@ func ReverseString(s string) string {
347361
input := []rune(s)
348362
li := len(input)
349363
ret := make([]rune, li)
364+
modifiers := make([]rune, 0, li)
350365
for i, r := range input {
366+
if unicode.In(r, unicode.M, unicode.Sk) {
367+
modifiers = append(modifiers, r)
368+
continue
369+
}
370+
if len(modifiers) > 0 {
371+
ret[li-i] = ret[li-i+len(modifiers)]
372+
copy(ret[li-i+1:li-i+1+len(modifiers)], modifiers)
373+
modifiers = nil
374+
}
351375
prop, _ := LookupRune(r)
352376
if prop.IsBracket() {
353377
ret[li-i-1] = prop.reverseBracket(r)
354378
} else {
355379
ret[li-i-1] = r
356380
}
357381
}
382+
if len(modifiers) > 0 {
383+
ret[0] = ret[len(modifiers)]
384+
copy(ret[1:1+len(modifiers)], modifiers)
385+
}
358386
return string(ret)
359387
}

Diff for: unicode/bidi/bidi_test.go

+19-6
Original file line numberDiff line numberDiff line change
@@ -321,10 +321,22 @@ func TestDoubleSetString(t *testing.T) {
321321
}
322322

323323
func TestReverseString(t *testing.T) {
324-
input := "(Hello)"
325-
want := "(olleH)"
326-
if str := ReverseString(input); str != want {
327-
t.Errorf("ReverseString(%s) = %q; want %q", input, str, want)
324+
testcase := []struct {
325+
input string
326+
want string
327+
}{
328+
{"(Hello)", "(olleH)"},
329+
{"nice (world) placé́́", "é́́calp (dlrow) ecin"},
330+
{"äu", "uä"},
331+
{"uä", "äu"},
332+
{"é́́abé́́é́́", "é́́é́́baé́́"},
333+
{"é́́é́́baé́́", "é́́abé́́é́́"},
334+
{"✍🏾✍🏾ab✍🏾✍🏾", "✍🏾✍🏾ba✍🏾✍🏾"},
335+
}
336+
for _, tc := range testcase {
337+
if str := ReverseString(tc.input); str != tc.want {
338+
t.Errorf("ReverseString(%s) = %q; want %q", tc.input, str, tc.want)
339+
}
328340
}
329341
}
330342

@@ -335,8 +347,9 @@ func TestAppendReverse(t *testing.T) {
335347
want string
336348
}{
337349
{"", "Hëllo", "Hëllo"},
338-
{"nice (wörld)", "", "(dlröw) ecin"},
339-
{"nice (wörld)", "Hëllo", "Hëllo(dlröw) ecin"},
350+
{"nice (wörld) placé́́", "", "é́́calp (dlröw) ecin"},
351+
{"nice (wörld) placé́́", "Hëllo", "Hëlloé́́calp (dlröw) ecin"},
352+
{"✍🏾✍🏾ab✍🏾✍🏾", "✍🏾✍🏾ba✍🏾✍🏾", "✍🏾✍🏾ba✍🏾✍🏾✍🏾✍🏾ba✍🏾✍🏾"},
340353
}
341354
for _, tc := range testcase {
342355
if r := AppendReverse([]byte(tc.outString), []byte(tc.inString)); string(r) != tc.want {

0 commit comments

Comments
 (0)