Skip to content

Commit 3a181dc

Browse files
committed
archive/zip: fix handling of replacement rune in UTF8 check
The replacement rune is a valid rune and can appear as itself in valid UTF8 (it encodes as three bytes). To check for invalid UTF8 it is necessary to look for utf8.DecodeRune returning the replacement rune and size==1.

Change-Id: I169be8d1fe61605c921ac13cc2fde94f80f3463c
Reviewed-on: https://go-review.googlesource.com/78126
Run-TryBot: Russ Cox <rsc@golang.org>
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
1 parent 7de9e5e commit 3a181dc

File tree

2 files changed

+19
-13
lines changed

2 files changed

+19
-13
lines changed

src/archive/zip/writer.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -219,15 +219,17 @@ func (w *Writer) Create(name string) (io.Writer, error) {
219219
// must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII,
220220
// or any other common encoding).
221221
func detectUTF8(s string) (valid, require bool) {
222-
for _, r := range s {
222+
for i := 0; i < len(s); {
223+
r, size := utf8.DecodeRuneInString(s[i:])
224+
i += size
223225
// Officially, ZIP uses CP-437, but many readers use the system's
224226
// local character encoding. Most encodings are compatible with a large
225227
// subset of CP-437, which itself is ASCII-like.
226228
//
227229
// Forbid 0x7e and 0x5c since EUC-KR and Shift-JIS replace those
228230
// characters with localized currency and overline characters.
229231
if r < 0x20 || r > 0x7d || r == 0x5c {
230-
if !utf8.ValidRune(r) || r == utf8.RuneError {
232+
if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) {
231233
return false, false
232234
}
233235
require = true

src/archive/zip/writer_test.go

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -136,40 +136,45 @@ func TestWriterUTF8(t *testing.T) {
136136
var utf8Tests = []struct {
137137
name string
138138
comment string
139-
expect uint16
140139
nonUTF8 bool
140+
flags uint16
141141
}{
142142
{
143143
name: "hi, hello",
144144
comment: "in the world",
145-
expect: 0x8,
145+
flags: 0x8,
146146
},
147147
{
148148
name: "hi, こんにちわ",
149149
comment: "in the world",
150-
expect: 0x808,
150+
flags: 0x808,
151151
},
152152
{
153153
name: "hi, こんにちわ",
154154
comment: "in the world",
155155
nonUTF8: true,
156-
expect: 0x8,
156+
flags: 0x8,
157157
},
158158
{
159159
name: "hi, hello",
160160
comment: "in the 世界",
161-
expect: 0x808,
161+
flags: 0x808,
162162
},
163163
{
164164
name: "hi, こんにちわ",
165165
comment: "in the 世界",
166-
expect: 0x808,
166+
flags: 0x808,
167+
},
168+
{
169+
name: "the replacement rune is �",
170+
comment: "the replacement rune is �",
171+
flags: 0x808,
167172
},
168173
{
169174
// Name is Japanese encoded in Shift JIS.
170175
name: "\x93\xfa\x96{\x8c\xea.txt",
171176
comment: "in the 世界",
172-
expect: 0x008, // UTF-8 must not be set
177+
flags: 0x008, // UTF-8 must not be set
173178
},
174179
}
175180

@@ -201,10 +206,9 @@ func TestWriterUTF8(t *testing.T) {
201206
t.Fatal(err)
202207
}
203208
for i, test := range utf8Tests {
204-
got := r.File[i].Flags
205-
t.Logf("name %v, comment %v", test.name, test.comment)
206-
if got != test.expect {
207-
t.Fatalf("Flags: got %v, want %v", got, test.expect)
209+
flags := r.File[i].Flags
210+
if flags != test.flags {
211+
t.Errorf("CreateHeader(name=%q comment=%q nonUTF8=%v): flags=%#x, want %#x", test.name, test.comment, test.nonUTF8, flags, test.flags)
208212
}
209213
}
210214
}

0 commit comments

Comments
 (0)