Skip to content

Commit b1a48af

Browse files
committed
encoding/json: properly encode strings with ",string" again
golang.org/cl/193604 fixed one bug when one encodes a string with the ",string" option: if SetEscapeHTML(false) is used, we should not be using HTML escaping for the inner string encoding. The CL correctly fixed that. The CL also tried to speed up this edge case. By avoiding an entire new call to Marshal, the new Issue34127 benchmark reduced its time/op by 45%, and lowered the allocs/op from 3 to 2. However, that last optimization wasn't correct: Since Go 1.2 every string can be marshaled to JSON without error even if it contains invalid UTF-8 byte sequences. Therefore there is no need to use Marshal again for the only reason of enclosing the string in double quotes. JSON string encoding isn't just about adding quotes and taking care of invalid UTF-8. We also need to escape some characters, like tabs and newlines. The new code failed to do that. The bug resulted in the added test case failing to roundtrip properly; before our fix here, we'd see an error: invalid use of ,string struct tag, trying to unmarshal "\"\b\f\n\r\t\"\\\"" into string If you pay close attention, you'll notice that the special characters like tab and newline are only encoded once, not twice. When decoding with the ",string" option, the outer string decode works, but the inner string decode fails, as we are now decoding a JSON string with unescaped special characters. The fix we apply here isn't to go back to Marshal, as that would re-introduce the bug with SetEscapeHTML(false). Instead, we can use a new encode state from the pool - it results in minimal performance impact, and even reduces allocs/op further. The performance impact seems fair, given that we need to check the entire string for characters that need to be escaped. name old time/op new time/op delta Issue34127-8 89.7ns ± 2% 100.8ns ± 1% +12.27% (p=0.000 n=8+8) name old alloc/op new alloc/op delta Issue34127-8 40.0B ± 0% 32.0B ± 0% -20.00% (p=0.000 n=8+8) name old allocs/op new allocs/op delta Issue34127-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=8+8) Instead of adding another standalone test, we convert an existing "string tag" test to be table-based, and add another test case there. One test case from the original CL also had to be amended, due to the same problem - when escaping '<' due to SetEscapeHTML(true), we need to end up with double escaping, since we're using ",string". Fixes #38173. Change-Id: I2b0df9e4f1d3452fff74fe910e189c930dde4b5b Reviewed-on: https://go-review.googlesource.com/c/go/+/226498 Run-TryBot: Daniel Martí <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Joe Tsai <[email protected]>
1 parent 98d20fb commit b1a48af

File tree

3 files changed

+69
-37
lines changed

3 files changed

+69
-37
lines changed

src/encoding/json/encode.go

+6-5
Original file line numberDiff line numberDiff line change
@@ -635,11 +635,12 @@ func stringEncoder(e *encodeState, v reflect.Value, opts encOpts) {
635635
return
636636
}
637637
if opts.quoted {
638-
b := make([]byte, 0, v.Len()+2)
639-
b = append(b, '"')
640-
b = append(b, []byte(v.String())...)
641-
b = append(b, '"')
642-
e.stringBytes(b, opts.escapeHTML)
638+
e2 := newEncodeState()
639+
// Since we encode the string twice, we only need to escape HTML
640+
// the first time.
641+
e2.string(v.String(), opts.escapeHTML)
642+
e.stringBytes(e2.Bytes(), false)
643+
encodeStatePool.Put(e2)
643644
} else {
644645
e.string(v.String(), opts.escapeHTML)
645646
}

src/encoding/json/encode_test.go

+58-29
Original file line numberDiff line numberDiff line change
@@ -79,37 +79,66 @@ type StringTag struct {
7979
NumberStr Number `json:",string"`
8080
}
8181

82-
var stringTagExpected = `{
83-
"BoolStr": "true",
84-
"IntStr": "42",
85-
"UintptrStr": "44",
86-
"StrStr": "\"xzbit\"",
87-
"NumberStr": "46"
88-
}`
89-
90-
func TestStringTag(t *testing.T) {
91-
var s StringTag
92-
s.BoolStr = true
93-
s.IntStr = 42
94-
s.UintptrStr = 44
95-
s.StrStr = "xzbit"
96-
s.NumberStr = "46"
97-
got, err := MarshalIndent(&s, "", " ")
98-
if err != nil {
99-
t.Fatal(err)
100-
}
101-
if got := string(got); got != stringTagExpected {
102-
t.Fatalf(" got: %s\nwant: %s\n", got, stringTagExpected)
82+
func TestRoundtripStringTag(t *testing.T) {
83+
tests := []struct {
84+
name string
85+
in StringTag
86+
want string // empty to just test that we roundtrip
87+
}{
88+
{
89+
name: "AllTypes",
90+
in: StringTag{
91+
BoolStr: true,
92+
IntStr: 42,
93+
UintptrStr: 44,
94+
StrStr: "xzbit",
95+
NumberStr: "46",
96+
},
97+
want: `{
98+
"BoolStr": "true",
99+
"IntStr": "42",
100+
"UintptrStr": "44",
101+
"StrStr": "\"xzbit\"",
102+
"NumberStr": "46"
103+
}`,
104+
},
105+
{
106+
// See golang.org/issues/38173.
107+
name: "StringDoubleEscapes",
108+
in: StringTag{
109+
StrStr: "\b\f\n\r\t\"\\",
110+
NumberStr: "0", // just to satisfy the roundtrip
111+
},
112+
want: `{
113+
"BoolStr": "false",
114+
"IntStr": "0",
115+
"UintptrStr": "0",
116+
"StrStr": "\"\\u0008\\u000c\\n\\r\\t\\\"\\\\\"",
117+
"NumberStr": "0"
118+
}`,
119+
},
103120
}
121+
for _, test := range tests {
122+
t.Run(test.name, func(t *testing.T) {
123+
// Indent with a tab prefix to make the multi-line string
124+
// literals in the table nicer to read.
125+
got, err := MarshalIndent(&test.in, "\t\t\t", "\t")
126+
if err != nil {
127+
t.Fatal(err)
128+
}
129+
if got := string(got); got != test.want {
130+
t.Fatalf(" got: %s\nwant: %s\n", got, test.want)
131+
}
104132

105-
// Verify that it round-trips.
106-
var s2 StringTag
107-
err = NewDecoder(bytes.NewReader(got)).Decode(&s2)
108-
if err != nil {
109-
t.Fatalf("Decode: %v", err)
110-
}
111-
if !reflect.DeepEqual(s, s2) {
112-
t.Fatalf("decode didn't match.\nsource: %#v\nEncoded as:\n%s\ndecode: %#v", s, string(got), s2)
133+
// Verify that it round-trips.
134+
var s2 StringTag
135+
if err := Unmarshal(got, &s2); err != nil {
136+
t.Fatalf("Decode: %v", err)
137+
}
138+
if !reflect.DeepEqual(test.in, s2) {
139+
t.Fatalf("decode didn't match.\nsource: %#v\nEncoded as:\n%s\ndecode: %#v", test.in, string(got), s2)
140+
}
141+
})
113142
}
114143
}
115144

src/encoding/json/stream_test.go

+5-3
Original file line numberDiff line numberDiff line change
@@ -144,22 +144,24 @@ func TestEncoderSetEscapeHTML(t *testing.T) {
144144
},
145145
{
146146
"stringOption", stringOption,
147-
`{"bar":"\"\u003chtml\u003efoobar\u003c/html\u003e\""}`,
147+
`{"bar":"\"\\u003chtml\\u003efoobar\\u003c/html\\u003e\""}`,
148148
`{"bar":"\"<html>foobar</html>\""}`,
149149
},
150150
} {
151151
var buf bytes.Buffer
152152
enc := NewEncoder(&buf)
153153
if err := enc.Encode(tt.v); err != nil {
154-
t.Fatalf("Encode(%s): %s", tt.name, err)
154+
t.Errorf("Encode(%s): %s", tt.name, err)
155+
continue
155156
}
156157
if got := strings.TrimSpace(buf.String()); got != tt.wantEscape {
157158
t.Errorf("Encode(%s) = %#q, want %#q", tt.name, got, tt.wantEscape)
158159
}
159160
buf.Reset()
160161
enc.SetEscapeHTML(false)
161162
if err := enc.Encode(tt.v); err != nil {
162-
t.Fatalf("SetEscapeHTML(false) Encode(%s): %s", tt.name, err)
163+
t.Errorf("SetEscapeHTML(false) Encode(%s): %s", tt.name, err)
164+
continue
163165
}
164166
if got := strings.TrimSpace(buf.String()); got != tt.want {
165167
t.Errorf("SetEscapeHTML(false) Encode(%s) = %#q, want %#q",

0 commit comments

Comments
 (0)