@@ -7,6 +7,7 @@ package cmp
7
7
import (
8
8
"bytes"
9
9
"fmt"
10
+ "math"
10
11
"reflect"
11
12
"strconv"
12
13
"strings"
@@ -96,30 +97,29 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
96
97
}
97
98
98
99
// Auto-detect the type of the data.
99
- var isLinedText , isText , isBinary bool
100
100
var sx , sy string
101
101
var ssx , ssy []string
102
+ var isString , isMostlyText , isPureLinedText , isBinary bool
102
103
switch {
103
104
case t .Kind () == reflect .String :
104
105
sx , sy = vx .String (), vy .String ()
105
- isText = true // Initial estimate, verify later
106
+ isString = true
106
107
case t .Kind () == reflect .Slice && t .Elem () == reflect .TypeOf (byte (0 )):
107
108
sx , sy = string (vx .Bytes ()), string (vy .Bytes ())
108
- isBinary = true // Initial estimate, verify later
109
+ isString = true
109
110
case t .Kind () == reflect .Array :
110
111
// Arrays need to be addressable for slice operations to work.
111
112
vx2 , vy2 := reflect .New (t ).Elem (), reflect .New (t ).Elem ()
112
113
vx2 .Set (vx )
113
114
vy2 .Set (vy )
114
115
vx , vy = vx2 , vy2
115
116
}
116
- if isText || isBinary {
117
- var numLines , lastLineIdx , maxLineLen int
118
- isBinary = ! utf8 .ValidString (sx ) || ! utf8 .ValidString (sy )
117
+ if isString {
118
+ var numTotalRunes , numValidRunes , numLines , lastLineIdx , maxLineLen int
119
119
for i , r := range sx + sy {
120
- if ! ( unicode . IsPrint ( r ) || unicode . IsSpace ( r )) || r == utf8 . RuneError {
121
- isBinary = true
122
- break
120
+ numTotalRunes ++
121
+ if ( unicode . IsPrint ( r ) || unicode . IsSpace ( r )) && r != utf8 . RuneError {
122
+ numValidRunes ++
123
123
}
124
124
if r == '\n' {
125
125
if maxLineLen < i - lastLineIdx {
@@ -129,12 +129,14 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
129
129
numLines ++
130
130
}
131
131
}
132
- isText = ! isBinary
133
- isLinedText = isText && numLines >= 4 && maxLineLen <= 1024
132
+ isPureText := numValidRunes == numTotalRunes
133
+ isMostlyText = float64 (numValidRunes ) > math .Floor (0.90 * float64 (numTotalRunes ))
134
+ isPureLinedText = isPureText && numLines >= 4 && maxLineLen <= 1024
135
+ isBinary = ! isMostlyText
134
136
135
137
// Avoid diffing by lines if it produces a significantly more complex
136
138
// edit script than diffing by bytes.
137
- if isLinedText {
139
+ if isPureLinedText {
138
140
ssx = strings .Split (sx , "\n " )
139
141
ssy = strings .Split (sy , "\n " )
140
142
esLines := diff .Difference (len (ssx ), len (ssy ), func (ix , iy int ) diff.Result {
@@ -145,7 +147,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
145
147
})
146
148
efficiencyLines := float64 (esLines .Dist ()) / float64 (len (esLines ))
147
149
efficiencyBytes := float64 (esBytes .Dist ()) / float64 (len (esBytes ))
148
- isLinedText = efficiencyLines < 4 * efficiencyBytes
150
+ isPureLinedText = efficiencyLines < 4 * efficiencyBytes
149
151
}
150
152
}
151
153
@@ -155,7 +157,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
155
157
switch {
156
158
// If the text appears to be multi-lined text,
157
159
// then perform differencing across individual lines.
158
- case isLinedText :
160
+ case isPureLinedText :
159
161
list = opts .formatDiffSlice (
160
162
reflect .ValueOf (ssx ), reflect .ValueOf (ssy ), 1 , "line" ,
161
163
func (v reflect.Value , d diffMode ) textRecord {
@@ -244,15 +246,14 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
244
246
// If the text appears to be single-lined text,
245
247
// then perform differencing in approximately fixed-sized chunks.
246
248
// The output is printed as quoted strings.
247
- case isText :
249
+ case isMostlyText :
248
250
list = opts .formatDiffSlice (
249
251
reflect .ValueOf (sx ), reflect .ValueOf (sy ), 64 , "byte" ,
250
252
func (v reflect.Value , d diffMode ) textRecord {
251
253
s := formatString (v .String ())
252
254
return textRecord {Diff : d , Value : textLine (s )}
253
255
},
254
256
)
255
- delim = ""
256
257
257
258
// If the text appears to be binary data,
258
259
// then perform differencing in approximately fixed-sized chunks.
@@ -314,7 +315,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
314
315
315
316
// Wrap the output with appropriate type information.
316
317
var out textNode = & textWrap {Prefix : "{" , Value : list , Suffix : "}" }
317
- if ! isText {
318
+ if ! isMostlyText {
318
319
// The "{...}" byte-sequence literal is not valid Go syntax for strings.
319
320
// Emit the type for extra clarity (e.g. "string{...}").
320
321
if t .Kind () == reflect .String {
0 commit comments