Skip to content

Commit 39a62f7

Browse files
authored
fix: values escaping bugs (#727)
Fixes issues with underscore escaping and with the conversion of large Unicode values. Signed-off-by: Owen Williams <[email protected]>
1 parent 7ed4523 commit 39a62f7

File tree

2 files changed

+38
-22
lines changed

2 files changed

+38
-22
lines changed

model/metric.go

+10-18
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"fmt"
1919
"regexp"
2020
"sort"
21+
"strconv"
2122
"strings"
2223
"unicode/utf8"
2324

@@ -270,10 +271,6 @@ func metricNeedsEscaping(m *dto.Metric) bool {
270271
return false
271272
}
272273

273-
const (
274-
lowerhex = "0123456789abcdef"
275-
)
276-
277274
// EscapeName escapes the incoming name according to the provided escaping
278275
// scheme. Depending on the rules of escaping, this may cause no change in the
279276
// string that is returned. (Especially NoEscaping, which by definition is a
@@ -308,7 +305,7 @@ func EscapeName(name string, scheme EscapingScheme) string {
308305
} else if isValidLegacyRune(b, i) {
309306
escaped.WriteRune(b)
310307
} else {
311-
escaped.WriteRune('_')
308+
escaped.WriteString("__")
312309
}
313310
}
314311
return escaped.String()
@@ -318,21 +315,15 @@ func EscapeName(name string, scheme EscapingScheme) string {
318315
}
319316
escaped.WriteString("U__")
320317
for i, b := range name {
321-
if isValidLegacyRune(b, i) {
318+
if b == '_' {
319+
escaped.WriteString("__")
320+
} else if isValidLegacyRune(b, i) {
322321
escaped.WriteRune(b)
323322
} else if !utf8.ValidRune(b) {
324323
escaped.WriteString("_FFFD_")
325-
} else if b < 0x100 {
326-
escaped.WriteRune('_')
327-
for s := 4; s >= 0; s -= 4 {
328-
escaped.WriteByte(lowerhex[b>>uint(s)&0xF])
329-
}
330-
escaped.WriteRune('_')
331-
} else if b < 0x10000 {
324+
} else {
332325
escaped.WriteRune('_')
333-
for s := 12; s >= 0; s -= 4 {
334-
escaped.WriteByte(lowerhex[b>>uint(s)&0xF])
335-
}
326+
escaped.WriteString(strconv.FormatInt(int64(b), 16))
336327
escaped.WriteRune('_')
337328
}
338329
}
@@ -390,8 +381,9 @@ func UnescapeName(name string, scheme EscapingScheme) string {
390381
// We think we are in a UTF-8 code, process it.
391382
var utf8Val uint
392383
for j := 0; i < len(escapedName); j++ {
393-
// This is too many characters for a utf8 value.
394-
if j > 4 {
384+
// This is too many characters for a utf8 value based on the MaxRune
385+
// value of '\U0010FFFF'.
386+
if j >= 6 {
395387
return name
396388
}
397389
// Found a closing underscore, convert to a rune, check validity, and append.

model/metric_test.go

+28-4
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,14 @@ func TestEscapeName(t *testing.T) {
261261
expectedUnescapedDots: "mysystem.prod.west.cpu.load",
262262
expectedValue: "U__mysystem_2e_prod_2e_west_2e_cpu_2e_load",
263263
},
264+
{
265+
name: "name with dots and underscore",
266+
input: "mysystem.prod.west.cpu.load_total",
267+
expectedUnderscores: "mysystem_prod_west_cpu_load_total",
268+
expectedDots: "mysystem_dot_prod_dot_west_dot_cpu_dot_load__total",
269+
expectedUnescapedDots: "mysystem.prod.west.cpu.load_total",
270+
expectedValue: "U__mysystem_2e_prod_2e_west_2e_cpu_2e_load__total",
271+
},
264272
{
265273
name: "name with dots and colon",
266274
input: "http.status:sum",
@@ -269,16 +277,32 @@ func TestEscapeName(t *testing.T) {
269277
expectedUnescapedDots: "http.status:sum",
270278
expectedValue: "U__http_2e_status:sum",
271279
},
280+
{
281+
name: "name with spaces and emoji",
282+
input: "label with 😱",
283+
expectedUnderscores: "label_with__",
284+
expectedDots: "label__with____",
285+
expectedUnescapedDots: "label_with__",
286+
expectedValue: "U__label_20_with_20__1f631_",
287+
},
272288
{
273289
name: "name with unicode characters > 0x100",
274290
input: "花火",
275291
expectedUnderscores: "__",
276-
expectedDots: "__",
292+
expectedDots: "____",
277293
// Dots-replacement cannot tell a replaced character apart from a literal
278294
// underscore: both are written as "__" and unescape to a single "_".
279-
expectedUnescapedDots: "_",
295+
expectedUnescapedDots: "__",
280296
expectedValue: "U___82b1__706b_",
281297
},
298+
{
299+
name: "name with spaces and edge-case value",
300+
input: "label with \u0100",
301+
expectedUnderscores: "label_with__",
302+
expectedDots: "label__with____",
303+
expectedUnescapedDots: "label_with__",
304+
expectedValue: "U__label_20_with_20__100_",
305+
},
282306
}
283307

284308
for _, scenario := range scenarios {
@@ -564,7 +588,7 @@ func TestEscapeMetricFamily(t *testing.T) {
564588
},
565589
},
566590
expected: &dto.MetricFamily{
567-
Name: proto.String("unicode_dot_and_dot_dots_dot___"),
591+
Name: proto.String("unicode_dot_and_dot_dots_dot_____"),
568592
Help: proto.String("some help text"),
569593
Type: dto.MetricType_GAUGE.Enum(),
570594
Metric: []*dto.Metric{
@@ -575,7 +599,7 @@ func TestEscapeMetricFamily(t *testing.T) {
575599
Label: []*dto.LabelPair{
576600
{
577601
Name: proto.String("__name__"),
578-
Value: proto.String("unicode_dot_and_dot_dots_dot___"),
602+
Value: proto.String("unicode_dot_and_dot_dots_dot_____"),
579603
},
580604
{
581605
Name: proto.String("some_label"),

0 commit comments

Comments
 (0)