Skip to content

Commit 27174d5

Browse files
imorph authored and dhartunian committed
add: linear search implementation (+ benchmarks)
Original PR: prometheus#1673, which was discussed in more detail in a prior iteration here: prometheus#1662.

Signed-off-by: Ivan Goncharov <[email protected]>
1 parent de5fb5f commit 27174d5

File tree

2 files changed

+376
-9
lines changed

2 files changed

+376
-9
lines changed

Diff for: prometheus/histogram.go

+29-9
Original file line numberDiff line numberDiff line change
@@ -797,15 +797,35 @@ func (h *histogram) Write(out *dto.Metric) error {
797797
// FindBucket returns the index of the bucket for the provided value, or
798798
// len(h.upperBounds) for the +Inf bucket.
799799
func (h *histogram) FindBucket(v float64) int {
800-
// TODO(beorn7): For small numbers of buckets (<30), a linear search is
801-
// slightly faster than the binary search. If we really care, we could
802-
// switch from one search strategy to the other depending on the number
803-
// of buckets.
804-
//
805-
// Microbenchmarks (BenchmarkHistogramNoLabels):
806-
// 11 buckets: 38.3 ns/op linear - binary 48.7 ns/op
807-
// 100 buckets: 78.1 ns/op linear - binary 54.9 ns/op
808-
// 300 buckets: 154 ns/op linear - binary 61.6 ns/op
800+
n := len(h.upperBounds)
801+
if n == 0 {
802+
return 0
803+
}
804+
805+
// Early exit: if v is less than or equal to the first upper bound, return 0
806+
if v <= h.upperBounds[0] {
807+
return 0
808+
}
809+
810+
// Early exit: if v is greater than the last upper bound, return len(h.upperBounds)
811+
if v > h.upperBounds[n-1] {
812+
return n
813+
}
814+
815+
// For small arrays, use simple linear search
816+
// "magic number" 35 is result of tests on couple different (AWS and baremetal) servers
817+
// see more details here: https://github.com/prometheus/client_golang/pull/1662
818+
if n < 35 {
819+
for i, bound := range h.upperBounds {
820+
if v <= bound {
821+
return i
822+
}
823+
}
824+
// If v is greater than all upper bounds, return len(h.upperBounds)
825+
return n
826+
}
827+
828+
// For larger arrays, use stdlib's binary search
809829
return sort.SearchFloat64s(h.upperBounds, v)
810830
}
811831

Diff for: prometheus/histogram_test.go

+347
Original file line numberDiff line numberDiff line change
@@ -875,3 +875,350 @@ func TestGetLe(t *testing.T) {
875875
}
876876
}
877877
}
878+
879+
func TestHistogramCreatedTimestamp(t *testing.T) {
880+
now := time.Now()
881+
882+
histogram := NewHistogram(HistogramOpts{
883+
Name: "test",
884+
Help: "test help",
885+
Buckets: []float64{1, 2, 3, 4},
886+
now: func() time.Time { return now },
887+
})
888+
889+
var metric dto.Metric
890+
if err := histogram.Write(&metric); err != nil {
891+
t.Fatal(err)
892+
}
893+
894+
if metric.Histogram.CreatedTimestamp.AsTime().Unix() != now.Unix() {
895+
t.Errorf("expected created timestamp %d, got %d", now.Unix(), metric.Histogram.CreatedTimestamp.AsTime().Unix())
896+
}
897+
}
898+
899+
func TestHistogramVecCreatedTimestamp(t *testing.T) {
900+
now := time.Now()
901+
902+
histogramVec := NewHistogramVec(HistogramOpts{
903+
Name: "test",
904+
Help: "test help",
905+
Buckets: []float64{1, 2, 3, 4},
906+
now: func() time.Time { return now },
907+
}, []string{"label"})
908+
histogram := histogramVec.WithLabelValues("value").(Histogram)
909+
910+
var metric dto.Metric
911+
if err := histogram.Write(&metric); err != nil {
912+
t.Fatal(err)
913+
}
914+
915+
if metric.Histogram.CreatedTimestamp.AsTime().Unix() != now.Unix() {
916+
t.Errorf("expected created timestamp %d, got %d", now.Unix(), metric.Histogram.CreatedTimestamp.AsTime().Unix())
917+
}
918+
}
919+
920+
func TestHistogramVecCreatedTimestampWithDeletes(t *testing.T) {
921+
now := time.Now()
922+
923+
histogramVec := NewHistogramVec(HistogramOpts{
924+
Name: "test",
925+
Help: "test help",
926+
Buckets: []float64{1, 2, 3, 4},
927+
now: func() time.Time { return now },
928+
}, []string{"label"})
929+
930+
// First use of "With" should populate CT.
931+
histogramVec.WithLabelValues("1")
932+
expected := map[string]time.Time{"1": now}
933+
934+
now = now.Add(1 * time.Hour)
935+
expectCTsForMetricVecValues(t, histogramVec.MetricVec, dto.MetricType_HISTOGRAM, expected)
936+
937+
// Two more labels at different times.
938+
histogramVec.WithLabelValues("2")
939+
expected["2"] = now
940+
941+
now = now.Add(1 * time.Hour)
942+
943+
histogramVec.WithLabelValues("3")
944+
expected["3"] = now
945+
946+
now = now.Add(1 * time.Hour)
947+
expectCTsForMetricVecValues(t, histogramVec.MetricVec, dto.MetricType_HISTOGRAM, expected)
948+
949+
// Recreate metric instance should reset created timestamp to now.
950+
histogramVec.DeleteLabelValues("1")
951+
histogramVec.WithLabelValues("1")
952+
expected["1"] = now
953+
954+
now = now.Add(1 * time.Hour)
955+
expectCTsForMetricVecValues(t, histogramVec.MetricVec, dto.MetricType_HISTOGRAM, expected)
956+
}
957+
958+
func TestNewConstHistogramWithCreatedTimestamp(t *testing.T) {
959+
metricDesc := NewDesc(
960+
"sample_value",
961+
"sample value",
962+
nil,
963+
nil,
964+
)
965+
buckets := map[float64]uint64{25: 100, 50: 200}
966+
createdTs := time.Unix(1719670764, 123)
967+
968+
h, err := NewConstHistogramWithCreatedTimestamp(metricDesc, 100, 200, buckets, createdTs)
969+
if err != nil {
970+
t.Fatal(err)
971+
}
972+
973+
var metric dto.Metric
974+
if err := h.Write(&metric); err != nil {
975+
t.Fatal(err)
976+
}
977+
978+
if metric.Histogram.CreatedTimestamp.AsTime().UnixMicro() != createdTs.UnixMicro() {
979+
t.Errorf("Expected created timestamp %v, got %v", createdTs, &metric.Histogram.CreatedTimestamp)
980+
}
981+
}
982+
983+
func TestNativeHistogramExemplar(t *testing.T) {
984+
// Test the histogram with positive NativeHistogramExemplarTTL and NativeHistogramMaxExemplars
985+
h := NewHistogram(HistogramOpts{
986+
Name: "test",
987+
Help: "test help",
988+
Buckets: []float64{1, 2, 3, 4},
989+
NativeHistogramBucketFactor: 1.1,
990+
NativeHistogramMaxExemplars: 3,
991+
NativeHistogramExemplarTTL: 10 * time.Second,
992+
}).(*histogram)
993+
994+
tcs := []struct {
995+
name string
996+
addFunc func(*histogram)
997+
expectedValues []float64
998+
}{
999+
{
1000+
name: "add exemplars to the limit",
1001+
addFunc: func(h *histogram) {
1002+
h.ObserveWithExemplar(1, Labels{"id": "1"})
1003+
h.ObserveWithExemplar(3, Labels{"id": "1"})
1004+
h.ObserveWithExemplar(5, Labels{"id": "1"})
1005+
},
1006+
expectedValues: []float64{1, 3, 5},
1007+
},
1008+
{
1009+
name: "remove exemplar in closest pair, the removed index equals to inserted index",
1010+
addFunc: func(h *histogram) {
1011+
h.ObserveWithExemplar(4, Labels{"id": "1"})
1012+
},
1013+
expectedValues: []float64{1, 3, 4},
1014+
},
1015+
{
1016+
name: "remove exemplar in closest pair, the removed index is bigger than inserted index",
1017+
addFunc: func(h *histogram) {
1018+
h.ObserveWithExemplar(0, Labels{"id": "1"})
1019+
},
1020+
expectedValues: []float64{0, 1, 4},
1021+
},
1022+
{
1023+
name: "remove exemplar with oldest timestamp, the removed index is smaller than inserted index",
1024+
addFunc: func(h *histogram) {
1025+
h.now = func() time.Time { return time.Now().Add(time.Second * 11) }
1026+
h.ObserveWithExemplar(6, Labels{"id": "1"})
1027+
},
1028+
expectedValues: []float64{0, 4, 6},
1029+
},
1030+
}
1031+
1032+
for _, tc := range tcs {
1033+
t.Run(tc.name, func(t *testing.T) {
1034+
tc.addFunc(h)
1035+
compareNativeExemplarValues(t, h.nativeExemplars.exemplars, tc.expectedValues)
1036+
})
1037+
}
1038+
1039+
// Test the histogram with negative NativeHistogramExemplarTTL
1040+
h = NewHistogram(HistogramOpts{
1041+
Name: "test",
1042+
Help: "test help",
1043+
Buckets: []float64{1, 2, 3, 4},
1044+
NativeHistogramBucketFactor: 1.1,
1045+
NativeHistogramMaxExemplars: 3,
1046+
NativeHistogramExemplarTTL: -1 * time.Second,
1047+
}).(*histogram)
1048+
1049+
tcs = []struct {
1050+
name string
1051+
addFunc func(*histogram)
1052+
expectedValues []float64
1053+
}{
1054+
{
1055+
name: "add exemplars to the limit",
1056+
addFunc: func(h *histogram) {
1057+
h.ObserveWithExemplar(1, Labels{"id": "1"})
1058+
h.ObserveWithExemplar(3, Labels{"id": "1"})
1059+
h.ObserveWithExemplar(5, Labels{"id": "1"})
1060+
},
1061+
expectedValues: []float64{1, 3, 5},
1062+
},
1063+
{
1064+
name: "remove exemplar with oldest timestamp, the removed index is smaller than inserted index",
1065+
addFunc: func(h *histogram) {
1066+
h.ObserveWithExemplar(4, Labels{"id": "1"})
1067+
},
1068+
expectedValues: []float64{3, 4, 5},
1069+
},
1070+
{
1071+
name: "remove exemplar with oldest timestamp, the removed index equals to inserted index",
1072+
addFunc: func(h *histogram) {
1073+
h.ObserveWithExemplar(0, Labels{"id": "1"})
1074+
},
1075+
expectedValues: []float64{0, 4, 5},
1076+
},
1077+
{
1078+
name: "remove exemplar with oldest timestamp, the removed index is bigger than inserted index",
1079+
addFunc: func(h *histogram) {
1080+
h.ObserveWithExemplar(3, Labels{"id": "1"})
1081+
},
1082+
expectedValues: []float64{0, 3, 4},
1083+
},
1084+
}
1085+
1086+
for _, tc := range tcs {
1087+
t.Run(tc.name, func(t *testing.T) {
1088+
tc.addFunc(h)
1089+
compareNativeExemplarValues(t, h.nativeExemplars.exemplars, tc.expectedValues)
1090+
})
1091+
}
1092+
1093+
// Test the histogram with negative NativeHistogramMaxExemplars
1094+
h = NewHistogram(HistogramOpts{
1095+
Name: "test",
1096+
Help: "test help",
1097+
Buckets: []float64{1, 2, 3, 4},
1098+
NativeHistogramBucketFactor: 1.1,
1099+
NativeHistogramMaxExemplars: -1,
1100+
NativeHistogramExemplarTTL: -1 * time.Second,
1101+
}).(*histogram)
1102+
1103+
tcs = []struct {
1104+
name string
1105+
addFunc func(*histogram)
1106+
expectedValues []float64
1107+
}{
1108+
{
1109+
name: "add exemplars to the limit, but no effect",
1110+
addFunc: func(h *histogram) {
1111+
h.ObserveWithExemplar(1, Labels{"id": "1"})
1112+
h.ObserveWithExemplar(3, Labels{"id": "1"})
1113+
h.ObserveWithExemplar(5, Labels{"id": "1"})
1114+
},
1115+
expectedValues: []float64{},
1116+
},
1117+
}
1118+
1119+
for _, tc := range tcs {
1120+
t.Run(tc.name, func(t *testing.T) {
1121+
tc.addFunc(h)
1122+
compareNativeExemplarValues(t, h.nativeExemplars.exemplars, tc.expectedValues)
1123+
})
1124+
}
1125+
}
1126+
1127+
func compareNativeExemplarValues(t *testing.T, exps []*dto.Exemplar, values []float64) {
1128+
if len(exps) != len(values) {
1129+
t.Errorf("the count of exemplars is not %d", len(values))
1130+
}
1131+
for i, e := range exps {
1132+
if e.GetValue() != values[i] {
1133+
t.Errorf("the %dth exemplar value %v is not as expected: %v", i, e.GetValue(), values[i])
1134+
}
1135+
}
1136+
}
1137+
1138+
var resultFindBucket int
1139+
1140+
func benchmarkFindBucket(b *testing.B, l int) {
1141+
h := &histogram{upperBounds: make([]float64, l)}
1142+
for i := range h.upperBounds {
1143+
h.upperBounds[i] = float64(i)
1144+
}
1145+
v := float64(l / 2)
1146+
1147+
b.ResetTimer()
1148+
for i := 0; i < b.N; i++ {
1149+
resultFindBucket = h.FindBucket(v)
1150+
}
1151+
}
1152+
1153+
func BenchmarkFindBucketShort(b *testing.B) {
1154+
benchmarkFindBucket(b, 20)
1155+
}
1156+
1157+
func BenchmarkFindBucketMid(b *testing.B) {
1158+
benchmarkFindBucket(b, 40)
1159+
}
1160+
1161+
func BenchmarkFindBucketLarge(b *testing.B) {
1162+
benchmarkFindBucket(b, 100)
1163+
}
1164+
1165+
func BenchmarkFindBucketHuge(b *testing.B) {
1166+
benchmarkFindBucket(b, 500)
1167+
}
1168+
1169+
func BenchmarkFindBucketInf(b *testing.B) {
1170+
h := &histogram{upperBounds: make([]float64, 500)}
1171+
for i := range h.upperBounds {
1172+
h.upperBounds[i] = float64(i)
1173+
}
1174+
v := 1000.5
1175+
1176+
b.ResetTimer()
1177+
for i := 0; i < b.N; i++ {
1178+
resultFindBucket = h.FindBucket(v)
1179+
}
1180+
}
1181+
1182+
func BenchmarkFindBucketLow(b *testing.B) {
1183+
h := &histogram{upperBounds: make([]float64, 500)}
1184+
for i := range h.upperBounds {
1185+
h.upperBounds[i] = float64(i)
1186+
}
1187+
v := -1.1
1188+
1189+
b.ResetTimer()
1190+
for i := 0; i < b.N; i++ {
1191+
resultFindBucket = h.FindBucket(v)
1192+
}
1193+
}
1194+
1195+
func TestFindBucket(t *testing.T) {
1196+
smallHistogram := &histogram{upperBounds: []float64{1, 2, 3, 4, 5}}
1197+
largeHistogram := &histogram{upperBounds: make([]float64, 50)}
1198+
for i := range largeHistogram.upperBounds {
1199+
largeHistogram.upperBounds[i] = float64(i)
1200+
}
1201+
1202+
tests := []struct {
1203+
h *histogram
1204+
v float64
1205+
expected int
1206+
}{
1207+
{smallHistogram, -1, 0},
1208+
{smallHistogram, 0.5, 0},
1209+
{smallHistogram, 2.5, 2},
1210+
{smallHistogram, 5.5, 5},
1211+
{largeHistogram, -1, 0},
1212+
{largeHistogram, 25.5, 26},
1213+
{largeHistogram, 49.5, 50},
1214+
{largeHistogram, 50.5, 50},
1215+
{largeHistogram, 5000.5, 50},
1216+
}
1217+
1218+
for _, tt := range tests {
1219+
result := tt.h.FindBucket(tt.v)
1220+
if result != tt.expected {
1221+
t.Errorf("findBucket(%v) = %d; expected %d", tt.v, result, tt.expected)
1222+
}
1223+
}
1224+
}

0 commit comments

Comments
 (0)