Skip to content

Commit d7c9cd8

Browse files
authored
Add support for native histograms in OM parser (#1040)
* Start on native histogram parser * Fix regex for nh sample * Get nh sample appended * Complete parsing for simple native histogram * Add parsing for native histograms with labels, fix linting * Mitigate type and style errors * Add test for parsing coexisting native and classic hist with simple label set * Solve error in Python 3.9 tests * Add test for native + classic histograms with more than a label set and adapt logic accordingly * Separate native histogram from value field, improve conditional/try blocks * Clean up debug lines, add warnings, delete unnecessary lines Signed-off-by: Arianna Vespri <[email protected]>
1 parent 3b183b4 commit d7c9cd8

File tree

8 files changed

+245
-37
lines changed

8 files changed

+245
-37
lines changed

prometheus_client/core.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@
55
SummaryMetricFamily, UnknownMetricFamily, UntypedMetricFamily,
66
)
77
from .registry import CollectorRegistry, REGISTRY
8-
from .samples import Exemplar, Sample, Timestamp
8+
from .samples import BucketSpan, Exemplar, NativeHistogram, Sample, Timestamp
99

1010
__all__ = (
11+
'BucketSpan',
1112
'CollectorRegistry',
1213
'Counter',
1314
'CounterMetricFamily',
@@ -21,6 +22,7 @@
2122
'Info',
2223
'InfoMetricFamily',
2324
'Metric',
25+
'NativeHistogram',
2426
'REGISTRY',
2527
'Sample',
2628
'StateSetMetricFamily',

prometheus_client/metrics.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,8 @@ def describe(self) -> Iterable[Metric]:
111111

112112
def collect(self) -> Iterable[Metric]:
113113
metric = self._get_metric()
114-
for suffix, labels, value, timestamp, exemplar in self._samples():
115-
metric.add_sample(self._name + suffix, labels, value, timestamp, exemplar)
114+
for suffix, labels, value, timestamp, exemplar, native_histogram_value in self._samples():
115+
metric.add_sample(self._name + suffix, labels, value, timestamp, exemplar, native_histogram_value)
116116
return [metric]
117117

118118
def __str__(self) -> str:
@@ -246,8 +246,8 @@ def _multi_samples(self) -> Iterable[Sample]:
246246
metrics = self._metrics.copy()
247247
for labels, metric in metrics.items():
248248
series_labels = list(zip(self._labelnames, labels))
249-
for suffix, sample_labels, value, timestamp, exemplar in metric._samples():
250-
yield Sample(suffix, dict(series_labels + list(sample_labels.items())), value, timestamp, exemplar)
249+
for suffix, sample_labels, value, timestamp, exemplar, native_histogram_value in metric._samples():
250+
yield Sample(suffix, dict(series_labels + list(sample_labels.items())), value, timestamp, exemplar, native_histogram_value)
251251

252252
def _child_samples(self) -> Iterable[Sample]: # pragma: no cover
253253
raise NotImplementedError('_child_samples() must be implemented by %r' % self)

prometheus_client/metrics_core.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import re
22
from typing import Dict, List, Optional, Sequence, Tuple, Union
33

4-
from .samples import Exemplar, Sample, Timestamp
4+
from .samples import Exemplar, NativeHistogram, Sample, Timestamp
55

66
METRIC_TYPES = (
77
'counter', 'gauge', 'summary', 'histogram',
@@ -36,11 +36,11 @@ def __init__(self, name: str, documentation: str, typ: str, unit: str = ''):
3636
self.type: str = typ
3737
self.samples: List[Sample] = []
3838

39-
def add_sample(self, name: str, labels: Dict[str, str], value: float, timestamp: Optional[Union[Timestamp, float]] = None, exemplar: Optional[Exemplar] = None) -> None:
39+
def add_sample(self, name: str, labels: Dict[str, str], value: float, timestamp: Optional[Union[Timestamp, float]] = None, exemplar: Optional[Exemplar] = None, native_histogram: Optional[NativeHistogram] = None) -> None:
4040
"""Add a sample to the metric.
4141
4242
Internal-only, do not use."""
43-
self.samples.append(Sample(name, labels, value, timestamp, exemplar))
43+
self.samples.append(Sample(name, labels, value, timestamp, exemplar, native_histogram))
4444

4545
def __eq__(self, other: object) -> bool:
4646
return (isinstance(other, Metric)
@@ -284,7 +284,6 @@ def add_metric(self,
284284
Sample(self.name + '_sum', dict(zip(self._labelnames, labels)), sum_value, timestamp))
285285

286286

287-
288287
class GaugeHistogramMetricFamily(Metric):
289288
"""A single gauge histogram and its samples.
290289

prometheus_client/multiprocess.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def _accumulate_metrics(metrics, accumulate):
9393
buckets = defaultdict(lambda: defaultdict(float))
9494
samples_setdefault = samples.setdefault
9595
for s in metric.samples:
96-
name, labels, value, timestamp, exemplar = s
96+
name, labels, value, timestamp, exemplar, native_histogram_value = s
9797
if metric.type == 'gauge':
9898
without_pid_key = (name, tuple(l for l in labels if l[0] != 'pid'))
9999
if metric._multiprocess_mode in ('min', 'livemin'):

prometheus_client/openmetrics/exposition.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
def _is_valid_exemplar_metric(metric, sample):
    """Return True if ``sample`` of ``metric`` is allowed to carry an exemplar.

    Exemplars are accepted on:

    * ``_total`` samples of counters,
    * ``_bucket`` samples of gauge histograms,
    * ``_bucket`` samples of histograms, and histogram samples whose name is
      exactly the metric name (no suffix).

    Every other metric type / sample name combination is rejected.
    """
    sample_name = sample.name
    # Compare the type with ``==``: ``x in ('histogram')`` is a substring test
    # against a plain string, not a tuple membership test.
    if metric.type == 'counter':
        return sample_name.endswith('_total')
    if metric.type == 'gaugehistogram':
        return sample_name.endswith('_bucket')
    if metric.type == 'histogram':
        # The un-suffixed name is only valid for histograms; the check must
        # stay inside the type guard so that e.g. gauges (whose samples always
        # use the bare metric name) are not accepted by accident.
        return sample_name.endswith('_bucket') or sample_name == metric.name
    return False
1618

prometheus_client/openmetrics/parser.py

+133-23
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import re
77

88
from ..metrics_core import Metric, METRIC_LABEL_NAME_RE
9-
from ..samples import Exemplar, Sample, Timestamp
9+
from ..samples import BucketSpan, Exemplar, NativeHistogram, Sample, Timestamp
1010
from ..utils import floatToGoString
1111

1212

@@ -364,6 +364,99 @@ def _parse_remaining_text(text):
364364
return val, ts, exemplar
365365

366366

367+
def _parse_nh_sample(text, suffixes):
    """Try to parse ``text`` as a native histogram sample line.

    Two shapes are recognised:

    * ``name{labels} {struct}`` - native histogram with labels,
    * ``name {struct}``         - native histogram without labels.

    ``suffixes`` is passed to ``str.endswith`` and lists the name endings a
    native histogram sample must not have.

    Returns a ``Sample`` whose value, timestamp and exemplar are ``None`` and
    whose last field holds the parsed struct, or ``None`` when ``text`` has
    neither shape.  Raises ``ValueError`` when the sample name ends with one
    of ``suffixes``.
    """
    first_brace = text.find("{")

    # Shape 1: name{labels} {struct}
    if re.match(r'[^{} ]+{[^{}]+} {[^{}]+}$', text):
        struct_start = text.rindex("{")
        # Two characters precede the struct: the closing '}' of the label set
        # and the separating space.
        labels = _parse_labels(text[first_brace + 1:struct_start - 2])
        name = text[:first_brace]
        if name.endswith(suffixes):
            raise ValueError("the sample name of a native histogram with labels should have no suffixes", name)
        return Sample(name, labels, None, None, None, _parse_nh_struct(text[struct_start:]))

    # Anything that is not shape 2 either is not a native histogram at all.
    if not re.match(r'^[^{} ]+ {[^{}]+}$', text):
        return None

    # Shape 2: name {struct}; the name ends just before the separating space.
    name = text[:first_brace - 1]
    if name.endswith(suffixes):
        raise ValueError("the sample name of a native histogram should have no suffixes", name)
    return Sample(name, None, None, None, None, _parse_nh_struct(text[first_brace:]))
397+
398+
399+
def _parse_nh_number(text):
    """Convert ``text`` to an int when possible, otherwise to a float."""
    try:
        return int(text)
    except ValueError:
        return float(text)


def _parse_nh_spans(spans_text):
    """Turn ``'offset:length,offset:length,...'`` into a tuple of BucketSpan."""
    if spans_text is None:
        return None
    return tuple(
        BucketSpan(*(int(part) for part in span.split(':')))
        for span in spans_text.split(',')
    )


def _parse_nh_deltas(deltas_text):
    """Turn ``'d1,d2,...'`` into a tuple of ints."""
    if deltas_text is None:
        return None
    return tuple(int(delta) for delta in deltas_text.split(','))


def _parse_nh_struct(text):
    """Parse the ``{...}`` value of a native histogram sample.

    ``text`` is expected to contain ``key:value`` pairs for ``count``,
    ``sum``, ``schema``, ``zero_threshold`` and ``zero_count``, optionally
    followed by ``positive_spans``/``negative_spans`` (``[offset:length,...]``)
    and ``positive_deltas``/``negative_deltas`` (``[int,...]``).

    Returns a ``NativeHistogram``; span and delta fields that are absent from
    ``text`` are ``None``.

    Raises ``KeyError`` when one of the five scalar fields is missing and
    ``ValueError`` when a field cannot be converted to a number.
    """
    scalar_pattern = r'(\w+):\s*([^,}]+)'
    # Any number of spans is accepted (not just two), and offsets may be
    # negative; a two-span input parses exactly as before.
    re_spans = re.compile(r'(positive_spans|negative_spans):\[(-?\d+:\d+(?:,-?\d+:\d+)*)\]')
    re_deltas = re.compile(r'(positive_deltas|negative_deltas):\[(-?\d+(?:,-?\d+)*)\]')

    items = dict(re.findall(scalar_pattern, text))
    spans = dict(re_spans.findall(text))
    deltas = dict(re_deltas.findall(text))

    return NativeHistogram(
        count_value=int(items['count']),
        # The sum of observations need not be integral: keep ints as ints,
        # fall back to float for anything else.
        sum_value=_parse_nh_number(items['sum']),
        schema=int(items['schema']),
        zero_threshold=float(items['zero_threshold']),
        zero_count=int(items['zero_count']),
        pos_spans=_parse_nh_spans(spans.get('positive_spans')),
        neg_spans=_parse_nh_spans(spans.get('negative_spans')),
        pos_deltas=_parse_nh_deltas(deltas.get('positive_deltas')),
        neg_deltas=_parse_nh_deltas(deltas.get('negative_deltas')),
    )
458+
459+
367460
def _group_for_sample(sample, name, typ):
368461
if typ == 'info':
369462
# We can't distinguish between groups for info metrics.
@@ -406,6 +499,8 @@ def do_checks():
406499
for s in samples:
407500
suffix = s.name[len(name):]
408501
g = _group_for_sample(s, name, 'histogram')
502+
if len(suffix) == 0:
503+
continue
409504
if g != group or s.timestamp != timestamp:
410505
if group is not None:
411506
do_checks()
@@ -486,6 +581,8 @@ def build_metric(name, documentation, typ, unit, samples):
486581
metric.samples = samples
487582
return metric
488583

584+
is_nh = False
585+
typ = None
489586
for line in fd:
490587
if line[-1] == '\n':
491588
line = line[:-1]
@@ -518,7 +615,7 @@ def build_metric(name, documentation, typ, unit, samples):
518615
group_timestamp_samples = set()
519616
samples = []
520617
allowed_names = [parts[2]]
521-
618+
522619
if parts[1] == 'HELP':
523620
if documentation is not None:
524621
raise ValueError("More than one HELP for metric: " + line)
@@ -537,8 +634,18 @@ def build_metric(name, documentation, typ, unit, samples):
537634
else:
538635
raise ValueError("Invalid line: " + line)
539636
else:
540-
sample = _parse_sample(line)
541-
if sample.name not in allowed_names:
637+
if typ == 'histogram':
638+
# set to true to account for native histograms naming exceptions/sanitizing differences
639+
is_nh = True
640+
sample = _parse_nh_sample(line, tuple(type_suffixes['histogram']))
641+
# It's not a native histogram
642+
if sample is None:
643+
is_nh = False
644+
sample = _parse_sample(line)
645+
else:
646+
is_nh = False
647+
sample = _parse_sample(line)
648+
if sample.name not in allowed_names and not is_nh:
542649
if name is not None:
543650
yield build_metric(name, documentation, typ, unit, samples)
544651
# Start an unknown metric.
@@ -570,26 +677,29 @@ def build_metric(name, documentation, typ, unit, samples):
570677
or _isUncanonicalNumber(sample.labels['quantile']))):
571678
raise ValueError("Invalid quantile label: " + line)
572679

573-
g = tuple(sorted(_group_for_sample(sample, name, typ).items()))
574-
if group is not None and g != group and g in seen_groups:
575-
raise ValueError("Invalid metric grouping: " + line)
576-
if group is not None and g == group:
577-
if (sample.timestamp is None) != (group_timestamp is None):
578-
raise ValueError("Mix of timestamp presence within a group: " + line)
579-
if group_timestamp is not None and group_timestamp > sample.timestamp and typ != 'info':
580-
raise ValueError("Timestamps went backwards within a group: " + line)
680+
if not is_nh:
681+
g = tuple(sorted(_group_for_sample(sample, name, typ).items()))
682+
if group is not None and g != group and g in seen_groups:
683+
raise ValueError("Invalid metric grouping: " + line)
684+
if group is not None and g == group:
685+
if (sample.timestamp is None) != (group_timestamp is None):
686+
raise ValueError("Mix of timestamp presence within a group: " + line)
687+
if group_timestamp is not None and group_timestamp > sample.timestamp and typ != 'info':
688+
raise ValueError("Timestamps went backwards within a group: " + line)
689+
else:
690+
group_timestamp_samples = set()
691+
692+
series_id = (sample.name, tuple(sorted(sample.labels.items())))
693+
if sample.timestamp != group_timestamp or series_id not in group_timestamp_samples:
694+
# Not a duplicate due to timestamp truncation.
695+
samples.append(sample)
696+
group_timestamp_samples.add(series_id)
697+
698+
group = g
699+
group_timestamp = sample.timestamp
700+
seen_groups.add(g)
581701
else:
582-
group_timestamp_samples = set()
583-
584-
series_id = (sample.name, tuple(sorted(sample.labels.items())))
585-
if sample.timestamp != group_timestamp or series_id not in group_timestamp_samples:
586-
# Not a duplicate due to timestamp truncation.
587702
samples.append(sample)
588-
group_timestamp_samples.add(series_id)
589-
590-
group = g
591-
group_timestamp = sample.timestamp
592-
seen_groups.add(g)
593703

594704
if typ == 'stateset' and sample.value not in [0, 1]:
595705
raise ValueError("Stateset samples can only have values zero and one: " + line)
@@ -606,7 +716,7 @@ def build_metric(name, documentation, typ, unit, samples):
606716
(typ in ['histogram', 'gaugehistogram'] and sample.name.endswith('_bucket'))
607717
or (typ in ['counter'] and sample.name.endswith('_total'))):
608718
raise ValueError("Invalid line only histogram/gaugehistogram buckets and counters can have exemplars: " + line)
609-
719+
610720
if name is not None:
611721
yield build_metric(name, documentation, typ, unit, samples)
612722

prometheus_client/samples.py

+21-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Dict, NamedTuple, Optional, Union
1+
from typing import Dict, NamedTuple, Optional, Sequence, Tuple, Union
22

33

44
class Timestamp:
@@ -34,6 +34,25 @@ def __lt__(self, other: "Timestamp") -> bool:
3434
return self.nsec < other.nsec if self.sec == other.sec else self.sec < other.sec
3535

3636

37+
# BucketSpan is experimental and subject to change at any time.
class BucketSpan(NamedTuple):
    """One ``offset:length`` span of a native histogram.

    NOTE(review): presumably mirrors Prometheus' BucketSpan (offset = gap to
    the previous span or start index of the first one, length = number of
    consecutive buckets) - confirm against the Prometheus specification.
    """
    offset: int
    length: int


# NativeHistogram is experimental and subject to change at any time.
class NativeHistogram(NamedTuple):
    """Value of a native histogram sample.

    The five leading fields are required; the span and delta fields are
    optional and default to ``None``.
    """
    count_value: float
    sum_value: float
    schema: int
    zero_threshold: float
    zero_count: float
    # Variable-length tuples: the number of spans is not fixed at two.
    pos_spans: Optional[Tuple[BucketSpan, ...]] = None
    neg_spans: Optional[Tuple[BucketSpan, ...]] = None
    pos_deltas: Optional[Sequence[int]] = None
    neg_deltas: Optional[Sequence[int]] = None
54+
55+
3756
# Timestamp and exemplar are optional.
3857
# Value can be an int or a float.
3958
# Timestamp can be a float containing a unixtime in seconds,
@@ -51,3 +70,4 @@ class Sample(NamedTuple):
5170
value: float
5271
timestamp: Optional[Union[float, Timestamp]] = None
5372
exemplar: Optional[Exemplar] = None
73+
native_histogram: Optional[NativeHistogram] = None

0 commit comments

Comments
 (0)