Add support for native histograms in OM parser (#1040)

vesari · web-flow · commit d7c9cd88c7f5 · 2024-09-20T11:04:20.000-06:00
* Start on native histogram parser
* Fix regex for nh sample
* Get nh sample appended
* Complete parsing for simple native histogram
* Add parsing for native histograms with labels, fix linting
* Mitigate type and style errors
* Add test for parsing coexisting native and classic hist with simple label set
* Solve error in Python 3.9 tests
* Add test for native + classic histograms with more than a label set and adapt logic accordigly
* Separate native histogram from value field, improve conditional/try blocks
* Clean up debug lines, add warnings, delete unnecessary lines

Signed-off-by: Arianna Vespri &lt;arianna.vespri@yahoo.it&gt;
diff --git a/prometheus_client/core.py b/prometheus_client/core.py
@@ -5,9 +5,10 @@
     SummaryMetricFamily, UnknownMetricFamily, UntypedMetricFamily,
 )
 from .registry import CollectorRegistry, REGISTRY
-from .samples import Exemplar, Sample, Timestamp
+from .samples import BucketSpan, Exemplar, NativeHistogram, Sample, Timestamp
 
 __all__ = (
+    'BucketSpan',
     'CollectorRegistry',
     'Counter',
     'CounterMetricFamily',
@@ -21,6 +22,7 @@
     'Info',
     'InfoMetricFamily',
     'Metric',
+    'NativeHistogram',
     'REGISTRY',
     'Sample',
     'StateSetMetricFamily',
diff --git a/prometheus_client/metrics.py b/prometheus_client/metrics.py
@@ -111,8 +111,8 @@ def describe(self) -> Iterable[Metric]:
 
     def collect(self) -> Iterable[Metric]:
         metric = self._get_metric()
-        for suffix, labels, value, timestamp, exemplar in self._samples():
-            metric.add_sample(self._name + suffix, labels, value, timestamp, exemplar)
+        for suffix, labels, value, timestamp, exemplar, native_histogram_value in self._samples():
+            metric.add_sample(self._name + suffix, labels, value, timestamp, exemplar, native_histogram_value)
         return [metric]
 
     def __str__(self) -> str:
@@ -246,8 +246,8 @@ def _multi_samples(self) -> Iterable[Sample]:
             metrics = self._metrics.copy()
         for labels, metric in metrics.items():
             series_labels = list(zip(self._labelnames, labels))
-            for suffix, sample_labels, value, timestamp, exemplar in metric._samples():
-                yield Sample(suffix, dict(series_labels + list(sample_labels.items())), value, timestamp, exemplar)
+            for suffix, sample_labels, value, timestamp, exemplar, native_histogram_value in metric._samples():
+                yield Sample(suffix, dict(series_labels + list(sample_labels.items())), value, timestamp, exemplar, native_histogram_value)
 
     def _child_samples(self) -> Iterable[Sample]:  # pragma: no cover
         raise NotImplementedError('_child_samples() must be implemented by %r' % self)
diff --git a/prometheus_client/metrics_core.py b/prometheus_client/metrics_core.py
@@ -1,7 +1,7 @@
 import re
 from typing import Dict, List, Optional, Sequence, Tuple, Union
 
-from .samples import Exemplar, Sample, Timestamp
+from .samples import Exemplar, NativeHistogram, Sample, Timestamp
 
 METRIC_TYPES = (
     'counter', 'gauge', 'summary', 'histogram',
@@ -36,11 +36,11 @@ def __init__(self, name: str, documentation: str, typ: str, unit: str = ''):
         self.type: str = typ
         self.samples: List[Sample] = []
 
-    def add_sample(self, name: str, labels: Dict[str, str], value: float, timestamp: Optional[Union[Timestamp, float]] = None, exemplar: Optional[Exemplar] = None) -> None:
+    def add_sample(self, name: str, labels: Dict[str, str], value: float, timestamp: Optional[Union[Timestamp, float]] = None, exemplar: Optional[Exemplar] = None, native_histogram: Optional[NativeHistogram] = None) -> None:
         """Add a sample to the metric.
 
         Internal-only, do not use."""
-        self.samples.append(Sample(name, labels, value, timestamp, exemplar))
+        self.samples.append(Sample(name, labels, value, timestamp, exemplar, native_histogram))
 
     def __eq__(self, other: object) -> bool:
         return (isinstance(other, Metric)
@@ -284,7 +284,6 @@ def add_metric(self,
                 Sample(self.name + '_sum', dict(zip(self._labelnames, labels)), sum_value, timestamp))
 
 
-
 class GaugeHistogramMetricFamily(Metric):
     """A single gauge histogram and its samples.
 
diff --git a/prometheus_client/multiprocess.py b/prometheus_client/multiprocess.py
@@ -93,7 +93,7 @@ def _accumulate_metrics(metrics, accumulate):
             buckets = defaultdict(lambda: defaultdict(float))
             samples_setdefault = samples.setdefault
             for s in metric.samples:
-                name, labels, value, timestamp, exemplar = s
+                name, labels, value, timestamp, exemplar, native_histogram_value = s
                 if metric.type == 'gauge':
                     without_pid_key = (name, tuple(l for l in labels if l[0] != 'pid'))
                     if metric._multiprocess_mode in ('min', 'livemin'):
diff --git a/prometheus_client/openmetrics/exposition.py b/prometheus_client/openmetrics/exposition.py
@@ -10,7 +10,9 @@
 def _is_valid_exemplar_metric(metric, sample):
     if metric.type == 'counter' and sample.name.endswith('_total'):
         return True
-    if metric.type in ('histogram', 'gaugehistogram') and sample.name.endswith('_bucket'):
+    if metric.type in ('gaugehistogram') and sample.name.endswith('_bucket'):
+        return True
+    if metric.type in ('histogram') and sample.name.endswith('_bucket') or sample.name == metric.name:
         return True
     return False
 
diff --git a/prometheus_client/openmetrics/parser.py b/prometheus_client/openmetrics/parser.py
@@ -6,7 +6,7 @@
 import re
 
 from ..metrics_core import Metric, METRIC_LABEL_NAME_RE
-from ..samples import Exemplar, Sample, Timestamp
+from ..samples import BucketSpan, Exemplar, NativeHistogram, Sample, Timestamp
 from ..utils import floatToGoString
 
 
@@ -364,6 +364,99 @@ def _parse_remaining_text(text):
     return val, ts, exemplar
 
 
+def _parse_nh_sample(text, suffixes):
+    labels_start = text.find("{")
+    # check if it's a native histogram with labels
+    re_nh_without_labels = re.compile(r'^[^{} ]+ {[^{}]+}$')
+    re_nh_with_labels = re.compile(r'[^{} ]+{[^{}]+} {[^{}]+}$')
+    if re_nh_with_labels.match(text):
+        nh_value_start = text.rindex("{")
+        labels_end = nh_value_start - 2
+        labelstext = text[labels_start + 1:labels_end]
+        labels = _parse_labels(labelstext)
+        name_end = labels_start
+        name = text[:name_end]
+        if name.endswith(suffixes):
+            raise ValueError("the sample name of a native histogram with labels should have no suffixes", name) 
+        nh_value = text[nh_value_start:]
+        nat_hist_value = _parse_nh_struct(nh_value)
+        return Sample(name, labels, None, None, None, nat_hist_value)
+    # check if it's a native histogram
+    if re_nh_without_labels.match(text):
+        nh_value_start = labels_start
+        nh_value = text[nh_value_start:]
+        name_end = nh_value_start - 1
+        name = text[:name_end]
+        if name.endswith(suffixes):
+            raise ValueError("the sample name of a native histogram should have no suffixes", name)
+        nat_hist_value = _parse_nh_struct(nh_value)
+        return Sample(name, None, None, None, None, nat_hist_value)      
+    else:
+        # it's not a native histogram
+        return
+
+
+def _parse_nh_struct(text):
+    pattern = r'(\w+):\s*([^,}]+)'
+    
+    re_spans = re.compile(r'(positive_spans|negative_spans):\[(\d+:\d+,\d+:\d+)\]')
+    re_deltas = re.compile(r'(positive_deltas|negative_deltas):\[(-?\d+(?:,-?\d+)*)\]')
+
+    items = dict(re.findall(pattern, text))
+    spans = dict(re_spans.findall(text))
+    deltas = dict(re_deltas.findall(text))
+
+    count_value = int(items['count'])
+    sum_value = int(items['sum'])
+    schema = int(items['schema'])
+    zero_threshold = float(items['zero_threshold'])
+    zero_count = int(items['zero_count'])
+
+    try:
+        pos_spans_text = spans['positive_spans']
+        elems = pos_spans_text.split(',')
+        arg1 = [int(x) for x in elems[0].split(':')]
+        arg2 = [int(x) for x in elems[1].split(':')]
+        pos_spans = (BucketSpan(arg1[0], arg1[1]), BucketSpan(arg2[0], arg2[1]))
+    except KeyError:
+        pos_spans = None
+       
+    try:
+        neg_spans_text = spans['negative_spans']
+        elems = neg_spans_text.split(',')
+        arg1 = [int(x) for x in elems[0].split(':')]
+        arg2 = [int(x) for x in elems[1].split(':')]
+        neg_spans = (BucketSpan(arg1[0], arg1[1]), BucketSpan(arg2[0], arg2[1]))
+    except KeyError:
+        neg_spans = None
+       
+    try:
+        pos_deltas_text = deltas['positive_deltas']
+        elems = pos_deltas_text.split(',')
+        pos_deltas = tuple([int(x) for x in elems])
+    except KeyError:
+        pos_deltas = None
+       
+    try:
+        neg_deltas_text = deltas['negative_deltas']
+        elems = neg_deltas_text.split(',')
+        neg_deltas = tuple([int(x) for x in elems])
+    except KeyError:
+        neg_deltas = None
+       
+    return NativeHistogram(
+        count_value=count_value,
+        sum_value=sum_value,
+        schema=schema,
+        zero_threshold=zero_threshold,
+        zero_count=zero_count,
+        pos_spans=pos_spans,
+        neg_spans=neg_spans,
+        pos_deltas=pos_deltas,
+        neg_deltas=neg_deltas
+    )
+        
+
 def _group_for_sample(sample, name, typ):
     if typ == 'info':
         # We can't distinguish between groups for info metrics.
@@ -406,6 +499,8 @@ def do_checks():
     for s in samples:
         suffix = s.name[len(name):]
         g = _group_for_sample(s, name, 'histogram')
+        if len(suffix) == 0:
+            continue
         if g != group or s.timestamp != timestamp:
             if group is not None:
                 do_checks()
@@ -486,6 +581,8 @@ def build_metric(name, documentation, typ, unit, samples):
         metric.samples = samples
         return metric
 
+    is_nh = False
+    typ = None
     for line in fd:
         if line[-1] == '\n':
             line = line[:-1]
@@ -518,7 +615,7 @@ def build_metric(name, documentation, typ, unit, samples):
                 group_timestamp_samples = set()
                 samples = []
                 allowed_names = [parts[2]]
-
+            
             if parts[1] == 'HELP':
                 if documentation is not None:
                     raise ValueError("More than one HELP for metric: " + line)
@@ -537,8 +634,18 @@ def build_metric(name, documentation, typ, unit, samples):
             else:
                 raise ValueError("Invalid line: " + line)
         else:
-            sample = _parse_sample(line)
-            if sample.name not in allowed_names:
+            if typ == 'histogram':
+                # set to true to account for native histograms naming exceptions/sanitizing differences
+                is_nh = True
+                sample = _parse_nh_sample(line, tuple(type_suffixes['histogram']))
+                # It's not a native histogram
+                if sample is None:
+                    is_nh = False
+                    sample = _parse_sample(line)              
+            else:
+                is_nh = False
+                sample = _parse_sample(line)
+            if sample.name not in allowed_names and not is_nh:
                 if name is not None:
                     yield build_metric(name, documentation, typ, unit, samples)
                 # Start an unknown metric.
@@ -570,26 +677,29 @@ def build_metric(name, documentation, typ, unit, samples):
                          or _isUncanonicalNumber(sample.labels['quantile']))):
                 raise ValueError("Invalid quantile label: " + line)
 
-            g = tuple(sorted(_group_for_sample(sample, name, typ).items()))
-            if group is not None and g != group and g in seen_groups:
-                raise ValueError("Invalid metric grouping: " + line)
-            if group is not None and g == group:
-                if (sample.timestamp is None) != (group_timestamp is None):
-                    raise ValueError("Mix of timestamp presence within a group: " + line)
-                if group_timestamp is not None and group_timestamp > sample.timestamp and typ != 'info':
-                    raise ValueError("Timestamps went backwards within a group: " + line)
+            if not is_nh:
+                g = tuple(sorted(_group_for_sample(sample, name, typ).items()))
+                if group is not None and g != group and g in seen_groups:
+                    raise ValueError("Invalid metric grouping: " + line)
+                if group is not None and g == group:
+                    if (sample.timestamp is None) != (group_timestamp is None):
+                        raise ValueError("Mix of timestamp presence within a group: " + line)
+                    if group_timestamp is not None and group_timestamp > sample.timestamp and typ != 'info':
+                        raise ValueError("Timestamps went backwards within a group: " + line)
+                else:
+                    group_timestamp_samples = set()
+
+                series_id = (sample.name, tuple(sorted(sample.labels.items())))
+                if sample.timestamp != group_timestamp or series_id not in group_timestamp_samples:
+                    # Not a duplicate due to timestamp truncation.
+                    samples.append(sample)
+                group_timestamp_samples.add(series_id)
+
+                group = g
+                group_timestamp = sample.timestamp
+                seen_groups.add(g)
             else:
-                group_timestamp_samples = set()
-
-            series_id = (sample.name, tuple(sorted(sample.labels.items())))
-            if sample.timestamp != group_timestamp or series_id not in group_timestamp_samples:
-                # Not a duplicate due to timestamp truncation.
                 samples.append(sample)
-            group_timestamp_samples.add(series_id)
-
-            group = g
-            group_timestamp = sample.timestamp
-            seen_groups.add(g)
 
             if typ == 'stateset' and sample.value not in [0, 1]:
                 raise ValueError("Stateset samples can only have values zero and one: " + line)
@@ -606,7 +716,7 @@ def build_metric(name, documentation, typ, unit, samples):
                     (typ in ['histogram', 'gaugehistogram'] and sample.name.endswith('_bucket'))
                     or (typ in ['counter'] and sample.name.endswith('_total'))):
                 raise ValueError("Invalid line only histogram/gaugehistogram buckets and counters can have exemplars: " + line)
-
+    
     if name is not None:
         yield build_metric(name, documentation, typ, unit, samples)
 
diff --git a/prometheus_client/samples.py b/prometheus_client/samples.py
@@ -1,4 +1,4 @@
-from typing import Dict, NamedTuple, Optional, Union
+from typing import Dict, NamedTuple, Optional, Sequence, Tuple, Union
 
 
 class Timestamp:
@@ -34,6 +34,25 @@ def __lt__(self, other: "Timestamp") -> bool:
         return self.nsec < other.nsec if self.sec == other.sec else self.sec < other.sec
 
 
+# BucketSpan is experimental and subject to change at any time.
+class BucketSpan(NamedTuple):
+    offset: int
+    length: int
+
+
+# NativeHistogram is experimental and subject to change at any time.
+class NativeHistogram(NamedTuple):
+    count_value: float
+    sum_value: float
+    schema: int
+    zero_threshold: float
+    zero_count: float
+    pos_spans: Optional[Tuple[BucketSpan, BucketSpan]] = None
+    neg_spans: Optional[Tuple[BucketSpan, BucketSpan]] = None
+    pos_deltas: Optional[Sequence[int]] = None
+    neg_deltas: Optional[Sequence[int]] = None
+
+
 # Timestamp and exemplar are optional.
 # Value can be an int or a float.
 # Timestamp can be a float containing a unixtime in seconds,
@@ -51,3 +70,4 @@ class Sample(NamedTuple):
     value: float
     timestamp: Optional[Union[float, Timestamp]] = None
     exemplar: Optional[Exemplar] = None
+    native_histogram: Optional[NativeHistogram] = None
diff --git a/tests/openmetrics/test_parser.py b/tests/openmetrics/test_parser.py