Commit c03b739

[exporter-otlp-proto-common] Include metric info when an exception occurs during encoding
Also remove redundant str(exception) from the exception message
1 parent ea36c5d commit c03b739

File tree: 4 files changed, +188 −145 lines changed

CHANGELOG.md (+2)

```diff
@@ -34,6 +34,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Remove `_start_time_unix_nano` attribute from `_ViewInstrumentMatch` in favor
   of using `time_ns()` at the moment when the aggregation object is created
   ([#4137](https://github.com/open-telemetry/opentelemetry-python/pull/4137))
+- Include metric info in encoding exceptions
+  ([#4154](https://github.com/open-telemetry/opentelemetry-python/pull/4154))
 
 ## Version 1.26.0/0.47b0 (2024-07-25)
 
```
exporter/opentelemetry-exporter-otlp-proto-common/src/opentelemetry/exporter/otlp/proto/common/_internal/metrics_encoder/__init__.py (+149 −143)
```diff
@@ -173,152 +173,26 @@ def _get_aggregation(
     return instrument_class_aggregation
 
 
-def encode_metrics(data: MetricsData) -> ExportMetricsServiceRequest:
-    resource_metrics_dict = {}
-
-    for resource_metrics in data.resource_metrics:
-
-        resource = resource_metrics.resource
+class EncodingException(Exception):
+    """
+    Raised by encode_metrics() when an exception is caught during encoding. Contains the problematic metric so
+    the misbehaving metric name and details can be logged during exception handling.
+    """
 
-        # It is safe to assume that each entry in data.resource_metrics is
-        # associated with an unique resource.
-        scope_metrics_dict = {}
+    def __init__(self, original_exception, metric):
+        super().__init__()
+        self.original_exception = original_exception
+        self.metric = metric
 
-        resource_metrics_dict[resource] = scope_metrics_dict
-
-        for scope_metrics in resource_metrics.scope_metrics:
-
-            instrumentation_scope = scope_metrics.scope
-
-            # The SDK groups metrics in instrumentation scopes already so
-            # there is no need to check for existing instrumentation scopes
-            # here.
-            pb2_scope_metrics = pb2.ScopeMetrics(
-                scope=InstrumentationScope(
-                    name=instrumentation_scope.name,
-                    version=instrumentation_scope.version,
-                )
-            )
+    def __str__(self):
+        return f"{self.metric}\n{self.original_exception}"
 
-            scope_metrics_dict[instrumentation_scope] = pb2_scope_metrics
 
-            for metric in scope_metrics.metrics:
-                pb2_metric = pb2.Metric(
-                    name=metric.name,
-                    description=metric.description,
-                    unit=metric.unit,
-                )
+def encode_metrics(data: MetricsData) -> ExportMetricsServiceRequest:
+    resource_metrics_dict = {}
 
-                if isinstance(metric.data, Gauge):
-                    for data_point in metric.data.data_points:
-                        pt = pb2.NumberDataPoint(
-                            attributes=_encode_attributes(
-                                data_point.attributes
-                            ),
-                            time_unix_nano=data_point.time_unix_nano,
-                        )
-                        if isinstance(data_point.value, int):
-                            pt.as_int = data_point.value
-                        else:
-                            pt.as_double = data_point.value
-                        pb2_metric.gauge.data_points.append(pt)
-
-                elif isinstance(metric.data, HistogramType):
-                    for data_point in metric.data.data_points:
-                        pt = pb2.HistogramDataPoint(
-                            attributes=_encode_attributes(
-                                data_point.attributes
-                            ),
-                            time_unix_nano=data_point.time_unix_nano,
-                            start_time_unix_nano=(
-                                data_point.start_time_unix_nano
-                            ),
-                            count=data_point.count,
-                            sum=data_point.sum,
-                            bucket_counts=data_point.bucket_counts,
-                            explicit_bounds=data_point.explicit_bounds,
-                            max=data_point.max,
-                            min=data_point.min,
-                        )
-                        pb2_metric.histogram.aggregation_temporality = (
-                            metric.data.aggregation_temporality
-                        )
-                        pb2_metric.histogram.data_points.append(pt)
-
-                elif isinstance(metric.data, Sum):
-                    for data_point in metric.data.data_points:
-                        pt = pb2.NumberDataPoint(
-                            attributes=_encode_attributes(
-                                data_point.attributes
-                            ),
-                            start_time_unix_nano=(
-                                data_point.start_time_unix_nano
-                            ),
-                            time_unix_nano=data_point.time_unix_nano,
-                        )
-                        if isinstance(data_point.value, int):
-                            pt.as_int = data_point.value
-                        else:
-                            pt.as_double = data_point.value
-                        # note that because sum is a message type, the
-                        # fields must be set individually rather than
-                        # instantiating a pb2.Sum and setting it once
-                        pb2_metric.sum.aggregation_temporality = (
-                            metric.data.aggregation_temporality
-                        )
-                        pb2_metric.sum.is_monotonic = metric.data.is_monotonic
-                        pb2_metric.sum.data_points.append(pt)
-
-                elif isinstance(metric.data, ExponentialHistogramType):
-                    for data_point in metric.data.data_points:
-
-                        if data_point.positive.bucket_counts:
-                            positive = pb2.ExponentialHistogramDataPoint.Buckets(
-                                offset=data_point.positive.offset,
-                                bucket_counts=data_point.positive.bucket_counts,
-                            )
-                        else:
-                            positive = None
-
-                        if data_point.negative.bucket_counts:
-                            negative = pb2.ExponentialHistogramDataPoint.Buckets(
-                                offset=data_point.negative.offset,
-                                bucket_counts=data_point.negative.bucket_counts,
-                            )
-                        else:
-                            negative = None
-
-                        pt = pb2.ExponentialHistogramDataPoint(
-                            attributes=_encode_attributes(
-                                data_point.attributes
-                            ),
-                            time_unix_nano=data_point.time_unix_nano,
-                            start_time_unix_nano=(
-                                data_point.start_time_unix_nano
-                            ),
-                            count=data_point.count,
-                            sum=data_point.sum,
-                            scale=data_point.scale,
-                            zero_count=data_point.zero_count,
-                            positive=positive,
-                            negative=negative,
-                            flags=data_point.flags,
-                            max=data_point.max,
-                            min=data_point.min,
-                        )
-                        pb2_metric.exponential_histogram.aggregation_temporality = (
-                            metric.data.aggregation_temporality
-                        )
-                        pb2_metric.exponential_histogram.data_points.append(pt)
-
-                else:
-                    _logger.warning(
-                        "unsupported data type %s",
-                        metric.data.__class__.__name__,
-                    )
-                    continue
-
-                pb2_scope_metrics.metrics.append(pb2_metric)
+    for resource_metrics in data.resource_metrics:
+        _encode_resource_metrics(resource_metrics, resource_metrics_dict)
 
     resource_data = []
     for (
```
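The new EncodingException deliberately carries the offending Metric object, so code above encode_metrics() can report which metric broke encoding. A minimal caller-side sketch under that assumption — the export_with_diagnostics wrapper is illustrative, not part of this commit:

```python
import logging

from opentelemetry.exporter.otlp.proto.common._internal.metrics_encoder import (
    EncodingException,
)
from opentelemetry.exporter.otlp.proto.common.metrics_encoder import (
    encode_metrics,
)

_logger = logging.getLogger(__name__)


def export_with_diagnostics(metrics_data):
    """Hypothetical wrapper: name the misbehaving metric, then re-raise."""
    try:
        return encode_metrics(metrics_data)
    except EncodingException as ex:
        # EncodingException.__str__ renders the metric followed by the
        # original exception, so logging `ex` alone surfaces both.
        _logger.error("failed to encode metric %r: %s", ex.metric.name, ex)
        raise
```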
```diff
@@ -334,5 +208,137 @@ def encode_metrics(data: MetricsData) -> ExportMetricsServiceRequest:
                 schema_url=sdk_resource.schema_url,
             )
         )
-    resource_metrics = resource_data
-    return ExportMetricsServiceRequest(resource_metrics=resource_metrics)
+    return ExportMetricsServiceRequest(resource_metrics=resource_data)
+
+
+def _encode_resource_metrics(resource_metrics, resource_metrics_dict):
+    resource = resource_metrics.resource
+    # It is safe to assume that each entry in data.resource_metrics is
+    # associated with an unique resource.
+    scope_metrics_dict = {}
+    resource_metrics_dict[resource] = scope_metrics_dict
+    for scope_metrics in resource_metrics.scope_metrics:
+        instrumentation_scope = scope_metrics.scope
+
+        # The SDK groups metrics in instrumentation scopes already so
+        # there is no need to check for existing instrumentation scopes
+        # here.
+        pb2_scope_metrics = pb2.ScopeMetrics(
+            scope=InstrumentationScope(
+                name=instrumentation_scope.name,
+                version=instrumentation_scope.version,
+            )
+        )
+
+        scope_metrics_dict[instrumentation_scope] = pb2_scope_metrics
+
+        for metric in scope_metrics.metrics:
+            pb2_metric = pb2.Metric(
+                name=metric.name,
+                description=metric.description,
+                unit=metric.unit,
+            )
+
+            try:
+                _encode_metric(metric, pb2_metric)
+            except Exception as ex:
+                # `from None` so we don't get "During handling of the above exception, another exception occurred:"
+                raise EncodingException(ex, metric) from None
+
+            pb2_scope_metrics.metrics.append(pb2_metric)
+
+
+def _encode_metric(metric, pb2_metric):
+    if isinstance(metric.data, Gauge):
+        for data_point in metric.data.data_points:
+            pt = pb2.NumberDataPoint(
+                attributes=_encode_attributes(data_point.attributes),
+                time_unix_nano=data_point.time_unix_nano,
+            )
+            if isinstance(data_point.value, int):
+                pt.as_int = data_point.value
+            else:
+                pt.as_double = data_point.value
+            pb2_metric.gauge.data_points.append(pt)
+
+    elif isinstance(metric.data, HistogramType):
+        for data_point in metric.data.data_points:
+            pt = pb2.HistogramDataPoint(
+                attributes=_encode_attributes(data_point.attributes),
+                time_unix_nano=data_point.time_unix_nano,
+                start_time_unix_nano=data_point.start_time_unix_nano,
+                count=data_point.count,
+                sum=data_point.sum,
+                bucket_counts=data_point.bucket_counts,
+                explicit_bounds=data_point.explicit_bounds,
+                max=data_point.max,
+                min=data_point.min,
+            )
+            pb2_metric.histogram.aggregation_temporality = (
+                metric.data.aggregation_temporality
+            )
+            pb2_metric.histogram.data_points.append(pt)
+
+    elif isinstance(metric.data, Sum):
+        for data_point in metric.data.data_points:
+            pt = pb2.NumberDataPoint(
+                attributes=_encode_attributes(data_point.attributes),
+                start_time_unix_nano=data_point.start_time_unix_nano,
+                time_unix_nano=data_point.time_unix_nano,
+            )
+            if isinstance(data_point.value, int):
+                pt.as_int = data_point.value
+            else:
+                pt.as_double = data_point.value
+            # note that because sum is a message type, the
+            # fields must be set individually rather than
+            # instantiating a pb2.Sum and setting it once
+            pb2_metric.sum.aggregation_temporality = (
+                metric.data.aggregation_temporality
+            )
+            pb2_metric.sum.is_monotonic = metric.data.is_monotonic
+            pb2_metric.sum.data_points.append(pt)
+
+    elif isinstance(metric.data, ExponentialHistogramType):
+        for data_point in metric.data.data_points:
+
+            if data_point.positive.bucket_counts:
+                positive = pb2.ExponentialHistogramDataPoint.Buckets(
+                    offset=data_point.positive.offset,
+                    bucket_counts=data_point.positive.bucket_counts,
+                )
+            else:
+                positive = None
+
+            if data_point.negative.bucket_counts:
+                negative = pb2.ExponentialHistogramDataPoint.Buckets(
+                    offset=data_point.negative.offset,
+                    bucket_counts=data_point.negative.bucket_counts,
+                )
+            else:
+                negative = None
+
+            pt = pb2.ExponentialHistogramDataPoint(
+                attributes=_encode_attributes(data_point.attributes),
+                time_unix_nano=data_point.time_unix_nano,
+                start_time_unix_nano=data_point.start_time_unix_nano,
+                count=data_point.count,
+                sum=data_point.sum,
+                scale=data_point.scale,
+                zero_count=data_point.zero_count,
+                positive=positive,
+                negative=negative,
+                flags=data_point.flags,
+                max=data_point.max,
+                min=data_point.min,
+            )
+            pb2_metric.exponential_histogram.aggregation_temporality = (
+                metric.data.aggregation_temporality
+            )
+            pb2_metric.exponential_histogram.data_points.append(pt)
+
+    else:
+        _logger.warning(
+            "unsupported data type %s",
+            metric.data.__class__.__name__,
+        )
```
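The `raise EncodingException(ex, metric) from None` above relies on standard Python exception-chaining semantics: without `from None`, the traceback would print the original error under a "During handling of the above exception, another exception occurred:" banner. A standalone sketch of that mechanism — WrapperError is an illustrative stand-in, not from this codebase:

```python
class WrapperError(Exception):
    """Illustrative stand-in for EncodingException."""

    def __init__(self, original):
        super().__init__()
        self.original = original


def risky():
    raise ValueError("bad value")


try:
    try:
        risky()
    except Exception as ex:
        # `from None` marks the implicit chain as suppressed; the original
        # exception stays reachable through the wrapper's own attribute.
        raise WrapperError(ex) from None
except WrapperError as wrapped:
    assert wrapped.__suppress_context__  # traceback printer omits the chain
    assert isinstance(wrapped.original, ValueError)
```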

exporter/opentelemetry-exporter-otlp-proto-common/tests/test_metrics_encoder.py (+35)
```diff
@@ -15,6 +15,9 @@
 # pylint: disable=protected-access
 import unittest
 
+from opentelemetry.exporter.otlp.proto.common._internal.metrics_encoder import (
+    EncodingException,
+)
 from opentelemetry.exporter.otlp.proto.common.metrics_encoder import (
     encode_metrics,
 )
@@ -814,3 +817,35 @@ def test_encode_exponential_histogram(self):
         # pylint: disable=protected-access
         actual = encode_metrics(metrics_data)
         self.assertEqual(expected, actual)
+
+    def test_encoding_exception_reraise(self):
+        # this number is too big to fit in a signed 64-bit proto field and causes a ValueError
+        big_number = 2**63
+        metrics_data = MetricsData(
+            resource_metrics=[
+                ResourceMetrics(
+                    resource=Resource(
+                        attributes={},
+                        schema_url="resource_schema_url",
+                    ),
+                    scope_metrics=[
+                        ScopeMetrics(
+                            scope=SDKInstrumentationScope(
+                                name="first_name",
+                                version="first_version",
+                                schema_url="instrumentation_scope_schema_url",
+                            ),
+                            metrics=[_generate_sum("sum_double", big_number)],
+                            schema_url="instrumentation_scope_schema_url",
+                        )
+                    ],
+                    schema_url="resource_schema_url",
+                )
+            ]
+        )
+        with self.assertRaises(EncodingException) as context:
+            encode_metrics(metrics_data)
+
+        # assert that the EncodingException wraps the metric and original exception
+        assert isinstance(context.exception.metric, Metric)
+        assert isinstance(context.exception.original_exception, ValueError)
```
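The test's poison value works because NumberDataPoint stores integer values in a signed 64-bit protobuf field, whose range is [-2**63, 2**63 - 1]; 2**63 is the first integer past that range, and (as the test comment notes) the protobuf runtime rejects it with a ValueError. A quick sketch of the boundary, assuming the generated opentelemetry-proto module is installed:

```python
from opentelemetry.proto.metrics.v1 import metrics_pb2 as pb2

INT64_MAX = 2**63 - 1  # largest value a signed 64-bit field accepts

pt = pb2.NumberDataPoint()
pt.as_int = INT64_MAX  # accepted: exactly on the boundary

try:
    pt.as_int = INT64_MAX + 1  # 2**63, one past the range
except ValueError as err:
    print(f"rejected by protobuf: {err}")
```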

opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/export/__init__.py (+2 −2)
```diff
@@ -541,8 +541,8 @@ def _receive_metrics(
                 self._exporter.export(
                     metrics_data, timeout_millis=timeout_millis
                 )
-        except Exception as e:
-            _logger.exception("Exception while exporting metrics %s", str(e))
+        except Exception:
+            _logger.exception("Exception while exporting metrics")
         detach(token)
 
     def shutdown(self, timeout_millis: float = 30_000, **kwargs) -> None:
```
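The SDK change drops the redundant str(e): Logger.exception() logs at ERROR level and appends the active exception's type, message, and traceback automatically, so interpolating the message again duplicated it. A minimal standard-library sketch:

```python
import logging

logging.basicConfig(level=logging.ERROR)
_logger = logging.getLogger(__name__)

try:
    raise RuntimeError("export failed")
except Exception:
    # The traceback, ending in "RuntimeError: export failed", is appended
    # automatically; adding str(e) to the message would repeat it.
    _logger.exception("Exception while exporting metrics")
```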
