Skip to content

Commit 0b3f66c

Browse files
committed
PeriodicExportingMetricReader will continue collection times out (#3098)
In cases where collection times out, the period exporting reader thread should not terminate, but instead catch, log, and continue on after the regular interval seconds. Prior to this commit, a metric collection timeout would terminate the thread and stop reporting metrics to the wrapped exporter resulting in the appearance in observability tooling of metrics just stopping without reason.
1 parent c757e4a commit 0b3f66c

File tree

3 files changed

+67
-6
lines changed

3 files changed

+67
-6
lines changed

CHANGELOG.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2828
([#3156](https://github.com/open-telemetry/opentelemetry-python/pull/3156))
2929
- deprecate jaeger exporters
3030
([#3158](https://github.com/open-telemetry/opentelemetry-python/pull/3158))
31-
3231
- Create a single resource instance
3332
([#3118](https://github.com/open-telemetry/opentelemetry-python/pull/3118))
33+
- PeriodicExportingMetricReader will continue if collection times out
34+
([#3100](https://github.com/open-telemetry/opentelemetry-python/pull/3100))
35+
3436

3537
## Version 1.15.0/0.36b0 (2022-12-09)
3638

opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/export/__init__.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
AggregationTemporality,
4242
DefaultAggregation,
4343
)
44+
from opentelemetry.sdk.metrics._internal.exceptions import MetricsTimeoutError
4445
from opentelemetry.sdk.metrics._internal.instrument import (
4546
Counter,
4647
Histogram,
@@ -497,7 +498,14 @@ def _at_fork_reinit(self):
497498
def _ticker(self) -> None:
498499
interval_secs = self._export_interval_millis / 1e3
499500
while not self._shutdown_event.wait(interval_secs):
500-
self.collect(timeout_millis=self._export_timeout_millis)
501+
try:
502+
self.collect(timeout_millis=self._export_timeout_millis)
503+
except MetricsTimeoutError:
504+
_logger.warning(
505+
"Metric collection timed out. Will try again after %s seconds",
506+
interval_secs,
507+
exc_info=True,
508+
)
501509
# one last collection below before shutting down completely
502510
self.collect(timeout_millis=self._export_interval_millis)
503511

opentelemetry-sdk/tests/metrics/test_periodic_exporting_metric_reader.py

+55-4
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,18 @@
1414

1515
import math
1616
from time import sleep, time_ns
17-
from typing import Sequence
17+
from typing import (
18+
Optional,
19+
Sequence,
20+
)
1821
from unittest.mock import Mock
1922

2023
from flaky import flaky
2124

22-
from opentelemetry.sdk.metrics import Counter
25+
from opentelemetry.sdk.metrics import (
26+
Counter,
27+
MetricsTimeoutError,
28+
)
2329
from opentelemetry.sdk.metrics._internal import _Counter
2430
from opentelemetry.sdk.metrics.export import (
2531
AggregationTemporality,
@@ -67,6 +73,25 @@ def force_flush(self, timeout_millis: float = 10_000) -> bool:
6773
return True
6874

6975

76+
class ExceptionAtCollectionPeriodicExportingMetricReader(
77+
PeriodicExportingMetricReader
78+
):
79+
def __init__(
80+
self,
81+
exporter: MetricExporter,
82+
exception: Exception,
83+
export_interval_millis: Optional[float] = None,
84+
export_timeout_millis: Optional[float] = None,
85+
) -> None:
86+
super().__init__(
87+
exporter, export_interval_millis, export_timeout_millis
88+
)
89+
self._collect_exception = exception
90+
91+
def collect(self, timeout_millis: float = 10_000) -> None:
92+
raise self._collect_exception
93+
94+
7095
metrics_list = [
7196
Metric(
7297
name="sum_name",
@@ -111,11 +136,13 @@ def test_defaults(self):
111136
pmr.shutdown()
112137

113138
def _create_periodic_reader(
114-
self, metrics, exporter, collect_wait=0, interval=60000
139+
self, metrics, exporter, collect_wait=0, interval=60000, timeout=30000
115140
):
116141

117142
pmr = PeriodicExportingMetricReader(
118-
exporter, export_interval_millis=interval
143+
exporter,
144+
export_interval_millis=interval,
145+
export_timeout_millis=timeout,
119146
)
120147

121148
def _collect(reader, timeout_millis):
@@ -219,3 +246,27 @@ def test_exporter_aggregation_preference(self):
219246
self.assertTrue(isinstance(value, DefaultAggregation))
220247
else:
221248
self.assertTrue(isinstance(value, LastValueAggregation))
249+
250+
def test_metric_timeout_does_not_kill_worker_thread(self):
251+
exporter = FakeMetricsExporter()
252+
pmr = ExceptionAtCollectionPeriodicExportingMetricReader(
253+
exporter,
254+
MetricsTimeoutError("test timeout"),
255+
export_timeout_millis=1,
256+
)
257+
258+
sleep(0.1)
259+
self.assertTrue(pmr._daemon_thread.is_alive())
260+
pmr.shutdown()
261+
262+
def test_non_metric_timeout_error_kills_exporter_thread(self):
263+
exporter = FakeMetricsExporter()
264+
pmr = ExceptionAtCollectionPeriodicExportingMetricReader(
265+
exporter,
266+
ZeroDivisionError(),
267+
export_timeout_millis=1,
268+
)
269+
270+
sleep(0.1)
271+
self.assertFalse(pmr._daemon_thread.is_alive())
272+
pmr.shutdown()

0 commit comments

Comments
 (0)