Skip to content

Commit a72c7de

Browse files
aabmass and srikanthccv authored
Add timeouts to metric SDK (#2653)
* Add timeouts to metric SDK * comments * don't use TimeoutError as it is intended for OS related timeouts * changelog and typo * isort * fix _time_ns import * Update CHANGELOG.md Co-authored-by: Srikanth Chekuri <[email protected]> * use self.fail in tests Co-authored-by: Srikanth Chekuri <[email protected]>
1 parent 7397605 commit a72c7de

File tree

10 files changed

+196
-42
lines changed

10 files changed

+196
-42
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased](https://github.com/open-telemetry/opentelemetry-python/compare/v1.11.1-0.30b1...HEAD)
99

10+
- Add timeouts to metric SDK
11+
([#2653](https://github.com/open-telemetry/opentelemetry-python/pull/2653))
1012
- Add variadic arguments to metric exporter/reader interfaces
1113
([#2654](https://github.com/open-telemetry/opentelemetry-python/pull/2654))
1214
- Move Metrics API behind internal package

exporter/opentelemetry-exporter-otlp-proto-grpc/src/opentelemetry/exporter/otlp/proto/grpc/_metric_exporter/__init__.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -169,9 +169,13 @@ def _translate_data(
169169
)
170170

171171
def export(
    self,
    metrics: Sequence[Metric],
    timeout_millis: float = 10_000,
    **kwargs,
) -> MetricExportResult:
    """Export a batch of metrics by delegating to ``self._export``.

    Args:
        metrics: The metrics to export.
        timeout_millis: Accepted as part of the exporter interface; it is
            not forwarded to ``self._export`` yet (see the TODO below).
        **kwargs: Extra keyword arguments; not used here.

    Returns:
        The value returned by ``self._export(metrics)``.
    """
    # TODO(#2663): OTLPExporterMixin should pass timeout to gRPC
    export_result = self._export(metrics)
    return export_result
175179

176-
def shutdown(self, timeout_millis: float = 30_000, **kwargs) -> None:
    """Shut down the exporter; this implementation does nothing.

    Args:
        timeout_millis: Accepted as part of the exporter interface; unused.
        **kwargs: Extra keyword arguments; unused.
    """
    return None

exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/__init__.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -111,15 +111,17 @@ def __init__(self, prefix: str = "") -> None:
111111
self._collector._callback = self.collect
112112

113113
def _receive_metrics(
    self,
    metrics: Iterable[Metric],
    timeout_millis: float = 10_000,
    **kwargs,
) -> None:
    """Hand a collected batch of metrics to the internal collector.

    Args:
        metrics: The collected metrics; ``None`` is ignored.
        timeout_millis: Accepted as part of the reader interface; unused.
        **kwargs: Extra keyword arguments; unused.
    """
    if metrics is not None:
        self._collector.add_metrics_data(metrics)
119122

120-
def shutdown(self, timeout_millis: float = 30_000, **kwargs) -> None:
    """Unregister this reader's collector from ``REGISTRY``.

    Args:
        timeout_millis: Accepted as part of the reader interface; unused.
        **kwargs: Extra keyword arguments; unused.
    """
    collector = self._collector
    REGISTRY.unregister(collector)
123125

124126

125127
class _CustomCollector:

opentelemetry-sdk/src/opentelemetry/sdk/_metrics/_internal/__init__.py

+19-7
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
from opentelemetry.sdk.resources import Resource
4949
from opentelemetry.sdk.util.instrumentation import InstrumentationScope
5050
from opentelemetry.util._once import Once
51+
from opentelemetry.util._time import _time_ns
5152

5253
_logger = getLogger(__name__)
5354

@@ -369,16 +370,20 @@ def __init__(
369370
self._shutdown_once = Once()
370371
self._shutdown = False
371372

372-
def force_flush(self, timeout_millis: float = 10_000) -> bool:
    """Trigger a collection on every configured metric reader.

    A single deadline is computed up front; each reader is given whatever
    time is left until that deadline as its own ``timeout_millis``.

    Args:
        timeout_millis: Overall time budget, in milliseconds, shared by
            all readers.

    Returns:
        ``True`` once every reader's ``collect`` has been called.

    Raises:
        Exception: If the deadline has already passed when the next
            reader is about to be collected.
    """
    deadline_ns = _time_ns() + timeout_millis * 10**6

    for reader in self._sdk_config.metric_readers:
        now_ns = _time_ns()
        if now_ns >= deadline_ns:
            raise Exception("Timed out while flushing metric readers")
        # Convert the remaining budget from nanoseconds back to milliseconds.
        remaining_millis = (deadline_ns - now_ns) / 10**6
        reader.collect(timeout_millis=remaining_millis)
    return True
379384

380-
def shutdown(self):
381-
# FIXME implement a timeout
385+
def shutdown(self, timeout_millis: float = 30_000):
386+
deadline_ns = _time_ns() + timeout_millis * 10**6
382387

383388
def _shutdown():
384389
self._shutdown = True
@@ -392,8 +397,15 @@ def _shutdown():
392397
metric_reader_error = {}
393398

394399
for metric_reader in self._sdk_config.metric_readers:
400+
current_ts = _time_ns()
395401
try:
396-
metric_reader.shutdown()
402+
if current_ts >= deadline_ns:
403+
raise Exception(
404+
"Didn't get to execute, deadline already exceeded"
405+
)
406+
metric_reader.shutdown(
407+
timeout_millis=(deadline_ns - current_ts) / 10**6
408+
)
397409

398410
# pylint: disable=broad-except
399411
except Exception as error:

opentelemetry-sdk/src/opentelemetry/sdk/_metrics/_internal/export/__init__.py

+31-14
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from opentelemetry.sdk._metrics.metric_reader import MetricReader
3232
from opentelemetry.sdk._metrics.point import AggregationTemporality, Metric
3333
from opentelemetry.util._once import Once
34+
from opentelemetry.util._time import _time_ns
3435

3536
_logger = logging.getLogger(__name__)
3637

@@ -53,8 +54,11 @@ class MetricExporter(ABC):
5354

5455
@abstractmethod
5556
def export(
56-
self, metrics: Sequence[Metric], *args, **kwargs
57-
) -> "MetricExportResult":
57+
self,
58+
metrics: Sequence[Metric],
59+
timeout_millis: float = 10_000,
60+
**kwargs,
61+
) -> MetricExportResult:
5862
"""Exports a batch of telemetry data.
5963
6064
Args:
@@ -65,7 +69,7 @@ def export(
6569
"""
6670

6771
@abstractmethod
68-
def shutdown(self, *args, **kwargs) -> None:
72+
def shutdown(self, timeout_millis: float = 30_000, **kwargs) -> None:
6973
"""Shuts down the exporter.
7074
7175
Called when the SDK is shut down.
@@ -90,14 +94,17 @@ def __init__(
9094
self.formatter = formatter
9195

9296
def export(
    self,
    metrics: Sequence[Metric],
    timeout_millis: float = 10_000,
    **kwargs,
) -> MetricExportResult:
    """Write each metric, rendered by ``self.formatter``, to ``self.out``.

    Args:
        metrics: The metrics to write.
        timeout_millis: Accepted as part of the exporter interface; unused.
        **kwargs: Extra keyword arguments; unused.

    Returns:
        ``MetricExportResult.SUCCESS`` after the stream has been flushed.
    """
    for item in metrics:
        rendered = self.formatter(item)
        self.out.write(rendered)
    self.out.flush()
    return MetricExportResult.SUCCESS
99106

100-
def shutdown(self, timeout_millis: float = 30_000, **kwargs) -> None:
    """Shut down the exporter; this implementation does nothing.

    Args:
        timeout_millis: Accepted as part of the exporter interface; unused.
        **kwargs: Extra keyword arguments; unused.
    """
    return None
102109

103110

@@ -127,11 +134,16 @@ def get_metrics(self) -> List[Metric]:
127134
self._metrics = []
128135
return metrics
129136

130-
def _receive_metrics(
    self,
    metrics: Iterable[Metric],
    timeout_millis: float = 10_000,
    **kwargs,
) -> None:
    """Replace the stored metrics with a list copy of ``metrics``.

    The iterable is materialised and stored while holding ``self._lock``.

    Args:
        metrics: The collected metrics to keep in memory.
        timeout_millis: Accepted as part of the reader interface; unused.
        **kwargs: Extra keyword arguments; unused.
    """
    with self._lock:
        snapshot = list(metrics)
        self._metrics = snapshot
133145

134-
def shutdown(self, timeout_millis: float = 30_000, **kwargs) -> None:
    """Shut down the reader; this implementation does nothing.

    Args:
        timeout_millis: Accepted as part of the reader interface; unused.
        **kwargs: Extra keyword arguments; unused.
    """
    return None
136148

137149

@@ -193,23 +205,28 @@ def _at_fork_reinit(self):
193205
def _ticker(self) -> None:
    """Collect repeatedly until the shutdown event is set.

    Waits on ``self._shutdown_event`` for one export interval at a time.
    Each time the wait expires without the event being set, a collection
    is run. Once the event is set, one final collection is performed
    before returning.
    """
    wait_secs = self._export_interval_millis / 1e3
    while True:
        if self._shutdown_event.wait(wait_secs):
            break
        self.collect(timeout_millis=self._export_timeout_millis)
    # one last collection below before shutting down completely
    # NOTE(review): this final call uses the export *interval* as its
    # timeout, while the loop above uses the export *timeout* -- confirm
    # that this is intended.
    self.collect(timeout_millis=self._export_interval_millis)
199211

200212
def _receive_metrics(
    self,
    metrics: Iterable[Metric],
    timeout_millis: float = 10_000,
    **kwargs,
) -> None:
    """Export a collected batch of metrics through ``self._exporter``.

    The export runs with ``_SUPPRESS_INSTRUMENTATION_KEY`` set to ``True``
    in the attached context. Exceptions raised by the exporter are logged
    and not propagated.

    Args:
        metrics: The collected metrics; ``None`` is ignored.
        timeout_millis: Forwarded to the exporter's ``export`` call.
        **kwargs: Extra keyword arguments; unused.
    """
    if metrics is None:
        return
    token = attach(set_value(_SUPPRESS_INSTRUMENTATION_KEY, True))
    try:
        self._exporter.export(metrics, timeout_millis=timeout_millis)
    except Exception as e:  # pylint: disable=broad-except,invalid-name
        _logger.exception("Exception while exporting metrics %s", str(e))
    finally:
        # Always restore the previous context, including when something
        # that is not an ``Exception`` subclass propagates out of export.
        detach(token)
211226

212-
def shutdown(self, *args, **kwargs):
227+
def shutdown(self, timeout_millis: float = 30_000, **kwargs) -> None:
228+
deadline_ns = _time_ns() + timeout_millis * 10**6
229+
213230
def _shutdown():
214231
self._shutdown = True
215232

@@ -219,5 +236,5 @@ def _shutdown():
219236
return
220237

221238
self._shutdown_event.set()
222-
self._daemon_thread.join()
223-
self._exporter.shutdown()
239+
self._daemon_thread.join(timeout=(deadline_ns - _time_ns()) / 10**9)
240+
self._exporter.shutdown(timeout=(deadline_ns - _time_ns()) / 10**6)

opentelemetry-sdk/src/opentelemetry/sdk/_metrics/_internal/metric_reader.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ def __init__(
138138
self._instrument_class_aggregation.update(preferred_aggregation or {})
139139

140140
@final
141-
def collect(self) -> None:
141+
def collect(self, timeout_millis: float = 10_000) -> None:
142142
"""Collects the metrics from the internal SDK state and
143143
invokes the `_receive_metrics` with the collection.
144144
"""
@@ -148,7 +148,8 @@ def collect(self) -> None:
148148
)
149149
return
150150
self._receive_metrics(
151-
self._collect(self, self._instrument_class_temporality)
151+
self._collect(self, self._instrument_class_temporality),
152+
timeout_millis=timeout_millis,
152153
)
153154

154155
@final
@@ -162,11 +163,16 @@ def _set_collect_callback(
162163
self._collect = func
163164

164165
@abstractmethod
def _receive_metrics(
    self,
    metrics: Iterable[Metric],
    timeout_millis: float = 10_000,
    **kwargs,
) -> None:
    """Handle a batch of metrics delivered by `MetricReader.collect`.

    Args:
        metrics: The batch of collected metrics.
        timeout_millis: The timeout, in milliseconds, that was passed to
            `MetricReader.collect`.
        **kwargs: Extra keyword arguments.
    """
167173

168174
@abstractmethod
169-
def shutdown(self, *args, **kwargs):
175+
def shutdown(self, timeout_millis: float = 30_000, **kwargs) -> None:
170176
"""Shuts down the MetricReader. This method provides a way
171177
for the MetricReader to do any cleanup required. A metric reader can
172178
only be shutdown once, any subsequent calls are ignored and return
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Copyright The OpenTelemetry Authors
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
The purpose of this test is to test for backward compatibility with any user-implementable
17+
interfaces as they were originally defined. For example, changes to the MetricExporter ABC must
18+
be made in such a way that existing implementations (outside of this repo) continue to work
19+
when *called* by the SDK.
20+
21+
This does not apply to classes which are not intended to be overriden by the user e.g. Meter
22+
and PeriodicExportingMetricReader concrete class. Those may freely be modified in a
23+
backward-compatible way for *callers*.
24+
25+
Ideally, we could use mypy for this as well, but the SDK is not type-checked at the moment.
26+
"""
27+
28+
from typing import Iterable, Sequence
29+
from unittest import TestCase
30+
31+
from opentelemetry.sdk._metrics import MeterProvider
32+
from opentelemetry.sdk._metrics.export import (
33+
MetricExporter,
34+
MetricExportResult,
35+
PeriodicExportingMetricReader,
36+
)
37+
from opentelemetry.sdk._metrics.metric_reader import MetricReader
38+
from opentelemetry.sdk._metrics.point import Metric
39+
40+
41+
# Do not change these classes until after major version 1
42+
class OrigMetricExporter(MetricExporter):
    """Do-nothing `MetricExporter` implementation used by the tests below.

    Its method signatures are the interface being checked for backward
    compatibility, so they are left untouched.
    """

    def export(
        self,
        metrics: Sequence[Metric],
        timeout_millis: float = 10_000,
        **kwargs,
    ) -> MetricExportResult:
        # Intentionally empty: only the signature matters for this test.
        pass

    def shutdown(self, timeout_millis: float = 30_000, **kwargs) -> None:
        # Intentionally empty: only the signature matters for this test.
        pass
53+
54+
55+
class OrigMetricReader(MetricReader):
    """Minimal `MetricReader` implementation used by the tests below.

    Its method signatures are the interface being checked for backward
    compatibility, so they are left untouched.
    """

    def _receive_metrics(
        self,
        metrics: Iterable[Metric],
        timeout_millis: float = 10_000,
        **kwargs,
    ) -> None:
        # Intentionally empty: only the signature matters for this test.
        pass

    def shutdown(self, timeout_millis: float = 30_000, **kwargs) -> None:
        # Run a collection on shutdown so that `_receive_metrics` is
        # invoked by the SDK.
        self.collect()
66+
67+
68+
class TestBackwardCompat(TestCase):
    """Check that user-implemented exporters/readers still work with the SDK."""

    def _record_and_shutdown(self, meter_provider):
        """Record one measurement, then fail the test if shutdown raises.

        The caught exception is included in the failure message so that the
        cause of an incompatibility is visible in the test output.
        """
        # produce some data
        meter_provider.get_meter("foo").create_counter("mycounter").add(12)
        try:
            meter_provider.shutdown()
        except Exception as error:  # pylint: disable=broad-except
            self.fail(f"MeterProvider.shutdown() raised {error!r}")

    def test_metric_exporter(self):
        exporter = OrigMetricExporter()
        meter_provider = MeterProvider(
            metric_readers=[PeriodicExportingMetricReader(exporter)]
        )
        self._record_and_shutdown(meter_provider)

    def test_metric_reader(self):
        reader = OrigMetricReader()
        meter_provider = MeterProvider(metric_readers=[reader])
        self._record_and_shutdown(meter_provider)

opentelemetry-sdk/tests/metrics/test_metric_reader.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@
1313
# limitations under the License.
1414

1515
from os import environ
16-
from typing import Dict
16+
from typing import Dict, Iterable
1717
from unittest import TestCase
1818
from unittest.mock import patch
1919

20-
from opentelemetry.sdk._metrics._internal.aggregation import Aggregation
2120
from opentelemetry.sdk._metrics.aggregation import (
21+
Aggregation,
2222
DefaultAggregation,
2323
LastValueAggregation,
2424
)
@@ -31,7 +31,7 @@
3131
UpDownCounter,
3232
)
3333
from opentelemetry.sdk._metrics.metric_reader import MetricReader
34-
from opentelemetry.sdk._metrics.point import AggregationTemporality
34+
from opentelemetry.sdk._metrics.point import AggregationTemporality, Metric
3535
from opentelemetry.sdk.environment_variables import (
3636
_OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE,
3737
)
@@ -48,10 +48,15 @@ def __init__(
4848
preferred_aggregation=preferred_aggregation,
4949
)
5050

51-
def _receive_metrics(
    self,
    metrics: Iterable[Metric],
    timeout_millis: float = 10_000,
    **kwargs,
) -> None:
    """Accept a batch of metrics and discard it (test stub)."""
    return None
5358

54-
def shutdown(self, timeout_millis: float = 30_000, **kwargs) -> None:
    """Shut down the reader; this test stub does nothing.

    Returns ``None``, matching the ``-> None`` annotation and the other
    ``shutdown`` implementations of the reader interface.

    Args:
        timeout_millis: Accepted as part of the reader interface; unused.
        **kwargs: Extra keyword arguments; unused.
    """
    return None
5661

5762

0 commit comments

Comments
 (0)