Skip to content

Commit b5a0ee5

Browse files
authored
opentelemetry-instrumentation-system-metrics: add process metrics (#3250)
* opentelemetry-instrumentation-system-metrics: add process metrics

  Add process metrics as of 1.30.0 semconv to the system metrics instrumentation. We still keep the old process.runtime metrics around because the semconv suggests not breaking current users. Still, discourage their use in the docs and state explicitly that they are deprecated.
* Add Changelog
* Please pylint
* Apply suggestions from code review
* Remove print
* Remove process.count and fix system metrics enumeration in tests
* Cleanup metrics presence assertions
* Don't touch system metrics descriptions
* Add default for num_cpu in case it returns None to avoid division error
1 parent 17a57bf commit b5a0ee5

File tree

3 files changed

+309
-57
lines changed

3 files changed

+309
-57
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1313

1414
### Added
1515

16+
- `opentelemetry-instrumentation-system-metrics` Add `process` metrics and deprecate the `process.runtime`-prefixed ones
17+
([#3250](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3250))
1618
- `opentelemetry-instrumentation-botocore` Add support for GenAI user events and lazy initialize tracer
1719
([#3258](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3258))
1820
- `opentelemetry-instrumentation-botocore` Add support for GenAI system events

instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py

+169-17
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,19 @@
3434
"system.network.io": ["transmit", "receive"],
3535
"system.network.connections": ["family", "type"],
3636
"system.thread_count": None,
37+
"process.context_switches": ["involuntary", "voluntary"],
38+
"process.cpu.time": ["user", "system"],
39+
"process.cpu.utilization": None,
40+
"process.memory.usage": None,
41+
"process.memory.virtual": None,
42+
"process.open_file_descriptor.count": None,
43+
"process.thread.count": None,
3744
"process.runtime.memory": ["rss", "vms"],
3845
"process.runtime.cpu.time": ["user", "system"],
3946
"process.runtime.gc_count": None,
4047
"process.runtime.thread_count": None,
4148
"process.runtime.cpu.utilization": None,
4249
"process.runtime.context_switches": ["involuntary", "voluntary"],
43-
"process.open_file_descriptor.count": None,
4450
}
4551
4652
Usage
@@ -66,12 +72,17 @@
6672
"system.memory.usage": ["used", "free", "cached"],
6773
"system.cpu.time": ["idle", "user", "system", "irq"],
6874
"system.network.io": ["transmit", "receive"],
69-
"process.runtime.memory": ["rss", "vms"],
70-
"process.runtime.cpu.time": ["user", "system"],
71-
"process.runtime.context_switches": ["involuntary", "voluntary"],
75+
"process.memory.usage": None,
76+
"process.memory.virtual": None,
77+
"process.cpu.time": ["user", "system"],
78+
"process.context_switches": ["involuntary", "voluntary"],
7279
}
7380
SystemMetricsInstrumentor(config=configuration).instrument()
7481
82+
83+
Out-of-spec `process.runtime`-prefixed metrics are deprecated and will be removed in future versions; users are encouraged to move
84+
to the `process` metrics.
85+
7586
API
7687
---
7788
"""
@@ -92,6 +103,9 @@
92103
from opentelemetry.instrumentation.system_metrics.package import _instruments
93104
from opentelemetry.instrumentation.system_metrics.version import __version__
94105
from opentelemetry.metrics import CallbackOptions, Observation, get_meter
106+
from opentelemetry.semconv._incubating.metrics.process_metrics import (
107+
create_process_cpu_utilization,
108+
)
95109

96110
_logger = logging.getLogger(__name__)
97111

@@ -112,13 +126,19 @@
112126
"system.network.io": ["transmit", "receive"],
113127
"system.network.connections": ["family", "type"],
114128
"system.thread_count": None,
129+
"process.context_switches": ["involuntary", "voluntary"],
130+
"process.cpu.time": ["user", "system"],
131+
"process.cpu.utilization": ["user", "system"],
132+
"process.memory.usage": None,
133+
"process.memory.virtual": None,
134+
"process.open_file_descriptor.count": None,
135+
"process.thread.count": None,
115136
"process.runtime.memory": ["rss", "vms"],
116137
"process.runtime.cpu.time": ["user", "system"],
117138
"process.runtime.gc_count": None,
118139
"process.runtime.thread_count": None,
119140
"process.runtime.cpu.utilization": None,
120141
"process.runtime.context_switches": ["involuntary", "voluntary"],
121-
"process.open_file_descriptor.count": None,
122142
}
123143

124144
if sys.platform == "darwin":
@@ -165,19 +185,26 @@ def __init__(
165185

166186
self._system_thread_count_labels = self._labels.copy()
167187

188+
self._context_switches_labels = self._labels.copy()
189+
self._cpu_time_labels = self._labels.copy()
190+
self._cpu_utilization_labels = self._labels.copy()
191+
self._memory_usage_labels = self._labels.copy()
192+
self._memory_virtual_labels = self._labels.copy()
193+
self._open_file_descriptor_count_labels = self._labels.copy()
194+
self._thread_count_labels = self._labels.copy()
195+
168196
self._runtime_memory_labels = self._labels.copy()
169197
self._runtime_cpu_time_labels = self._labels.copy()
170198
self._runtime_gc_count_labels = self._labels.copy()
171199
self._runtime_thread_count_labels = self._labels.copy()
172200
self._runtime_cpu_utilization_labels = self._labels.copy()
173201
self._runtime_context_switches_labels = self._labels.copy()
174-
self._open_file_descriptor_count_labels = self._labels.copy()
175202

176203
def instrumentation_dependencies(self) -> Collection[str]:
177204
return _instruments
178205

179206
def _instrument(self, **kwargs: Any):
180-
# pylint: disable=too-many-branches
207+
# pylint: disable=too-many-branches,too-many-statements
181208
meter_provider = kwargs.get("meter_provider")
182209
self._meter = get_meter(
183210
__name__,
@@ -186,6 +213,8 @@ def _instrument(self, **kwargs: Any):
186213
schema_url="https://opentelemetry.io/schemas/1.11.0",
187214
)
188215

216+
# system metrics
217+
189218
if "system.cpu.time" in self._config:
190219
self._meter.create_observable_counter(
191220
name="system.cpu.time",
@@ -194,6 +223,7 @@ def _instrument(self, **kwargs: Any):
194223
unit="s",
195224
)
196225

226+
# FIXME: double-check whether this value is divided by the number of CPU cores
197227
if "system.cpu.utilization" in self._config:
198228
self._meter.create_observable_gauge(
199229
name="system.cpu.utilization",
@@ -218,6 +248,7 @@ def _instrument(self, **kwargs: Any):
218248
unit="1",
219249
)
220250

251+
# FIXME: system.swap is gone in favour of system.paging
221252
if "system.swap.usage" in self._config:
222253
self._meter.create_observable_gauge(
223254
name="system.swap.usage",
@@ -269,6 +300,7 @@ def _instrument(self, **kwargs: Any):
269300
unit="operations",
270301
)
271302

303+
# FIXME: this has been replaced by system.disk.operation.time
272304
if "system.disk.time" in self._config:
273305
self._meter.create_observable_counter(
274306
name="system.disk.time",
@@ -299,6 +331,7 @@ def _instrument(self, **kwargs: Any):
299331
# TODO Filesystem information can be obtained with os.statvfs in Unix-like
300332
# OSs, how to do the same in Windows?
301333

334+
# FIXME: this is now just system.network.dropped
302335
if "system.network.dropped.packets" in self._config:
303336
self._meter.create_observable_counter(
304337
name="system.network.dropped_packets",
@@ -339,13 +372,72 @@ def _instrument(self, **kwargs: Any):
339372
unit="connections",
340373
)
341374

375+
# FIXME: this is gone
342376
if "system.thread_count" in self._config:
343377
self._meter.create_observable_gauge(
344378
name="system.thread_count",
345379
callbacks=[self._get_system_thread_count],
346380
description="System active threads count",
347381
)
348382

383+
# process metrics
384+
385+
if "process.cpu.time" in self._config:
386+
self._meter.create_observable_counter(
387+
name="process.cpu.time",
388+
callbacks=[self._get_cpu_time],
389+
description="Total CPU seconds broken down by different states.",
390+
unit="s",
391+
)
392+
393+
if "process.cpu.utilization" in self._config:
394+
create_process_cpu_utilization(
395+
self._meter, callbacks=[self._get_cpu_utilization]
396+
)
397+
398+
if "process.context_switches" in self._config:
399+
self._meter.create_observable_counter(
400+
name="process.context_switches",
401+
callbacks=[self._get_context_switches],
402+
description="Number of times the process has been context switched.",
403+
)
404+
405+
if "process.memory.usage" in self._config:
406+
self._meter.create_observable_up_down_counter(
407+
name="process.memory.usage",
408+
callbacks=[self._get_memory_usage],
409+
description="The amount of physical memory in use.",
410+
unit="By",
411+
)
412+
413+
if "process.memory.virtual" in self._config:
414+
self._meter.create_observable_up_down_counter(
415+
name="process.memory.virtual",
416+
callbacks=[self._get_memory_virtual],
417+
description="The amount of committed virtual memory.",
418+
unit="By",
419+
)
420+
421+
if (
422+
sys.platform != "win32"
423+
and "process.open_file_descriptor.count" in self._config
424+
):
425+
self._meter.create_observable_up_down_counter(
426+
name="process.open_file_descriptor.count",
427+
callbacks=[self._get_open_file_descriptors],
428+
description="Number of file descriptors in use by the process.",
429+
)
430+
431+
if "process.thread.count" in self._config:
432+
self._meter.create_observable_up_down_counter(
433+
name="process.thread.count",
434+
callbacks=[self._get_thread_count],
435+
description="Process threads count.",
436+
)
437+
438+
# FIXME: process.runtime keys are deprecated and will be removed in subsequent releases.
439+
# When removing them, remember to also clean up their callbacks and labels
440+
349441
if "process.runtime.memory" in self._config:
350442
self._meter.create_observable_up_down_counter(
351443
name=f"process.runtime.{self._python_implementation}.memory",
@@ -398,16 +490,6 @@ def _instrument(self, **kwargs: Any):
398490
unit="switches",
399491
)
400492

401-
if (
402-
sys.platform != "win32"
403-
and "process.open_file_descriptor.count" in self._config
404-
):
405-
self._meter.create_observable_up_down_counter(
406-
name="process.open_file_descriptor.count",
407-
callbacks=[self._get_open_file_descriptors],
408-
description="Number of file descriptors in use by the process.",
409-
)
410-
411493
def _uninstrument(self, **kwargs: Any):
412494
pass
413495

@@ -685,6 +767,76 @@ def _get_system_thread_count(
685767
threading.active_count(), self._system_thread_count_labels
686768
)
687769

770+
# process callbacks
771+
772+
def _get_context_switches(
773+
self, options: CallbackOptions
774+
) -> Iterable[Observation]:
775+
"""Observer callback for context switches"""
776+
ctx_switches = self._proc.num_ctx_switches()
777+
for metric in self._config["process.context_switches"]:
778+
if hasattr(ctx_switches, metric):
779+
self._context_switches_labels["type"] = metric
780+
yield Observation(
781+
getattr(ctx_switches, metric),
782+
self._context_switches_labels.copy(),
783+
)
784+
785+
def _get_cpu_time(self, options: CallbackOptions) -> Iterable[Observation]:
786+
"""Observer callback for CPU time"""
787+
proc_cpu = self._proc.cpu_times()
788+
for metric in self._config["process.cpu.time"]:
789+
if hasattr(proc_cpu, metric):
790+
self._cpu_time_labels["type"] = metric
791+
yield Observation(
792+
getattr(proc_cpu, metric),
793+
self._cpu_time_labels.copy(),
794+
)
795+
796+
def _get_cpu_utilization(
797+
self, options: CallbackOptions
798+
) -> Iterable[Observation]:
799+
"""Observer callback for CPU utilization"""
800+
proc_cpu_percent = self._proc.cpu_percent()
801+
# psutil.cpu_count() may return None, so fall back to 1 to avoid a division error
802+
num_cpus = psutil.cpu_count() or 1
803+
yield Observation(
804+
proc_cpu_percent / 100 / num_cpus,
805+
self._cpu_utilization_labels.copy(),
806+
)
807+
808+
def _get_memory_usage(
809+
self, options: CallbackOptions
810+
) -> Iterable[Observation]:
811+
"""Observer callback for memory usage"""
812+
proc_memory = self._proc.memory_info()
813+
if hasattr(proc_memory, "rss"):
814+
yield Observation(
815+
getattr(proc_memory, "rss"),
816+
self._memory_usage_labels.copy(),
817+
)
818+
819+
def _get_memory_virtual(
820+
self, options: CallbackOptions
821+
) -> Iterable[Observation]:
822+
"""Observer callback for virtual memory"""
823+
proc_memory = self._proc.memory_info()
824+
if hasattr(proc_memory, "vms"):
825+
yield Observation(
826+
getattr(proc_memory, "vms"),
827+
self._memory_virtual_labels.copy(),
828+
)
829+
830+
def _get_thread_count(
831+
self, options: CallbackOptions
832+
) -> Iterable[Observation]:
833+
"""Observer callback for active thread count"""
834+
yield Observation(
835+
self._proc.num_threads(), self._thread_count_labels.copy()
836+
)
837+
838+
# runtime callbacks
839+
688840
def _get_runtime_memory(
689841
self, options: CallbackOptions
690842
) -> Iterable[Observation]:

0 commit comments

Comments
 (0)