From 821e71ccd21de1fdf26f9ff5ce60d0c534eb1979 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Fri, 7 Feb 2025 15:48:57 +0100 Subject: [PATCH 1/9] opentelemetry-instrumentation-system-metrics: add process metrics Add process metrics as of 1.30.0 semconv to the system metrics instrumentation. We still keep around the old process.runtime metrics because the semconv suggest to not break current users. Still discourage their use in the doc and state explicitly they are deprecated. --- .../system_metrics/__init__.py | 191 ++++++++++++++++-- .../tests/test_system_metrics.py | 148 ++++++++++++-- 2 files changed, 304 insertions(+), 35 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py b/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py index a81c5bf7ff..5f32674336 100644 --- a/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py @@ -34,13 +34,20 @@ "system.network.io": ["transmit", "receive"], "system.network.connections": ["family", "type"], "system.thread_count": None + "process.context_switches": ["involuntary", "voluntary"], + "process.count": None, + "process.cpu.time": ["user", "system"], + "process.cpu.utilization": None, + "process.memory.usage": None, + "process.memory.virtual": None, + "process.open_file_descriptor.count": None, + "process.thread.count": None, "process.runtime.memory": ["rss", "vms"], "process.runtime.cpu.time": ["user", "system"], "process.runtime.gc_count": None, "process.runtime.thread_count": None, "process.runtime.cpu.utilization": None, "process.runtime.context_switches": ["involuntary", "voluntary"], - "process.open_file_descriptor.count": None, } Usage @@ -66,12 +73,17 @@ "system.memory.usage": ["used", "free", "cached"], "system.cpu.time": ["idle", "user", "system", "irq"], "system.network.io": ["transmit", "receive"], - "process.runtime.memory": ["rss", "vms"], - "process.runtime.cpu.time": ["user", "system"], - "process.runtime.context_switches": ["involuntary", "voluntary"], + "process.memory.usage": None, + "process.memory.virtual": None, + "process.cpu.time": ["user", "system"], + "process.context_switches": ["involuntary", "voluntary"], } SystemMetricsInstrumentor(config=configuration).instrument() + +Out-of-spec `process.runtime` prefixed metrics are deprecated and will be remobed in future versions, users are encouraged to move +to the `process` metrics. + API --- """ @@ -92,6 +104,9 @@ from opentelemetry.instrumentation.system_metrics.package import _instruments from opentelemetry.instrumentation.system_metrics.version import __version__ from opentelemetry.metrics import CallbackOptions, Observation, get_meter +from opentelemetry.semconv._incubating.metrics.process_metrics import ( + create_process_cpu_utilization, +) _logger = logging.getLogger(__name__) @@ -112,13 +127,20 @@ "system.network.io": ["transmit", "receive"], "system.network.connections": ["family", "type"], "system.thread_count": None, + "process.context_switches": ["involuntary", "voluntary"], + "process.count": None, + "process.cpu.time": ["user", "system"], + "process.cpu.utilization": ["user", "system"], + "process.memory.usage": None, + "process.memory.virtual": None, + "process.open_file_descriptor.count": None, + "process.thread.count": None, "process.runtime.memory": ["rss", "vms"], "process.runtime.cpu.time": ["user", "system"], "process.runtime.gc_count": None, "process.runtime.thread_count": None, "process.runtime.cpu.utilization": None, "process.runtime.context_switches": ["involuntary", "voluntary"], - "process.open_file_descriptor.count": None, } if sys.platform == "darwin": @@ -165,13 +187,20 @@ def __init__( self._system_thread_count_labels = self._labels.copy() + self._context_switches_labels = self._labels.copy() + self._cpu_time_labels = self._labels.copy() + self._cpu_utilization_labels = self._labels.copy() + self._memory_usage_labels = self._labels.copy() + self._memory_virtual_labels = self._labels.copy() + self._open_file_descriptor_count_labels = self._labels.copy() + self._thread_count_labels = self._labels.copy() + self._runtime_memory_labels = self._labels.copy() self._runtime_cpu_time_labels = self._labels.copy() self._runtime_gc_count_labels = self._labels.copy() self._runtime_thread_count_labels = self._labels.copy() self._runtime_cpu_utilization_labels = self._labels.copy() self._runtime_context_switches_labels = self._labels.copy() - self._open_file_descriptor_count_labels = self._labels.copy() def instrumentation_dependencies(self) -> Collection[str]: return _instruments @@ -186,19 +215,22 @@ def _instrument(self, **kwargs: Any): schema_url="https://opentelemetry.io/schemas/1.11.0", ) + # system metrics + if "system.cpu.time" in self._config: self._meter.create_observable_counter( name="system.cpu.time", callbacks=[self._get_system_cpu_time], - description="System CPU time", + description="Seconds each logical CPU spent on each mode", unit="s", ) + # FIXME: double check this is divided by cpu core if "system.cpu.utilization" in self._config: self._meter.create_observable_gauge( name="system.cpu.utilization", callbacks=[self._get_system_cpu_utilization], - description="System CPU utilization", + description="Difference in system.cpu.time since the last measurement, divided by the elapsed time and number of logical CPUs", unit="1", ) @@ -206,7 +238,7 @@ def _instrument(self, **kwargs: Any): self._meter.create_observable_gauge( name="system.memory.usage", callbacks=[self._get_system_memory_usage], - description="System memory usage", + description="Reports memory in use by state.", unit="By", ) @@ -218,6 +250,7 @@ def _instrument(self, **kwargs: Any): unit="1", ) + # FIXME: system.swap is gone in favour of system.paging if "system.swap.usage" in self._config: self._meter.create_observable_gauge( name="system.swap.usage", @@ -269,6 +302,7 @@ def _instrument(self, **kwargs: Any): unit="operations", ) + # FIXME: this has been replaced by system.disk.operation.time if "system.disk.time" in self._config: self._meter.create_observable_counter( name="system.disk.time", @@ -299,6 +333,7 @@ def _instrument(self, **kwargs: Any): # TODO Filesystem information can be obtained with os.statvfs in Unix-like # OSs, how to do the same in Windows? + # FIXME: this is now just system.network.dropped if "system.network.dropped.packets" in self._config: self._meter.create_observable_counter( name="system.network.dropped_packets", @@ -339,6 +374,7 @@ def _instrument(self, **kwargs: Any): unit="connections", ) + # FIXME: this is gone if "system.thread_count" in self._config: self._meter.create_observable_gauge( name="system.thread_count", @@ -346,6 +382,64 @@ def _instrument(self, **kwargs: Any): description="System active threads count", ) + # process metrics + + if "process.cpu.time" in self._config: + self._meter.create_observable_counter( + name="process.cpu.time", + callbacks=[self._get_cpu_time], + description="Total CPU seconds broken down by different states.", + unit="s", + ) + + if "process.cpu.utilization" in self._config: + create_process_cpu_utilization( + self._meter, callbacks=[self._get_cpu_utilization] + ) + + if "process.context_switches" in self._config: + self._meter.create_observable_counter( + name="process.context_switches", + callbacks=[self._get_context_switches], + description="Number of times the process has been context switched.", + ) + + if "process.memory.usage" in self._config: + self._meter.create_observable_up_down_counter( + name="process.memory.usage", + callbacks=[self._get_memory_usage], + description="The amount of physical memory in use.", + unit="By", + ) + + if "process.memory.virtual" in self._config: + self._meter.create_observable_up_down_counter( + name="process.memory.virtual", + callbacks=[self._get_memory_virtual], + description="The amount of committed virtual memory.", + unit="By", + ) + + if ( + sys.platform != "win32" + and "process.open_file_descriptor.count" in self._config + ): + self._meter.create_observable_up_down_counter( + name="process.open_file_descriptor.count", + callbacks=[self._get_open_file_descriptors], + description="Number of file descriptors in use by the process.", + ) + + if "process.thread.count" in self._config: + self._meter.create_observable_up_down_counter( + name="process.thread.count", + callbacks=[self._get_thread_count], + description="Process threads count.", + ) + + # FIXME: process.runtime keys are deprecated and will be removed in subsequent releases. + # When removing them, remember to clean also the callbacks and labels + if "process.runtime.memory" in self._config: self._meter.create_observable_up_down_counter( name=f"process.runtime.{self._python_implementation}.memory", @@ -398,16 +492,6 @@ def _instrument(self, **kwargs: Any): unit="switches", ) - if ( - sys.platform != "win32" - and "process.open_file_descriptor.count" in self._config - ): - self._meter.create_observable_up_down_counter( - name="process.open_file_descriptor.count", - callbacks=[self._get_open_file_descriptors], - description="Number of file descriptors in use by the process.", - ) - def _uninstrument(self, **kwargs: Any): pass @@ -685,6 +769,75 @@ def _get_system_thread_count( threading.active_count(), self._system_thread_count_labels ) + # process callbacks + + def _get_context_switches( + self, options: CallbackOptions + ) -> Iterable[Observation]: + """Observer callback for context switches""" + ctx_switches = self._proc.num_ctx_switches() + for metric in self._config["process.context_switches"]: + if hasattr(ctx_switches, metric): + self._context_switches_labels["type"] = metric + yield Observation( + getattr(ctx_switches, metric), + self._context_switches_labels.copy(), + ) + + def _get_cpu_time(self, options: CallbackOptions) -> Iterable[Observation]: + """Observer callback for CPU time""" + proc_cpu = self._proc.cpu_times() + for metric in self._config["process.cpu.time"]: + if hasattr(proc_cpu, metric): + self._cpu_time_labels["type"] = metric + yield Observation( + getattr(proc_cpu, metric), + self._cpu_time_labels.copy(), + ) + + def _get_cpu_utilization( + self, options: CallbackOptions + ) -> Iterable[Observation]: + """Observer callback for CPU utilization""" + proc_cpu_percent = self._proc.cpu_percent() + num_cpus = psutil.cpu_count() + yield Observation( + proc_cpu_percent / 100 / num_cpus, + self._cpu_utilization_labels.copy(), + ) + + def _get_memory_usage( + self, options: CallbackOptions + ) -> Iterable[Observation]: + """Observer callback for memory usage""" + proc_memory = self._proc.memory_info() + if hasattr(proc_memory, "rss"): + yield Observation( + getattr(proc_memory, "rss"), + self._memory_usage_labels.copy(), + ) + + def _get_memory_virtual( + self, options: CallbackOptions + ) -> Iterable[Observation]: + """Observer callback for memory virtual""" + proc_memory = self._proc.memory_info() + if hasattr(proc_memory, "vms"): + yield Observation( + getattr(proc_memory, "vms"), + self._memory_virtual_labels.copy(), + ) + + def _get_thread_count( + self, options: CallbackOptions + ) -> Iterable[Observation]: + """Observer callback for active thread count""" + yield Observation( + self._proc.num_threads(), self._thread_count_labels.copy() + ) + + # runtime callbacks + def _get_runtime_memory( self, options: CallbackOptions ) -> Iterable[Observation]: diff --git a/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py b/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py index 92c30a66f0..3d5f0aa99a 100644 --- a/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py +++ b/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py @@ -115,6 +115,14 @@ def test_system_metrics_instrument(self): "system.network.io", "system.network.connections", "system.thread_count", + "process.context_switches", + "process.count", + "process.cpu.time", + "process.cpu.utilization", + "process.memory.usage", + "process.memory.virtual", + "process.open_file_descriptor.count", + "process.thread.count", f"process.runtime.{self.implementation}.memory", f"process.runtime.{self.implementation}.cpu_time", f"process.runtime.{self.implementation}.thread_count", @@ -124,9 +132,9 @@ def test_system_metrics_instrument(self): on_windows = sys.platform == "win32" if self.implementation == "pypy": - self.assertEqual(len(metric_names), 20 if on_windows else 21) + self.assertEqual(len(metric_names), 26 if on_windows else 27) else: - self.assertEqual(len(metric_names), 21 if on_windows else 22) + self.assertEqual(len(metric_names), 27 if on_windows else 28) observer_names.append( f"process.runtime.{self.implementation}.gc_count", ) @@ -144,6 +152,44 @@ def test_system_metrics_instrument(self): "process.open_file_descriptor.count", observer_names ) + def test_process_metrics_instrument(self): + runtime_config = { + "process.memory.usage": None, + "process.memory.virtual": None, + "process.cpu.time": ["user", "system"], + "process.thread.count": None, + "process.cpu.utilization": None, + "process.context_switches": ["involuntary", "voluntary"], + } + + reader = InMemoryMetricReader() + meter_provider = MeterProvider(metric_readers=[reader]) + runtime_metrics = SystemMetricsInstrumentor(config=runtime_config) + runtime_metrics.instrument(meter_provider=meter_provider) + + metric_names = [] + for resource_metrics in reader.get_metrics_data().resource_metrics: + for scope_metrics in resource_metrics.scope_metrics: + for metric in scope_metrics.metrics: + metric_names.append(metric.name) + + observer_names = [ + "process.memory.usage", + "process.memory.virtual", + "process.cpu.time", + "process.thread.count", + "process.context_switches", + "process.cpu.utilization", + ] + + print(metric_names) + + self.assertEqual(len(metric_names), 6) + + for observer in metric_names: + self.assertIn(observer, observer_names) + observer_names.remove(observer) + def test_runtime_metrics_instrument(self): runtime_config = { "process.runtime.memory": ["rss", "vms"], @@ -773,6 +819,88 @@ def test_system_thread_count(self, threading_active_count): expected = [_SystemMetricsResult({}, 42)] self._test_metrics("system.thread_count", expected) + @mock.patch("psutil.Process.memory_info") + def test_memory_usage(self, mock_process_memory_info): + PMem = namedtuple("PMem", ["rss", "vms"]) + + mock_process_memory_info.configure_mock( + **{"return_value": PMem(rss=1, vms=2)} + ) + + expected = [ + _SystemMetricsResult({}, 1), + ] + self._test_metrics("process.memory.usage", expected) + + @mock.patch("psutil.Process.memory_info") + def test_memory_virtual(self, mock_process_memory_info): + PMem = namedtuple("PMem", ["rss", "vms"]) + + mock_process_memory_info.configure_mock( + **{"return_value": PMem(rss=1, vms=2)} + ) + + expected = [ + _SystemMetricsResult({}, 2), + ] + self._test_metrics("process.memory.virtual", expected) + + @mock.patch("psutil.Process.cpu_times") + def test_cpu_time(self, mock_process_cpu_times): + PCPUTimes = namedtuple("PCPUTimes", ["user", "system"]) + + mock_process_cpu_times.configure_mock( + **{"return_value": PCPUTimes(user=1.1, system=2.2)} + ) + + expected = [ + _SystemMetricsResult({"type": "user"}, 1.1), + _SystemMetricsResult({"type": "system"}, 2.2), + ] + self._test_metrics("process.cpu.time", expected) + + @mock.patch("psutil.Process.num_ctx_switches") + def test_context_switches(self, mock_process_num_ctx_switches): + PCtxSwitches = namedtuple("PCtxSwitches", ["voluntary", "involuntary"]) + + mock_process_num_ctx_switches.configure_mock( + **{"return_value": PCtxSwitches(voluntary=1, involuntary=2)} + ) + + expected = [ + _SystemMetricsResult({"type": "voluntary"}, 1), + _SystemMetricsResult({"type": "involuntary"}, 2), + ] + self._test_metrics("process.context_switches", expected) + + @mock.patch("psutil.Process.num_threads") + def test_thread_count(self, mock_process_thread_num): + mock_process_thread_num.configure_mock(**{"return_value": 42}) + + expected = [_SystemMetricsResult({}, 42)] + self._test_metrics("process.thread.count", expected) + + @mock.patch("psutil.Process.cpu_percent") + @mock.patch("psutil.cpu_count") + def test_cpu_utilization(self, mock_cpu_count, mock_process_cpu_percent): + mock_cpu_count.return_value = 1 + mock_process_cpu_percent.configure_mock(**{"return_value": 42}) + + expected = [_SystemMetricsResult({}, 0.42)] + self._test_metrics("process.cpu.utilization", expected) + + @skipIf(sys.platform == "win32", "No file descriptors on Windows") + @mock.patch("psutil.Process.num_fds") + def test_open_file_descriptor_count(self, mock_process_num_fds): + mock_process_num_fds.configure_mock(**{"return_value": 3}) + + expected = [_SystemMetricsResult({}, 3)] + self._test_metrics( + "process.open_file_descriptor.count", + expected, + ) + mock_process_num_fds.assert_called() + @mock.patch("psutil.Process.memory_info") def test_runtime_memory(self, mock_process_memory_info): PMem = namedtuple("PMem", ["rss", "vms"]) @@ -838,7 +966,7 @@ def test_runtime_context_switches(self, mock_process_num_ctx_switches): ) @mock.patch("psutil.Process.num_threads") - def test_runtime_thread_num(self, mock_process_thread_num): + def test_runtime_thread_count(self, mock_process_thread_num): mock_process_thread_num.configure_mock(**{"return_value": 42}) expected = [_SystemMetricsResult({}, 42)] @@ -847,7 +975,7 @@ def test_runtime_thread_num(self, mock_process_thread_num): ) @mock.patch("psutil.Process.cpu_percent") - def test_runtime_cpu_percent(self, mock_process_cpu_percent): + def test_runtime_cpu_utilization(self, mock_process_cpu_percent): mock_process_cpu_percent.configure_mock(**{"return_value": 42}) expected = [_SystemMetricsResult({}, 0.42)] @@ -855,18 +983,6 @@ def test_runtime_cpu_percent(self, mock_process_cpu_percent): f"process.runtime.{self.implementation}.cpu.utilization", expected ) - @skipIf(sys.platform == "win32", "No file descriptors on Windows") - @mock.patch("psutil.Process.num_fds") - def test_open_file_descriptor_count(self, mock_process_num_fds): - mock_process_num_fds.configure_mock(**{"return_value": 3}) - - expected = [_SystemMetricsResult({}, 3)] - self._test_metrics( - "process.open_file_descriptor.count", - expected, - ) - mock_process_num_fds.assert_called() - class TestConfigSystemMetrics(TestBase): # pylint:disable=no-self-use From 01ee403e6d826ea565d0a42d31875f8691258239 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Fri, 7 Feb 2025 16:30:16 +0100 Subject: [PATCH 2/9] Add Changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 83321b1700..3ba5c63613 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + +- `opentelemetry-instrumentation-system-metrics` Add `process` metrics and deprecated `process.runtime` prefixed ones + ([#3250](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3250)) + ## Version 1.30.0/0.51b0 (2025-02-03) ### Added From acb4e1a24f129d4c96faa80069a1749cf2aaaf79 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Fri, 7 Feb 2025 17:29:15 +0100 Subject: [PATCH 3/9] Please pylint --- .../opentelemetry/instrumentation/system_metrics/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py b/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py index 5f32674336..734665fce7 100644 --- a/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py @@ -206,7 +206,7 @@ def instrumentation_dependencies(self) -> Collection[str]: return _instruments def _instrument(self, **kwargs: Any): - # pylint: disable=too-many-branches + # pylint: disable=too-many-branches,too-many-statements meter_provider = kwargs.get("meter_provider") self._meter = get_meter( __name__, From a6e746e131939fedcf3ed67ee41dd3ed1668b713 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Tue, 18 Feb 2025 09:21:54 +0100 Subject: [PATCH 4/9] Apply suggestions from code review --- CHANGELOG.md | 2 +- .../opentelemetry/instrumentation/system_metrics/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 26e81ce429..6547b2d211 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased -### Added +### Added - `opentelemetry-instrumentation-system-metrics` Add `process` metrics and deprecated `process.runtime` prefixed ones ([#3250](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3250)) diff --git a/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py b/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py index 734665fce7..24d86dabe2 100644 --- a/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py @@ -81,7 +81,7 @@ SystemMetricsInstrumentor(config=configuration).instrument() -Out-of-spec `process.runtime` prefixed metrics are deprecated and will be remobed in future versions, users are encouraged to move +Out-of-spec `process.runtime` prefixed metrics are deprecated and will be removed in future versions, users are encouraged to move to the `process` metrics. API From e6db765b00d71c7db1a763c6242cc20a107d41b2 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Tue, 18 Feb 2025 14:12:53 +0100 Subject: [PATCH 5/9] Remove print --- .../tests/test_system_metrics.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py b/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py index 3d5f0aa99a..115abdf585 100644 --- a/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py +++ b/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py @@ -182,8 +182,6 @@ def test_process_metrics_instrument(self): "process.cpu.utilization", ] - print(metric_names) - self.assertEqual(len(metric_names), 6) for observer in metric_names: From df13edb674c0c6af4e348f743a0ad6ad153261cd Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Tue, 18 Feb 2025 14:32:27 +0100 Subject: [PATCH 6/9] Remove process.count and fix system metrics enumeration in tests --- .../system_metrics/__init__.py | 2 -- .../tests/test_system_metrics.py | 26 ++++++++----------- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py b/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py index 24d86dabe2..b86ab7bb1f 100644 --- a/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py @@ -35,7 +35,6 @@ "system.network.connections": ["family", "type"], "system.thread_count": None "process.context_switches": ["involuntary", "voluntary"], - "process.count": None, "process.cpu.time": ["user", "system"], "process.cpu.utilization": None, "process.memory.usage": None, @@ -128,7 +127,6 @@ "system.network.connections": ["family", "type"], "system.thread_count": None, "process.context_switches": ["involuntary", "voluntary"], - "process.count": None, "process.cpu.time": ["user", "system"], "process.cpu.utilization": ["user", "system"], "process.memory.usage": None, diff --git a/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py b/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py index 115abdf585..31d60db53c 100644 --- a/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py +++ b/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py @@ -116,12 +116,10 @@ def test_system_metrics_instrument(self): "system.network.connections", "system.thread_count", "process.context_switches", - "process.count", "process.cpu.time", "process.cpu.utilization", "process.memory.usage", "process.memory.virtual", - "process.open_file_descriptor.count", "process.thread.count", f"process.runtime.{self.implementation}.memory", f"process.runtime.{self.implementation}.cpu_time", @@ -130,27 +128,25 @@ def test_system_metrics_instrument(self): f"process.runtime.{self.implementation}.cpu.utilization", ] - on_windows = sys.platform == "win32" - if self.implementation == "pypy": - self.assertEqual(len(metric_names), 26 if on_windows else 27) - else: - self.assertEqual(len(metric_names), 27 if on_windows else 28) - observer_names.append( - f"process.runtime.{self.implementation}.gc_count", - ) - if not on_windows: + # platform dependent metrics + if sys.platform != "win32": observer_names.append( "process.open_file_descriptor.count", ) + if self.implementation != "pypy": + observer_names.append( + f"process.runtime.{self.implementation}.gc_count", + ) + + num_expected_metrics = len(observer_names) + self.assertEqual(len(metric_names), num_expected_metrics) for observer in metric_names: self.assertIn(observer, observer_names) observer_names.remove(observer) + self.assertNotIn(observer, observer_names) - if on_windows: - self.assertNotIn( - "process.open_file_descriptor.count", observer_names - ) + self.assertFalse(observer_names) def test_process_metrics_instrument(self): runtime_config = { From ab151d66a70da10f3bc5c509c330017f9199fc48 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Tue, 18 Feb 2025 14:57:52 +0100 Subject: [PATCH 7/9] Cleanup metrics presence assertions --- .../tests/test_system_metrics.py | 50 +++++++------------ 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py b/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py index 31d60db53c..c5a600218b 100644 --- a/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py +++ b/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py @@ -138,30 +138,23 @@ def test_system_metrics_instrument(self): f"process.runtime.{self.implementation}.gc_count", ) - num_expected_metrics = len(observer_names) - self.assertEqual(len(metric_names), num_expected_metrics) - - for observer in metric_names: - self.assertIn(observer, observer_names) - observer_names.remove(observer) - self.assertNotIn(observer, observer_names) - - self.assertFalse(observer_names) + self.assertEqual(sorted(metric_names), sorted(observer_names)) def test_process_metrics_instrument(self): - runtime_config = { + process_config = { + "process.context_switches": ["involuntary", "voluntary"], + "process.cpu.time": ["user", "system"], + "process.cpu.utilization": None, "process.memory.usage": None, "process.memory.virtual": None, - "process.cpu.time": ["user", "system"], + "process.open_file_descriptor.count": None, "process.thread.count": None, - "process.cpu.utilization": None, - "process.context_switches": ["involuntary", "voluntary"], } reader = InMemoryMetricReader() meter_provider = MeterProvider(metric_readers=[reader]) - runtime_metrics = SystemMetricsInstrumentor(config=runtime_config) - runtime_metrics.instrument(meter_provider=meter_provider) + process_metrics = SystemMetricsInstrumentor(config=process_config) + process_metrics.instrument(meter_provider=meter_provider) metric_names = [] for resource_metrics in reader.get_metrics_data().resource_metrics: @@ -177,12 +170,13 @@ def test_process_metrics_instrument(self): "process.context_switches", "process.cpu.utilization", ] + # platform dependent metrics + if sys.platform != "win32": + observer_names.append( + "process.open_file_descriptor.count", + ) - self.assertEqual(len(metric_names), 6) - - for observer in metric_names: - self.assertIn(observer, observer_names) - observer_names.remove(observer) + self.assertEqual(sorted(metric_names), sorted(observer_names)) def test_runtime_metrics_instrument(self): runtime_config = { @@ -214,18 +208,12 @@ def test_runtime_metrics_instrument(self): f"process.runtime.{self.implementation}.context_switches", f"process.runtime.{self.implementation}.cpu.utilization", ] + if self.implementation != "pypy": + observer_names.append( + f"process.runtime.{self.implementation}.gc_count" + ) - if self.implementation == "pypy": - self.assertEqual(len(metric_names), 5) - else: - self.assertEqual(len(metric_names), 6) - observer_names.append( - f"process.runtime.{self.implementation}.gc_count" - ) - - for observer in metric_names: - self.assertIn(observer, observer_names) - observer_names.remove(observer) + self.assertEqual(sorted(metric_names), sorted(observer_names)) def _assert_metrics(self, observer_name, reader, expected): assertions = 0 From fc355a5cc5da623cdf147068e18515c3a37896a3 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Thu, 20 Feb 2025 09:50:57 +0100 Subject: [PATCH 8/9] Don't touch system metrics descriptions --- .../instrumentation/system_metrics/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py b/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py index b86ab7bb1f..74ee55dd60 100644 --- a/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py @@ -219,7 +219,7 @@ def _instrument(self, **kwargs: Any): self._meter.create_observable_counter( name="system.cpu.time", callbacks=[self._get_system_cpu_time], - description="Seconds each logical CPU spent on each mode", + description="System CPU time", unit="s", ) @@ -228,7 +228,7 @@ def _instrument(self, **kwargs: Any): self._meter.create_observable_gauge( name="system.cpu.utilization", callbacks=[self._get_system_cpu_utilization], - description="Difference in system.cpu.time since the last measurement, divided by the elapsed time and number of logical CPUs", + description="System CPU utilization", unit="1", ) @@ -236,7 +236,7 @@ def _instrument(self, **kwargs: Any): self._meter.create_observable_gauge( name="system.memory.usage", callbacks=[self._get_system_memory_usage], - description="Reports memory in use by state.", + description="System memory usage", unit="By", ) From 1f4b11e65f37976c1c5d1844ed97bf70bbbc1284 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Fri, 21 Feb 2025 09:51:30 +0100 Subject: [PATCH 9/9] Add default for num_cpu in case it returns None to avoid division error --- .../opentelemetry/instrumentation/system_metrics/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py b/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py index 74ee55dd60..e0407d8ab4 100644 --- a/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py @@ -798,7 +798,8 @@ def _get_cpu_utilization( ) -> Iterable[Observation]: """Observer callback for CPU utilization""" proc_cpu_percent = self._proc.cpu_percent() - num_cpus = psutil.cpu_count() + # may return None so add a default of 1 in case + num_cpus = psutil.cpu_count() or 1 yield Observation( proc_cpu_percent / 100 / num_cpus, self._cpu_utilization_labels.copy(),