diff --git a/CHANGELOG.md b/CHANGELOG.md index 101cafd361..95a1ea22f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- `opentelemetry-instrumentation-system-metrics` Add `process` metrics and deprecated `process.runtime` prefixed ones + ([#3250](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3250)) - `opentelemetry-instrumentation-botocore` Add support for GenAI user events and lazy initialize tracer ([#3258](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3258)) - `opentelemetry-instrumentation-botocore` Add support for GenAI system events diff --git a/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py b/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py index a81c5bf7ff..e0407d8ab4 100644 --- a/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-system-metrics/src/opentelemetry/instrumentation/system_metrics/__init__.py @@ -34,13 +34,19 @@ "system.network.io": ["transmit", "receive"], "system.network.connections": ["family", "type"], "system.thread_count": None + "process.context_switches": ["involuntary", "voluntary"], + "process.cpu.time": ["user", "system"], + "process.cpu.utilization": None, + "process.memory.usage": None, + "process.memory.virtual": None, + "process.open_file_descriptor.count": None, + "process.thread.count": None, "process.runtime.memory": ["rss", "vms"], "process.runtime.cpu.time": ["user", "system"], "process.runtime.gc_count": None, "process.runtime.thread_count": None, "process.runtime.cpu.utilization": None, "process.runtime.context_switches": ["involuntary", "voluntary"], - "process.open_file_descriptor.count": None, } Usage @@ -66,12 +72,17 @@ "system.memory.usage": ["used", "free", "cached"], "system.cpu.time": ["idle", "user", "system", "irq"], "system.network.io": ["transmit", "receive"], - "process.runtime.memory": ["rss", "vms"], - "process.runtime.cpu.time": ["user", "system"], - "process.runtime.context_switches": ["involuntary", "voluntary"], + "process.memory.usage": None, + "process.memory.virtual": None, + "process.cpu.time": ["user", "system"], + "process.context_switches": ["involuntary", "voluntary"], } SystemMetricsInstrumentor(config=configuration).instrument() + +Out-of-spec `process.runtime` prefixed metrics are deprecated and will be removed in future versions, users are encouraged to move +to the `process` metrics. + API --- """ @@ -92,6 +103,9 @@ from opentelemetry.instrumentation.system_metrics.package import _instruments from opentelemetry.instrumentation.system_metrics.version import __version__ from opentelemetry.metrics import CallbackOptions, Observation, get_meter +from opentelemetry.semconv._incubating.metrics.process_metrics import ( + create_process_cpu_utilization, +) _logger = logging.getLogger(__name__) @@ -112,13 +126,19 @@ "system.network.io": ["transmit", "receive"], "system.network.connections": ["family", "type"], "system.thread_count": None, + "process.context_switches": ["involuntary", "voluntary"], + "process.cpu.time": ["user", "system"], + "process.cpu.utilization": ["user", "system"], + "process.memory.usage": None, + "process.memory.virtual": None, + "process.open_file_descriptor.count": None, + "process.thread.count": None, "process.runtime.memory": ["rss", "vms"], "process.runtime.cpu.time": ["user", "system"], "process.runtime.gc_count": None, "process.runtime.thread_count": None, "process.runtime.cpu.utilization": None, "process.runtime.context_switches": ["involuntary", "voluntary"], - "process.open_file_descriptor.count": None, } if sys.platform == "darwin": @@ -165,19 +185,26 @@ def __init__( self._system_thread_count_labels = self._labels.copy() + self._context_switches_labels = self._labels.copy() + self._cpu_time_labels = self._labels.copy() + self._cpu_utilization_labels = self._labels.copy() + self._memory_usage_labels = self._labels.copy() + self._memory_virtual_labels = self._labels.copy() + self._open_file_descriptor_count_labels = self._labels.copy() + self._thread_count_labels = self._labels.copy() + self._runtime_memory_labels = self._labels.copy() self._runtime_cpu_time_labels = self._labels.copy() self._runtime_gc_count_labels = self._labels.copy() self._runtime_thread_count_labels = self._labels.copy() self._runtime_cpu_utilization_labels = self._labels.copy() self._runtime_context_switches_labels = self._labels.copy() - self._open_file_descriptor_count_labels = self._labels.copy() def instrumentation_dependencies(self) -> Collection[str]: return _instruments def _instrument(self, **kwargs: Any): - # pylint: disable=too-many-branches + # pylint: disable=too-many-branches,too-many-statements meter_provider = kwargs.get("meter_provider") self._meter = get_meter( __name__, @@ -186,6 +213,8 @@ def _instrument(self, **kwargs: Any): schema_url="https://opentelemetry.io/schemas/1.11.0", ) + # system metrics + if "system.cpu.time" in self._config: self._meter.create_observable_counter( name="system.cpu.time", @@ -194,6 +223,7 @@ def _instrument(self, **kwargs: Any): unit="s", ) + # FIXME: double check this is divided by cpu core if "system.cpu.utilization" in self._config: self._meter.create_observable_gauge( name="system.cpu.utilization", @@ -218,6 +248,7 @@ def _instrument(self, **kwargs: Any): unit="1", ) + # FIXME: system.swap is gone in favour of system.paging if "system.swap.usage" in self._config: self._meter.create_observable_gauge( name="system.swap.usage", @@ -269,6 +300,7 @@ def _instrument(self, **kwargs: Any): unit="operations", ) + # FIXME: this has been replaced by system.disk.operation.time if "system.disk.time" in self._config: self._meter.create_observable_counter( name="system.disk.time", @@ -299,6 +331,7 @@ def _instrument(self, **kwargs: Any): # TODO Filesystem information can be obtained with os.statvfs in Unix-like # OSs, how to do the same in Windows? + # FIXME: this is now just system.network.dropped if "system.network.dropped.packets" in self._config: self._meter.create_observable_counter( name="system.network.dropped_packets", @@ -339,6 +372,7 @@ def _instrument(self, **kwargs: Any): unit="connections", ) + # FIXME: this is gone if "system.thread_count" in self._config: self._meter.create_observable_gauge( name="system.thread_count", @@ -346,6 +380,64 @@ def _instrument(self, **kwargs: Any): description="System active threads count", ) + # process metrics + + if "process.cpu.time" in self._config: + self._meter.create_observable_counter( + name="process.cpu.time", + callbacks=[self._get_cpu_time], + description="Total CPU seconds broken down by different states.", + unit="s", + ) + + if "process.cpu.utilization" in self._config: + create_process_cpu_utilization( + self._meter, callbacks=[self._get_cpu_utilization] + ) + + if "process.context_switches" in self._config: + self._meter.create_observable_counter( + name="process.context_switches", + callbacks=[self._get_context_switches], + description="Number of times the process has been context switched.", + ) + + if "process.memory.usage" in self._config: + self._meter.create_observable_up_down_counter( + name="process.memory.usage", + callbacks=[self._get_memory_usage], + description="The amount of physical memory in use.", + unit="By", + ) + + if "process.memory.virtual" in self._config: + self._meter.create_observable_up_down_counter( + name="process.memory.virtual", + callbacks=[self._get_memory_virtual], + description="The amount of committed virtual memory.", + unit="By", + ) + + if ( + sys.platform != "win32" + and "process.open_file_descriptor.count" in self._config + ): + self._meter.create_observable_up_down_counter( + name="process.open_file_descriptor.count", + callbacks=[self._get_open_file_descriptors], + description="Number of file descriptors in use by the process.", + ) + + if "process.thread.count" in self._config: + self._meter.create_observable_up_down_counter( + name="process.thread.count", + callbacks=[self._get_thread_count], + description="Process threads count.", + ) + + # FIXME: process.runtime keys are deprecated and will be removed in subsequent releases. + # When removing them, remember to clean also the callbacks and labels + if "process.runtime.memory" in self._config: self._meter.create_observable_up_down_counter( name=f"process.runtime.{self._python_implementation}.memory", @@ -398,16 +490,6 @@ def _instrument(self, **kwargs: Any): unit="switches", ) - if ( - sys.platform != "win32" - and "process.open_file_descriptor.count" in self._config - ): - self._meter.create_observable_up_down_counter( - name="process.open_file_descriptor.count", - callbacks=[self._get_open_file_descriptors], - description="Number of file descriptors in use by the process.", - ) - def _uninstrument(self, **kwargs: Any): pass @@ -685,6 +767,76 @@ def _get_system_thread_count( threading.active_count(), self._system_thread_count_labels ) + # process callbacks + + def _get_context_switches( + self, options: CallbackOptions + ) -> Iterable[Observation]: + """Observer callback for context switches""" + ctx_switches = self._proc.num_ctx_switches() + for metric in self._config["process.context_switches"]: + if hasattr(ctx_switches, metric): + self._context_switches_labels["type"] = metric + yield Observation( + getattr(ctx_switches, metric), + self._context_switches_labels.copy(), + ) + + def _get_cpu_time(self, options: CallbackOptions) -> Iterable[Observation]: + """Observer callback for CPU time""" + proc_cpu = self._proc.cpu_times() + for metric in self._config["process.cpu.time"]: + if hasattr(proc_cpu, metric): + self._cpu_time_labels["type"] = metric + yield Observation( + getattr(proc_cpu, metric), + self._cpu_time_labels.copy(), + ) + + def _get_cpu_utilization( + self, options: CallbackOptions + ) -> Iterable[Observation]: + """Observer callback for CPU utilization""" + proc_cpu_percent = self._proc.cpu_percent() + # may return None so add a default of 1 in case + num_cpus = psutil.cpu_count() or 1 + yield Observation( + proc_cpu_percent / 100 / num_cpus, + self._cpu_utilization_labels.copy(), + ) + + def _get_memory_usage( + self, options: CallbackOptions + ) -> Iterable[Observation]: + """Observer callback for memory usage""" + proc_memory = self._proc.memory_info() + if hasattr(proc_memory, "rss"): + yield Observation( + getattr(proc_memory, "rss"), + self._memory_usage_labels.copy(), + ) + + def _get_memory_virtual( + self, options: CallbackOptions + ) -> Iterable[Observation]: + """Observer callback for memory virtual""" + proc_memory = self._proc.memory_info() + if hasattr(proc_memory, "vms"): + yield Observation( + getattr(proc_memory, "vms"), + self._memory_virtual_labels.copy(), + ) + + def _get_thread_count( + self, options: CallbackOptions + ) -> Iterable[Observation]: + """Observer callback for active thread count""" + yield Observation( + self._proc.num_threads(), self._thread_count_labels.copy() + ) + + # runtime callbacks + def _get_runtime_memory( self, options: CallbackOptions ) -> Iterable[Observation]: diff --git a/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py b/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py index 92c30a66f0..c5a600218b 100644 --- a/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py +++ b/instrumentation/opentelemetry-instrumentation-system-metrics/tests/test_system_metrics.py @@ -115,6 +115,12 @@ def test_system_metrics_instrument(self): "system.network.io", "system.network.connections", "system.thread_count", + "process.context_switches", + "process.cpu.time", + "process.cpu.utilization", + "process.memory.usage", + "process.memory.virtual", + "process.thread.count", f"process.runtime.{self.implementation}.memory", f"process.runtime.{self.implementation}.cpu_time", f"process.runtime.{self.implementation}.thread_count", @@ -122,28 +128,56 @@ def test_system_metrics_instrument(self): f"process.runtime.{self.implementation}.cpu.utilization", ] - on_windows = sys.platform == "win32" - if self.implementation == "pypy": - self.assertEqual(len(metric_names), 20 if on_windows else 21) - else: - self.assertEqual(len(metric_names), 21 if on_windows else 22) - observer_names.append( - f"process.runtime.{self.implementation}.gc_count", - ) - if not on_windows: + # platform dependent metrics + if sys.platform != "win32": observer_names.append( "process.open_file_descriptor.count", ) + if self.implementation != "pypy": + observer_names.append( + f"process.runtime.{self.implementation}.gc_count", + ) - for observer in metric_names: - self.assertIn(observer, observer_names) - observer_names.remove(observer) + self.assertEqual(sorted(metric_names), sorted(observer_names)) + + def test_process_metrics_instrument(self): + process_config = { + "process.context_switches": ["involuntary", "voluntary"], + "process.cpu.time": ["user", "system"], + "process.cpu.utilization": None, + "process.memory.usage": None, + "process.memory.virtual": None, + "process.open_file_descriptor.count": None, + "process.thread.count": None, + } + + reader = InMemoryMetricReader() + meter_provider = MeterProvider(metric_readers=[reader]) + process_metrics = SystemMetricsInstrumentor(config=process_config) + process_metrics.instrument(meter_provider=meter_provider) + + metric_names = [] + for resource_metrics in reader.get_metrics_data().resource_metrics: + for scope_metrics in resource_metrics.scope_metrics: + for metric in scope_metrics.metrics: + metric_names.append(metric.name) - if on_windows: - self.assertNotIn( - "process.open_file_descriptor.count", observer_names + observer_names = [ + "process.memory.usage", + "process.memory.virtual", + "process.cpu.time", + "process.thread.count", + "process.context_switches", + "process.cpu.utilization", + ] + # platform dependent metrics + if sys.platform != "win32": + observer_names.append( + "process.open_file_descriptor.count", ) + self.assertEqual(sorted(metric_names), sorted(observer_names)) + def test_runtime_metrics_instrument(self): runtime_config = { "process.runtime.memory": ["rss", "vms"], @@ -174,18 +208,12 @@ def test_runtime_metrics_instrument(self): f"process.runtime.{self.implementation}.context_switches", f"process.runtime.{self.implementation}.cpu.utilization", ] + if self.implementation != "pypy": + observer_names.append( + f"process.runtime.{self.implementation}.gc_count" + ) - if self.implementation == "pypy": - self.assertEqual(len(metric_names), 5) - else: - self.assertEqual(len(metric_names), 6) - observer_names.append( - f"process.runtime.{self.implementation}.gc_count" - ) - - for observer in metric_names: - self.assertIn(observer, observer_names) - observer_names.remove(observer) + self.assertEqual(sorted(metric_names), sorted(observer_names)) def _assert_metrics(self, observer_name, reader, expected): assertions = 0 @@ -773,6 +801,88 @@ def test_system_thread_count(self, threading_active_count): expected = [_SystemMetricsResult({}, 42)] self._test_metrics("system.thread_count", expected) + @mock.patch("psutil.Process.memory_info") + def test_memory_usage(self, mock_process_memory_info): + PMem = namedtuple("PMem", ["rss", "vms"]) + + mock_process_memory_info.configure_mock( + **{"return_value": PMem(rss=1, vms=2)} + ) + + expected = [ + _SystemMetricsResult({}, 1), + ] + self._test_metrics("process.memory.usage", expected) + + @mock.patch("psutil.Process.memory_info") + def test_memory_virtual(self, mock_process_memory_info): + PMem = namedtuple("PMem", ["rss", "vms"]) + + mock_process_memory_info.configure_mock( + **{"return_value": PMem(rss=1, vms=2)} + ) + + expected = [ + _SystemMetricsResult({}, 2), + ] + self._test_metrics("process.memory.virtual", expected) + + @mock.patch("psutil.Process.cpu_times") + def test_cpu_time(self, mock_process_cpu_times): + PCPUTimes = namedtuple("PCPUTimes", ["user", "system"]) + + mock_process_cpu_times.configure_mock( + **{"return_value": PCPUTimes(user=1.1, system=2.2)} + ) + + expected = [ + _SystemMetricsResult({"type": "user"}, 1.1), + _SystemMetricsResult({"type": "system"}, 2.2), + ] + self._test_metrics("process.cpu.time", expected) + + @mock.patch("psutil.Process.num_ctx_switches") + def test_context_switches(self, mock_process_num_ctx_switches): + PCtxSwitches = namedtuple("PCtxSwitches", ["voluntary", "involuntary"]) + + mock_process_num_ctx_switches.configure_mock( + **{"return_value": PCtxSwitches(voluntary=1, involuntary=2)} + ) + + expected = [ + _SystemMetricsResult({"type": "voluntary"}, 1), + _SystemMetricsResult({"type": "involuntary"}, 2), + ] + self._test_metrics("process.context_switches", expected) + + @mock.patch("psutil.Process.num_threads") + def test_thread_count(self, mock_process_thread_num): + mock_process_thread_num.configure_mock(**{"return_value": 42}) + + expected = [_SystemMetricsResult({}, 42)] + self._test_metrics("process.thread.count", expected) + + @mock.patch("psutil.Process.cpu_percent") + @mock.patch("psutil.cpu_count") + def test_cpu_utilization(self, mock_cpu_count, mock_process_cpu_percent): + mock_cpu_count.return_value = 1 + mock_process_cpu_percent.configure_mock(**{"return_value": 42}) + + expected = [_SystemMetricsResult({}, 0.42)] + self._test_metrics("process.cpu.utilization", expected) + + @skipIf(sys.platform == "win32", "No file descriptors on Windows") + @mock.patch("psutil.Process.num_fds") + def test_open_file_descriptor_count(self, mock_process_num_fds): + mock_process_num_fds.configure_mock(**{"return_value": 3}) + + expected = [_SystemMetricsResult({}, 3)] + self._test_metrics( + "process.open_file_descriptor.count", + expected, + ) + mock_process_num_fds.assert_called() + @mock.patch("psutil.Process.memory_info") def test_runtime_memory(self, mock_process_memory_info): PMem = namedtuple("PMem", ["rss", "vms"]) @@ -838,7 +948,7 @@ def test_runtime_context_switches(self, mock_process_num_ctx_switches): ) @mock.patch("psutil.Process.num_threads") - def test_runtime_thread_num(self, mock_process_thread_num): + def test_runtime_thread_count(self, mock_process_thread_num): mock_process_thread_num.configure_mock(**{"return_value": 42}) expected = [_SystemMetricsResult({}, 42)] @@ -847,7 +957,7 @@ def test_runtime_thread_num(self, mock_process_thread_num): ) @mock.patch("psutil.Process.cpu_percent") - def test_runtime_cpu_percent(self, mock_process_cpu_percent): + def test_runtime_cpu_utilization(self, mock_process_cpu_percent): mock_process_cpu_percent.configure_mock(**{"return_value": 42}) expected = [_SystemMetricsResult({}, 0.42)] @@ -855,18 +965,6 @@ def test_runtime_cpu_percent(self, mock_process_cpu_percent): f"process.runtime.{self.implementation}.cpu.utilization", expected ) - @skipIf(sys.platform == "win32", "No file descriptors on Windows") - @mock.patch("psutil.Process.num_fds") - def test_open_file_descriptor_count(self, mock_process_num_fds): - mock_process_num_fds.configure_mock(**{"return_value": 3}) - - expected = [_SystemMetricsResult({}, 3)] - self._test_metrics( - "process.open_file_descriptor.count", - expected, - ) - mock_process_num_fds.assert_called() - class TestConfigSystemMetrics(TestBase): # pylint:disable=no-self-use