Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

opentelemetry-instrumentation-system-metrics: add process metrics #3250

Merged
merged 13 commits into from
Feb 21, 2025
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- `opentelemetry-instrumentation-system-metrics` Add `process` metrics and deprecate `process.runtime` prefixed ones
([#3250](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3250))
- `opentelemetry-instrumentation-botocore` Add support for GenAI user events and lazy initialize tracer
([#3258](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3258))
- `opentelemetry-instrumentation-botocore` Add support for GenAI system events
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,19 @@
"system.network.io": ["transmit", "receive"],
"system.network.connections": ["family", "type"],
"system.thread_count": None,
"process.context_switches": ["involuntary", "voluntary"],
"process.cpu.time": ["user", "system"],
"process.cpu.utilization": None,
"process.memory.usage": None,
"process.memory.virtual": None,
Comment on lines +40 to +41
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Am I correct that these two combine to replace "process.runtime.memory": ["rss", "vms"],?

"process.open_file_descriptor.count": None,
"process.thread.count": None,
"process.runtime.memory": ["rss", "vms"],
"process.runtime.cpu.time": ["user", "system"],
"process.runtime.gc_count": None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This won't be deprecated, right?

"process.runtime.thread_count": None,
"process.runtime.cpu.utilization": None,
"process.runtime.context_switches": ["involuntary", "voluntary"],
"process.open_file_descriptor.count": None,
}

Usage
Expand All @@ -66,12 +72,17 @@
"system.memory.usage": ["used", "free", "cached"],
"system.cpu.time": ["idle", "user", "system", "irq"],
"system.network.io": ["transmit", "receive"],
"process.runtime.memory": ["rss", "vms"],
"process.runtime.cpu.time": ["user", "system"],
"process.runtime.context_switches": ["involuntary", "voluntary"],
"process.memory.usage": None,
"process.memory.virtual": None,
"process.cpu.time": ["user", "system"],
"process.context_switches": ["involuntary", "voluntary"],
}
SystemMetricsInstrumentor(config=configuration).instrument()


Out-of-spec `process.runtime` prefixed metrics are deprecated and will be removed in a future version; users are encouraged to move
to the `process` metrics.

API
---
"""
Expand All @@ -92,6 +103,9 @@
from opentelemetry.instrumentation.system_metrics.package import _instruments
from opentelemetry.instrumentation.system_metrics.version import __version__
from opentelemetry.metrics import CallbackOptions, Observation, get_meter
from opentelemetry.semconv._incubating.metrics.process_metrics import (
create_process_cpu_utilization,
)

_logger = logging.getLogger(__name__)

Expand All @@ -112,13 +126,19 @@
"system.network.io": ["transmit", "receive"],
"system.network.connections": ["family", "type"],
"system.thread_count": None,
"process.context_switches": ["involuntary", "voluntary"],
"process.cpu.time": ["user", "system"],
"process.cpu.utilization": ["user", "system"],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are there config values here?

"process.memory.usage": None,
"process.memory.virtual": None,
"process.open_file_descriptor.count": None,
"process.thread.count": None,
"process.runtime.memory": ["rss", "vms"],
"process.runtime.cpu.time": ["user", "system"],
"process.runtime.gc_count": None,
"process.runtime.thread_count": None,
"process.runtime.cpu.utilization": None,
"process.runtime.context_switches": ["involuntary", "voluntary"],
"process.open_file_descriptor.count": None,
}

if sys.platform == "darwin":
Expand Down Expand Up @@ -165,19 +185,26 @@ def __init__(

self._system_thread_count_labels = self._labels.copy()

self._context_switches_labels = self._labels.copy()
self._cpu_time_labels = self._labels.copy()
self._cpu_utilization_labels = self._labels.copy()
self._memory_usage_labels = self._labels.copy()
self._memory_virtual_labels = self._labels.copy()
self._open_file_descriptor_count_labels = self._labels.copy()
self._thread_count_labels = self._labels.copy()

self._runtime_memory_labels = self._labels.copy()
self._runtime_cpu_time_labels = self._labels.copy()
self._runtime_gc_count_labels = self._labels.copy()
self._runtime_thread_count_labels = self._labels.copy()
self._runtime_cpu_utilization_labels = self._labels.copy()
self._runtime_context_switches_labels = self._labels.copy()
self._open_file_descriptor_count_labels = self._labels.copy()

def instrumentation_dependencies(self) -> Collection[str]:
    """Return the ``_instruments`` collection imported from the package module."""
    return _instruments

def _instrument(self, **kwargs: Any):
# pylint: disable=too-many-branches
# pylint: disable=too-many-branches,too-many-statements
meter_provider = kwargs.get("meter_provider")
self._meter = get_meter(
__name__,
Expand All @@ -186,6 +213,8 @@ def _instrument(self, **kwargs: Any):
schema_url="https://opentelemetry.io/schemas/1.11.0",
)

# system metrics

if "system.cpu.time" in self._config:
self._meter.create_observable_counter(
name="system.cpu.time",
Expand All @@ -194,6 +223,7 @@ def _instrument(self, **kwargs: Any):
unit="s",
)

# FIXME: double check this is divided by cpu core
if "system.cpu.utilization" in self._config:
self._meter.create_observable_gauge(
name="system.cpu.utilization",
Expand All @@ -218,6 +248,7 @@ def _instrument(self, **kwargs: Any):
unit="1",
)

# FIXME: system.swap is gone in favour of system.paging
if "system.swap.usage" in self._config:
self._meter.create_observable_gauge(
name="system.swap.usage",
Expand Down Expand Up @@ -269,6 +300,7 @@ def _instrument(self, **kwargs: Any):
unit="operations",
)

# FIXME: this has been replaced by system.disk.operation.time
if "system.disk.time" in self._config:
self._meter.create_observable_counter(
name="system.disk.time",
Expand Down Expand Up @@ -299,6 +331,7 @@ def _instrument(self, **kwargs: Any):
# TODO Filesystem information can be obtained with os.statvfs in Unix-like
# OSs, how to do the same in Windows?

# FIXME: this is now just system.network.dropped
if "system.network.dropped.packets" in self._config:
self._meter.create_observable_counter(
name="system.network.dropped_packets",
Expand Down Expand Up @@ -339,13 +372,72 @@ def _instrument(self, **kwargs: Any):
unit="connections",
)

# FIXME: this is gone
if "system.thread_count" in self._config:
self._meter.create_observable_gauge(
name="system.thread_count",
callbacks=[self._get_system_thread_count],
description="System active threads count",
)

# process metrics

if "process.cpu.time" in self._config:
self._meter.create_observable_counter(
name="process.cpu.time",
callbacks=[self._get_cpu_time],
description="Total CPU seconds broken down by different states.",
unit="s",
)

if "process.cpu.utilization" in self._config:
create_process_cpu_utilization(
self._meter, callbacks=[self._get_cpu_utilization]
)

if "process.context_switches" in self._config:
self._meter.create_observable_counter(
name="process.context_switches",
callbacks=[self._get_context_switches],
description="Number of times the process has been context switched.",
)

if "process.memory.usage" in self._config:
self._meter.create_observable_up_down_counter(
name="process.memory.usage",
callbacks=[self._get_memory_usage],
description="The amount of physical memory in use.",
unit="By",
)

if "process.memory.virtual" in self._config:
self._meter.create_observable_up_down_counter(
name="process.memory.virtual",
callbacks=[self._get_memory_virtual],
description="The amount of committed virtual memory.",
unit="By",
)

if (
sys.platform != "win32"
and "process.open_file_descriptor.count" in self._config
):
self._meter.create_observable_up_down_counter(
name="process.open_file_descriptor.count",
callbacks=[self._get_open_file_descriptors],
description="Number of file descriptors in use by the process.",
)

if "process.thread.count" in self._config:
self._meter.create_observable_up_down_counter(
name="process.thread.count",
callbacks=[self._get_thread_count],
description="Process threads count.",
)

# FIXME: process.runtime keys are deprecated and will be removed in subsequent releases.
# When removing them, remember to clean also the callbacks and labels

if "process.runtime.memory" in self._config:
self._meter.create_observable_up_down_counter(
name=f"process.runtime.{self._python_implementation}.memory",
Expand Down Expand Up @@ -398,16 +490,6 @@ def _instrument(self, **kwargs: Any):
unit="switches",
)

if (
sys.platform != "win32"
and "process.open_file_descriptor.count" in self._config
):
self._meter.create_observable_up_down_counter(
name="process.open_file_descriptor.count",
callbacks=[self._get_open_file_descriptors],
description="Number of file descriptors in use by the process.",
)

def _uninstrument(self, **kwargs: Any):
pass

Expand Down Expand Up @@ -685,6 +767,76 @@ def _get_system_thread_count(
threading.active_count(), self._system_thread_count_labels
)

# process callbacks

def _get_context_switches(
    self, options: CallbackOptions
) -> Iterable[Observation]:
    """Observer callback for process context switches.

    Yields one ``Observation`` for every name listed under the
    ``"process.context_switches"`` config key that exists as an attribute
    on the value returned by ``self._proc.num_ctx_switches()``. Names that
    are not attributes of that value are skipped. ``options`` is not used.
    """
    switch_counts = self._proc.num_ctx_switches()
    for switch_type in self._config["process.context_switches"]:
        if not hasattr(switch_counts, switch_type):
            continue
        # The shared label dict is updated in place; each observation
        # gets its own snapshot so later iterations do not overwrite it.
        self._context_switches_labels["type"] = switch_type
        count = getattr(switch_counts, switch_type)
        yield Observation(count, self._context_switches_labels.copy())

def _get_cpu_time(self, options: CallbackOptions) -> Iterable[Observation]:
    """Observer callback for process CPU time.

    Yields one ``Observation`` for every name listed under the
    ``"process.cpu.time"`` config key that exists as an attribute on the
    value returned by ``self._proc.cpu_times()``. Names that are not
    attributes of that value are skipped. ``options`` is not used.
    """
    cpu_times = self._proc.cpu_times()
    for time_type in self._config["process.cpu.time"]:
        if not hasattr(cpu_times, time_type):
            continue
        # The shared label dict is updated in place; each observation
        # gets its own snapshot so later iterations do not overwrite it.
        self._cpu_time_labels["type"] = time_type
        seconds = getattr(cpu_times, time_type)
        yield Observation(seconds, self._cpu_time_labels.copy())

def _get_cpu_utilization(
    self, options: CallbackOptions
) -> Iterable[Observation]:
    """Observer callback for process CPU utilization.

    Yields a single ``Observation`` whose value is the result of
    ``self._proc.cpu_percent()`` divided by 100 and then by the CPU
    count. ``options`` is not used.
    """
    percent = self._proc.cpu_percent()
    cores = psutil.cpu_count()
    if not cores:
        # cpu_count() may return None; fall back to a single core so the
        # division below stays valid.
        cores = 1
    utilization = percent / 100 / cores
    yield Observation(utilization, self._cpu_utilization_labels.copy())

def _get_memory_usage(
    self, options: CallbackOptions
) -> Iterable[Observation]:
    """Observer callback for process memory usage.

    Yields a single ``Observation`` carrying the ``rss`` attribute of the
    value returned by ``self._proc.memory_info()``. Yields nothing when
    that value has no ``rss`` attribute. ``options`` is not used.
    """
    memory = self._proc.memory_info()
    if not hasattr(memory, "rss"):
        return
    yield Observation(memory.rss, self._memory_usage_labels.copy())

def _get_memory_virtual(
    self, options: CallbackOptions
) -> Iterable[Observation]:
    """Observer callback for process virtual memory.

    Yields a single ``Observation`` carrying the ``vms`` attribute of the
    value returned by ``self._proc.memory_info()``. Yields nothing when
    that value has no ``vms`` attribute. ``options`` is not used.
    """
    memory = self._proc.memory_info()
    if not hasattr(memory, "vms"):
        return
    yield Observation(memory.vms, self._memory_virtual_labels.copy())

def _get_thread_count(
    self, options: CallbackOptions
) -> Iterable[Observation]:
    """Observer callback for the process thread count.

    Yields a single ``Observation`` with the value returned by
    ``self._proc.num_threads()`` and a copy of the thread-count labels.
    ``options`` is not used.
    """
    thread_total = self._proc.num_threads()
    attributes = self._thread_count_labels.copy()
    yield Observation(thread_total, attributes)

# runtime callbacks

def _get_runtime_memory(
self, options: CallbackOptions
) -> Iterable[Observation]:
Expand Down
Loading