From 9a4fba0dcdf98e3370ad122850aa55a02dac060b Mon Sep 17 00:00:00 2001 From: Chris McClellan Date: Tue, 11 Feb 2025 14:40:41 -0500 Subject: [PATCH] Improve telemetry output 1. Actions and groups are now marked as errored when they fail 2. Set the otel.name so it includes the group or action name This lets us easily query/aggregate which groups or actions are failing most often. This fixes #185 and improves on the solution for #154 implemented in #157. We accomplish the dynamic span naming by leveraging one of the "special fields" detailed here: https://docs.rs/tracing-opentelemetry/latest/tracing_opentelemetry/ The naming convention is loosely based on the OTEL HTTP semantic convention of "{method} {url}". Several of the other semantic conventions follow a similar pattern. https://opentelemetry.io/docs/specs/semconv/http/http-spans/ In order to access the OTEL Span's `set_status()` method, I needed to upgrade all of our opentelemetry dependencies to the latest version. When I did this, traces were no longer being properly flushed before exiting the application, so I updated the implementation based on the opentelemetry-otlp example from the tracing-opentelemetry repository: 
https://github.com/tokio-rs/tracing-opentelemetry/blob/v0.1.x/examples/opentelemetry-otlp.rs --- Cargo.lock | 275 +++++++++++------------------------- Cargo.toml | 10 +- scope/src/doctor/runner.rs | 39 ++++- scope/src/shared/logging.rs | 201 ++++++++++++++------------ 4 files changed, 235 insertions(+), 290 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8bc3ad6..31a1e99 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -199,18 +199,17 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "axum" -version = "0.6.20" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b829e4e32b91e643de6eafe82b1d90675f5874230191a4ffbc1b336dec4d6bf" +checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" dependencies = [ "async-trait", "axum-core", - "bitflags 1.3.2", "bytes", "futures-util", - "http 0.2.12", - "http-body 0.4.6", - "hyper 0.14.28", + "http 1.1.0", + "http-body 1.0.0", + "http-body-util", "itoa", "matchit", "memchr", @@ -219,7 +218,7 @@ dependencies = [ "pin-project-lite", "rustversion", "serde", - "sync_wrapper 0.1.2", + "sync_wrapper 1.0.1", "tower", "tower-layer", "tower-service", @@ -227,17 +226,20 @@ dependencies = [ [[package]] name = "axum-core" -version = "0.3.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" dependencies = [ "async-trait", "bytes", "futures-util", - "http 0.2.12", - "http-body 0.4.6", + "http 1.1.0", + "http-body 1.0.0", + "http-body-util", "mime", + "pin-project-lite", "rustversion", + "sync_wrapper 1.0.1", "tower-layer", "tower-service", ] @@ -330,9 +332,9 @@ checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205" [[package]] name = "bytes" -version = "1.5.0" +version = "1.10.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" +checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" [[package]] name = "cc" @@ -644,7 +646,7 @@ dependencies = [ "ignore", "indicatif", "inquire", - "itertools 0.13.0", + "itertools", "json", "jsonschema", "jsonwebtoken", @@ -1288,9 +1290,9 @@ dependencies = [ [[package]] name = "hyper" -version = "1.2.0" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "186548d73ac615b32a73aafe38fb4f56c0d340e110e5a200bcadbaf2e199263a" +checksum = "256fb8d4bd6413123cc9d91832d78325c48ff41677595be797d90f42969beae0" dependencies = [ "bytes", "futures-channel", @@ -1299,6 +1301,7 @@ dependencies = [ "http 1.1.0", "http-body 1.0.0", "httparse", + "httpdate", "itoa", "pin-project-lite", "smallvec", @@ -1306,20 +1309,6 @@ dependencies = [ "want", ] -[[package]] -name = "hyper-rustls" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" -dependencies = [ - "futures-util", - "http 0.2.12", - "hyper 0.14.28", - "rustls 0.21.12", - "tokio", - "tokio-rustls 0.24.1", -] - [[package]] name = "hyper-rustls" version = "0.26.0" @@ -1328,7 +1317,7 @@ checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c" dependencies = [ "futures-util", "http 1.1.0", - "hyper 1.2.0", + "hyper 1.5.2", "hyper-util", "log", "rustls 0.22.3", @@ -1347,9 +1336,10 @@ checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155" dependencies = [ "futures-util", "http 1.1.0", - "hyper 1.2.0", + "hyper 1.5.2", "hyper-util", "rustls 0.23.7", + "rustls-native-certs 0.7.0", "rustls-pki-types", "tokio", "tokio-rustls 0.26.0", @@ -1357,25 +1347,13 @@ dependencies = [ "webpki-roots", ] -[[package]] -name = "hyper-timeout" -version = "0.4.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" -dependencies = [ - "hyper 0.14.28", - "pin-project-lite", - "tokio", - "tokio-io-timeout", -] - [[package]] name = "hyper-timeout" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793" dependencies = [ - "hyper 1.2.0", + "hyper 1.5.2", "hyper-util", "pin-project-lite", "tokio", @@ -1390,7 +1368,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper 1.2.0", + "hyper 1.5.2", "hyper-util", "native-tls", "tokio", @@ -1400,20 +1378,19 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.3" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca38ef113da30126bbff9cd1705f9273e15d45498615d138b0c20279ac7a76aa" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" dependencies = [ "bytes", "futures-channel", "futures-util", "http 1.1.0", "http-body 1.0.0", - "hyper 1.2.0", + "hyper 1.5.2", "pin-project-lite", "socket2", "tokio", - "tower", "tower-service", "tracing", ] @@ -1549,15 +1526,6 @@ dependencies = [ "nom", ] -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.13.0" @@ -2022,9 +1990,9 @@ dependencies = [ "http 1.1.0", "http-body 1.0.0", "http-body-util", - "hyper 1.2.0", + "hyper 1.5.2", "hyper-rustls 0.26.0", - "hyper-timeout 0.5.1", + "hyper-timeout", "hyper-util", "jsonwebtoken", "once_cell", @@ -2105,58 +2073,57 @@ dependencies = [ [[package]] name = "opentelemetry" -version = "0.22.0" +version = "0.27.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "900d57987be3f2aeb70d385fff9b27fb74c5723cc9a52d904d4f9c807a0667bf" +checksum = "ab70038c28ed37b97d8ed414b6429d343a8bbf44c9f79ec854f3a643029ba6d7" dependencies = [ "futures-core", "futures-sink", "js-sys", - "once_cell", "pin-project-lite", "thiserror", - "urlencoding", + "tracing", ] [[package]] name = "opentelemetry-http" -version = "0.11.1" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7690dc77bf776713848c4faa6501157469017eaf332baccd4eb1cea928743d94" +checksum = "10a8a7f5f6ba7c1b286c2fbca0454eaba116f63bbe69ed250b642d36fbb04d80" dependencies = [ "async-trait", "bytes", - "http 0.2.12", + "http 1.1.0", "opentelemetry", - "reqwest 0.11.27", + "reqwest 0.12.8", ] [[package]] name = "opentelemetry-otlp" -version = "0.15.0" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a016b8d9495c639af2145ac22387dcb88e44118e45320d9238fbf4e7889abcb" +checksum = "91cf61a1868dacc576bf2b2a1c3e9ab150af7272909e80085c3173384fe11f76" dependencies = [ "async-trait", "futures-core", - "http 0.2.12", + "http 1.1.0", "opentelemetry", "opentelemetry-http", "opentelemetry-proto", - "opentelemetry-semantic-conventions", "opentelemetry_sdk", "prost", - "reqwest 0.11.27", + "reqwest 0.12.8", "thiserror", "tokio", "tonic", + "tracing", ] [[package]] name = "opentelemetry-proto" -version = "0.5.0" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a8fddc9b68f5b80dae9d6f510b88e02396f006ad48cac349411fbecc80caae4" +checksum = "a6e05acbfada5ec79023c85368af14abd0b307c015e9064d249b2a950ef459a6" dependencies = [ "opentelemetry", "opentelemetry_sdk", @@ -2164,32 +2131,25 @@ dependencies = [ "tonic", ] -[[package]] -name = "opentelemetry-semantic-conventions" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f9ab5bd6c42fb9349dcf28af2ba9a0667f697f9bdcca045d39f2cec5543e2910" - [[package]] name = "opentelemetry_sdk" -version = "0.22.1" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e90c7113be649e31e9a0f8b5ee24ed7a16923b322c3c5ab6367469c049d6b7e" +checksum = "231e9d6ceef9b0b2546ddf52335785ce41252bc7474ee8ba05bfad277be13ab8" dependencies = [ "async-trait", - "crossbeam-channel", "futures-channel", "futures-executor", "futures-util", "glob", - "once_cell", "opentelemetry", - "ordered-float", "percent-encoding", "rand", + "serde_json", "thiserror", "tokio", "tokio-stream", + "tracing", ] [[package]] @@ -2198,15 +2158,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" -[[package]] -name = "ordered-float" -version = "4.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a76df7075c7d4d01fdcb46c912dd17fba5b60c78ea480b475f2b6ab6f666584e" -dependencies = [ - "num-traits", -] - [[package]] name = "os_info" version = "3.8.0" @@ -2382,9 +2333,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.12.4" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0f5d036824e4761737860779c906171497f6d55681139d8312388f8fe398922" +checksum = "2c0fef6c4230e4ccf618a35c59d7ede15dea37de8427500f50aff708806e42ec" dependencies = [ "bytes", "prost-derive", @@ -2392,12 +2343,12 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.12.4" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19de2de2a00075bf566bee3bd4db014b11587e84184d3f7a791bc17f1a8e9e48" +checksum = "157c5a9d7ea5c2ed2d9fb8f495b64759f7816c7eaea54ba3978f0d63000162e3" dependencies = [ "anyhow", - "itertools 0.12.1", + "itertools", "proc-macro2", "quote", "syn 2.0.52", @@ -2553,7 +2504,6 @@ dependencies = [ "http 0.2.12", "http-body 
0.4.6", "hyper 0.14.28", - "hyper-rustls 0.24.2", "ipnet", "js-sys", "log", @@ -2561,16 +2511,12 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls 0.21.12", - "rustls-native-certs 0.6.3", - "rustls-pemfile 1.0.4", "serde", "serde_json", "serde_urlencoded", "sync_wrapper 0.1.2", "system-configuration 0.5.1", "tokio", - "tokio-rustls 0.24.1", "tower-service", "url", "wasm-bindgen", @@ -2588,13 +2534,14 @@ dependencies = [ "base64 0.22.0", "bytes", "encoding_rs", + "futures-channel", "futures-core", "futures-util", "h2 0.4.5", "http 1.1.0", "http-body 1.0.0", "http-body-util", - "hyper 1.2.0", + "hyper 1.5.2", "hyper-rustls 0.27.2", "hyper-tls", "hyper-util", @@ -2609,7 +2556,8 @@ dependencies = [ "pin-project-lite", "quinn", "rustls 0.23.7", - "rustls-pemfile 2.1.2", + "rustls-native-certs 0.8.0", + "rustls-pemfile", "rustls-pki-types", "serde", "serde_json", @@ -2668,18 +2616,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "rustls" -version = "0.21.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" -dependencies = [ - "log", - "ring", - "rustls-webpki 0.101.7", - "sct", -] - [[package]] name = "rustls" version = "0.22.3" @@ -2689,7 +2625,7 @@ dependencies = [ "log", "ring", "rustls-pki-types", - "rustls-webpki 0.102.2", + "rustls-webpki", "subtle", "zeroize", ] @@ -2703,45 +2639,37 @@ dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.102.2", + "rustls-webpki", "subtle", "zeroize", ] [[package]] name = "rustls-native-certs" -version = "0.6.3" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +checksum = "8f1fb85efa936c42c6d5fc28d2629bb51e4b2f4b8a5211e297d599cc5a093792" dependencies = [ "openssl-probe", - "rustls-pemfile 1.0.4", + "rustls-pemfile", + "rustls-pki-types", "schannel", 
"security-framework", ] [[package]] name = "rustls-native-certs" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f1fb85efa936c42c6d5fc28d2629bb51e4b2f4b8a5211e297d599cc5a093792" +checksum = "fcaf18a4f2be7326cd874a5fa579fae794320a0f388d365dca7e480e55f83f8a" dependencies = [ "openssl-probe", - "rustls-pemfile 2.1.2", + "rustls-pemfile", "rustls-pki-types", "schannel", "security-framework", ] -[[package]] -name = "rustls-pemfile" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" -dependencies = [ - "base64 0.21.7", -] - [[package]] name = "rustls-pemfile" version = "2.1.2" @@ -2758,16 +2686,6 @@ version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ecd36cc4259e3e4514335c4a138c6b43171a8d61d8f5c9348f9fc7529416f247" -[[package]] -name = "rustls-webpki" -version = "0.101.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "rustls-webpki" version = "0.102.2" @@ -2839,16 +2757,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "sct" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "secrecy" version = "0.8.0" @@ -3352,16 +3260,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "tokio-io-timeout" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30b74022ada614a1b4834de765f9bb43877f910cc8ce4be40e89042c9223a8bf" 
-dependencies = [ - "pin-project-lite", - "tokio", -] - [[package]] name = "tokio-macros" version = "2.4.0" @@ -3383,16 +3281,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-rustls" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" -dependencies = [ - "rustls 0.21.12", - "tokio", -] - [[package]] name = "tokio-rustls" version = "0.25.0" @@ -3417,9 +3305,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.15" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" +checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" dependencies = [ "futures-core", "pin-project-lite", @@ -3475,23 +3363,26 @@ dependencies = [ [[package]] name = "tonic" -version = "0.11.0" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76c4eb7a4e9ef9d4763600161f12f5070b92a578e1b634db88a6887844c91a13" +checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" dependencies = [ "async-stream", "async-trait", "axum", - "base64 0.21.7", + "base64 0.22.0", "bytes", - "h2 0.3.25", - "http 0.2.12", - "http-body 0.4.6", - "hyper 0.14.28", - "hyper-timeout 0.4.1", + "h2 0.4.5", + "http 1.1.0", + "http-body 1.0.0", + "http-body-util", + "hyper 1.5.2", + "hyper-timeout", + "hyper-util", "percent-encoding", "pin-project", "prost", + "socket2", "tokio", "tokio-stream", "tower", @@ -3622,9 +3513,9 @@ dependencies = [ [[package]] name = "tracing-opentelemetry" -version = "0.23.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9be14ba1bbe4ab79e9229f7f89fab8d120b865859f10527f31c033e599d2284" +checksum = "97a971f6058498b5c0f1affa23e7ea202057a7301dbff68e968b2d578bcbd053" dependencies = [ "js-sys", "once_cell", @@ -3750,12 
+3641,6 @@ dependencies = [ "serde", ] -[[package]] -name = "urlencoding" -version = "2.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" - [[package]] name = "utf8parse" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index 84bd489..b3606da 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -83,11 +83,11 @@ jsonwebtoken = "9.3.0" secrecy = "0.8.0" url = "2.5.2" dotenvy = "0.15.7" -opentelemetry = { version = "0.22.0", features = ["metrics"] } -opentelemetry-otlp = { version = "0.15.0", features = ["metrics", "reqwest", "http-proto", "reqwest-client", "reqwest-rustls"] } -tracing-opentelemetry = "0.23.0" -opentelemetry_sdk = { version = "0.22.1", features = ["metrics", "rt-tokio"] } -tonic = "0.11.0" +opentelemetry = { version = "0.27.0", features = ["metrics"] } +opentelemetry-otlp = { version = "0.27.0", features = ["metrics", "reqwest", "http-proto", "reqwest-client", "reqwest-rustls"] } +tracing-opentelemetry = "0.28.0" +opentelemetry_sdk = { version = "0.27.0", features = ["metrics", "rt-tokio"] } +tonic = "0.12.0" gethostname = "0.4.3" normpath = "1.3.0" fake = "2.10.0" diff --git a/scope/src/doctor/runner.rs b/scope/src/doctor/runner.rs index 1548e41..fbfbbb8 100644 --- a/scope/src/doctor/runner.rs +++ b/scope/src/doctor/runner.rs @@ -4,6 +4,7 @@ use crate::report_stdout; use crate::shared::prelude::DoctorGroup; use anyhow::Result; use colored::Colorize; +use opentelemetry::trace::Status; use petgraph::dot::{Config, Dot}; use petgraph::prelude::*; use petgraph::visit::{DfsPostOrder, Walker}; @@ -13,6 +14,7 @@ use std::path::PathBuf; use std::sync::Arc; use tracing::{debug, error, info, info_span, warn, Instrument, Span}; use tracing_indicatif::span_ext::IndicatifSpanExt; +use tracing_opentelemetry::OpenTelemetrySpanExt; #[derive(Debug)] pub struct PathRunResult { @@ -160,12 +162,27 @@ where continue; } - let group_span = info_span!(parent: &header_span, 
"group", "indicatif.pb_show" = true, "group.name" = group_name); + let group_span = info_span!( + parent: &header_span, + "group", + "indicatif.pb_show" = true, + "group.name" = group_name, + "otel.name" = format!("group {}", group_name) + ); group_span.pb_set_length(group_container.actions.len() as u64); group_span.pb_set_message(&format!("group {}", group_name)); let _span = group_span.enter(); let group_result = self.execute_group(&group_span, group_container).await?; + if let GroupExecutionStatus::Failed = group_result.status { + group_span.set_status(Status::Error { + description: std::borrow::Cow::Owned(format!( + "{} group failed", + group_result.group_name + )), + }); + } + run_result.process(&group_result); skip_remaining |= group_result.skip_remaining; @@ -193,7 +210,14 @@ where continue; } - let action_span = info_span!(parent: group_span, "action", "indicatif.pb_show" = true, "action.name" = action.name()); + let action_span = info_span!( + parent: group_span, + "action", + "indicatif.pb_show" = true, + "group.name" = container.group_name, + "action.name" = action.name(), + "otel.name" = format!("action {}", action.name()) + ); action_span.pb_set_message(&format!( "action {} - {}", action.name(), @@ -203,9 +227,18 @@ where let action_result = action .run_action(prompt_user) - .instrument(action_span) + .instrument(action_span.clone()) .await?; + if action_result.status.is_failure() { + action_span.set_status(Status::Error { + description: std::borrow::Cow::Owned(format!( + "{} action failed", + action_result.action_name + )), + }); + } + results .group_report .add_action(&action_result.action_report); diff --git a/scope/src/shared/logging.rs b/scope/src/shared/logging.rs index 95a5bd1..4f44ed6 100644 --- a/scope/src/shared/logging.rs +++ b/scope/src/shared/logging.rs @@ -2,18 +2,19 @@ use clap::{ArgGroup, Parser, ValueEnum}; use gethostname::gethostname; use indicatif::ProgressStyle; use lazy_static::lazy_static; -use opentelemetry::{global, KeyValue}; 
-use opentelemetry_otlp::{ - HttpExporterBuilder, MetricsExporterBuilder, SpanExporterBuilder, TonicExporterBuilder, - WithExportConfig, + +use opentelemetry::{ + trace::{TraceError, TracerProvider as _}, + KeyValue, }; -use opentelemetry_sdk::metrics::reader::{DefaultAggregationSelector, DefaultTemporalitySelector}; -use opentelemetry_sdk::metrics::SdkMeterProvider; -use opentelemetry_sdk::trace::Tracer; +use opentelemetry_otlp::{MetricExporter, SpanExporter, WithExportConfig, WithTonicConfig}; use opentelemetry_sdk::{ - trace::{self, RandomIdGenerator, Sampler}, + metrics::{MetricError, PeriodicReader, SdkMeterProvider}, + runtime, + trace::{RandomIdGenerator, Sampler, TracerProvider}, Resource, }; + use std::env; use std::fs::File; use std::io::{IsTerminal, Write}; @@ -27,7 +28,7 @@ use tracing::level_filters::LevelFilter; use tracing::warn; use tracing_indicatif::filter::{hide_indicatif_span_fields, IndicatifFilter}; use tracing_indicatif::IndicatifLayer; -use tracing_opentelemetry::MetricsLayer; +use tracing_opentelemetry::{MetricsLayer, OpenTelemetryLayer}; use tracing_subscriber::fmt::format::DefaultFields; use tracing_subscriber::{filter::filter_fn, prelude::*}; use tracing_subscriber::{ @@ -152,8 +153,9 @@ pub struct ConfiguredLogger { pub log_location: String, } +/// RAII wrapper that ensures metrics and traces are flushed on shutdown struct OtelProperties { - tracer: Tracer, + tracer: TracerProvider, metrics: SdkMeterProvider, } @@ -162,7 +164,9 @@ impl Drop for OtelProperties { if let Err(e) = self.metrics.shutdown() { warn!("Unable to emit final metrics: {:?}", e); } - global::shutdown_tracer_provider(); + if let Err(e) = self.tracer.shutdown() { + warn!("Unable to emit final traces: {:?}", e); + } } } @@ -188,92 +192,116 @@ impl LoggingOpts { } } - fn make_tonic_exporter(&self, id: &str) -> TonicExporterBuilder { - let endpoint = self.otel_collector.clone().unwrap(); - let mut map = MetadataMap::with_capacity(2); + fn setup_otel(&self, run_id: &str) 
-> Result<Option<OtelProperties>, anyhow::Error> { + if self.otel_collector.is_some() { + let resource = self.resource(run_id); + let metadata_map = self.metadata_map(run_id); + let timeout = Duration::from_secs(3); + let endpoint = &self.otel_collector.clone().unwrap(); - map.insert( - "host", - gethostname() - .into_string() - .unwrap_or_else(|_| "unknown".to_string()) - .parse() - .unwrap(), - ); - map.insert("scope.id", id.parse().unwrap()); + let tracer = self.init_tracer_provider(&resource, &metadata_map, endpoint, &timeout)?; + let metrics = self.init_meter_provider(&resource, &metadata_map, endpoint, &timeout)?; - opentelemetry_otlp::new_exporter() - .tonic() - .with_endpoint(endpoint) - .with_timeout(Duration::from_secs(3)) - .with_metadata(map) + Ok(Some(OtelProperties { metrics, tracer })) + } else { + Ok(None) + } } - fn make_http_exporter(&self) -> HttpExporterBuilder { - let endpoint = self.otel_collector.clone().unwrap(); + fn init_tracer_provider( + &self, + resource: &Resource, + metadata_map: &MetadataMap, + endpoint: &str, + timeout: &Duration, + ) -> Result<TracerProvider, TraceError> { + let span_exporter = match self.otel_protocol { + OtelProtocol::Grpc => SpanExporter::builder() + .with_tonic() + .with_endpoint(endpoint) + .with_timeout(*timeout) + .with_metadata(metadata_map.clone()) + .build(), + OtelProtocol::Http => SpanExporter::builder() + .with_http() + .with_endpoint(endpoint) + .with_timeout(*timeout) + .build(), + }; + + let tracer = TracerProvider::builder() + .with_batch_exporter(span_exporter?, runtime::Tokio) + .with_sampler(Sampler::AlwaysOn) + .with_id_generator(RandomIdGenerator::default()) + .with_max_attributes_per_span(16) + .with_max_events_per_span(16) + .with_resource(resource.clone()) + .build(); - opentelemetry_otlp::new_exporter() - .http() - .with_endpoint(endpoint) - .with_timeout(Duration::from_secs(3)) + Ok(tracer) } - fn make_span_exporter_builder(&self, id: &str) -> SpanExporterBuilder { - match self.otel_protocol { - OtelProtocol::Grpc => 
SpanExporterBuilder::Tonic(self.make_tonic_exporter(id)), - OtelProtocol::Http => SpanExporterBuilder::Http(self.make_http_exporter()), - } + fn init_meter_provider( + &self, + resource: &Resource, + metadata_map: &MetadataMap, + endpoint: &str, + timeout: &Duration, + ) -> Result<SdkMeterProvider, MetricError> { + let metric_exporter = match self.otel_protocol { + OtelProtocol::Grpc => MetricExporter::builder() + .with_tonic() + .with_endpoint(endpoint) + .with_timeout(*timeout) + .with_metadata(metadata_map.clone()) + .build(), + OtelProtocol::Http => MetricExporter::builder() + .with_http() + .with_endpoint(endpoint) + .with_timeout(*timeout) + .build(), + }; + + let reader = PeriodicReader::builder(metric_exporter?, runtime::Tokio) + .with_interval(Duration::from_secs(3)) + .with_timeout(Duration::from_secs(10)) + .build(); + + let metrics = SdkMeterProvider::builder() + .with_reader(reader) + .with_resource(resource.clone()) + .build(); + + Ok(metrics) } - fn make_metrics_exporter_builder(&self, id: &str) -> MetricsExporterBuilder { - match self.otel_protocol { - OtelProtocol::Grpc => MetricsExporterBuilder::Tonic(self.make_tonic_exporter(id)), - OtelProtocol::Http => MetricsExporterBuilder::Http(self.make_http_exporter()), - } + fn resource(&self, run_id: &str) -> Resource { + Resource::new(vec![ + KeyValue::new("service.name", self.otel_service.clone()), + KeyValue::new("service.version", env!("CARGO_PKG_VERSION")), + KeyValue::new( + "host.name", + gethostname() + .into_string() + .unwrap_or_else(|_| "unknown".to_string()), + ), + KeyValue::new("scope.id", run_id.to_string()), + ]) } - fn setup_otel(&self, run_id: &str) -> Result<Option<OtelProperties>, anyhow::Error> { - if self.otel_collector.is_some() { - let resources = Resource::new(vec![ - KeyValue::new("service.name", self.otel_service.clone()), - KeyValue::new("service.version", env!("CARGO_PKG_VERSION")), - KeyValue::new( - "host.name", - gethostname() - .into_string() - .unwrap_or_else(|_| "unknown".to_string()), - ), - KeyValue::new("scope.id", 
run_id.to_string()), - ]); - - let tracer = opentelemetry_otlp::new_pipeline() - .tracing() - .with_exporter(self.make_span_exporter_builder(run_id)) - .with_trace_config( - trace::config() - .with_sampler(Sampler::AlwaysOn) - .with_id_generator(RandomIdGenerator::default()) - .with_max_events_per_span(64) - .with_max_attributes_per_span(16) - .with_max_events_per_span(16) - .with_resource(resources.clone()), - ) - .install_batch(opentelemetry_sdk::runtime::Tokio)?; - - let metrics = opentelemetry_otlp::new_pipeline() - .metrics(opentelemetry_sdk::runtime::Tokio) - .with_exporter(self.make_metrics_exporter_builder(run_id)) - .with_resource(resources) - .with_period(Duration::from_secs(3)) - .with_timeout(Duration::from_secs(10)) - .with_aggregation_selector(DefaultAggregationSelector::new()) - .with_temporality_selector(DefaultTemporalitySelector::new()) - .build()?; + fn metadata_map(&self, run_id: &str) -> MetadataMap { + let mut metadata_map = MetadataMap::with_capacity(2); + metadata_map.insert( + "host", + gethostname() + .into_string() + .unwrap_or_else(|_| "unknown".to_string()) + .parse() + .unwrap(), + ); + metadata_map.insert("scope.id", run_id.parse().unwrap()); - Ok(Some(OtelProperties { metrics, tracer })) - } else { - Ok(None) - } + metadata_map } pub async fn configure_logging(&self, run_id: &str, prefix: &str) -> ConfiguredLogger { @@ -354,8 +382,7 @@ impl LoggingOpts { let (otel_tracer_layer, otel_metrics_layer) = match otel_props { Some(ref props) => ( Some( - tracing_opentelemetry::layer() - .with_tracer(props.tracer.clone()) + OpenTelemetryLayer::new(props.tracer.tracer("scope")) .with_filter(filter_func.clone()), ), Some(MetricsLayer::new(props.metrics.clone()).with_filter(filter_func)),